author    Anton Altaparmakov <aia21@cantab.net>  2005-06-23 06:26:22 -0400
committer Anton Altaparmakov <aia21@cantab.net>  2005-06-23 06:26:22 -0400
commit    3357d4c75f1fb67e7304998c4ad4e9a9fed66fa4 (patch)
tree      ceba46966a5a1112a05d257d8ecb25ae5eee95e0 /net
parent    364f6c717deef4a3ac4982e670fa9846b43cd060 (diff)
parent    ee98689be1b054897ff17655008c3048fe88be94 (diff)
Automatic merge with /usr/src/ntfs-2.6.git.
Diffstat (limited to 'net')
-rw-r--r--  net/appletalk/aarp.c | 9
-rw-r--r--  net/appletalk/ddp.c | 2
-rw-r--r--  net/bridge/br_forward.c | 3
-rw-r--r--  net/bridge/br_input.c | 4
-rw-r--r--  net/bridge/br_netfilter.c | 38
-rw-r--r--  net/bridge/netfilter/ebtables.c | 21
-rw-r--r--  net/core/Makefile | 3
-rw-r--r--  net/core/dev.c | 1
-rw-r--r--  net/core/neighbour.c | 333
-rw-r--r--  net/core/net-sysfs.c | 17
-rw-r--r--  net/core/netfilter.c | 138
-rw-r--r--  net/core/netpoll.c | 80
-rw-r--r--  net/core/request_sock.c | 64
-rw-r--r--  net/core/rtnetlink.c | 33
-rw-r--r--  net/core/skbuff.c | 6
-rw-r--r--  net/core/sock.c | 35
-rw-r--r--  net/core/wireless.c | 74
-rw-r--r--  net/decnet/dn_dev.c | 9
-rw-r--r--  net/decnet/dn_neigh.c | 1
-rw-r--r--  net/decnet/dn_route.c | 11
-rw-r--r--  net/decnet/dn_rules.c | 7
-rw-r--r--  net/decnet/dn_table.c | 8
-rw-r--r--  net/ipv4/Kconfig | 26
-rw-r--r--  net/ipv4/Makefile | 4
-rw-r--r--  net/ipv4/af_inet.c | 13
-rw-r--r--  net/ipv4/ah4.c | 2
-rw-r--r--  net/ipv4/devinet.c | 9
-rw-r--r--  net/ipv4/esp4.c | 2
-rw-r--r--  net/ipv4/fib_frontend.c | 55
-rw-r--r--  net/ipv4/fib_hash.c | 3
-rw-r--r--  net/ipv4/fib_lookup.h | 3
-rw-r--r--  net/ipv4/fib_rules.c | 7
-rw-r--r--  net/ipv4/fib_semantics.c | 10
-rw-r--r--  net/ipv4/fib_trie.c | 2454
-rw-r--r--  net/ipv4/icmp.c | 9
-rw-r--r--  net/ipv4/ip_input.c | 5
-rw-r--r--  net/ipv4/ip_output.c | 11
-rw-r--r--  net/ipv4/ip_sockglue.c | 6
-rw-r--r--  net/ipv4/ipcomp.c | 11
-rw-r--r--  net/ipv4/ipmr.c | 1
-rw-r--r--  net/ipv4/ipvs/ip_vs_xmit.c | 1
-rw-r--r--  net/ipv4/multipath_drr.c | 2
-rw-r--r--  net/ipv4/multipath_random.c | 2
-rw-r--r--  net/ipv4/multipath_rr.c | 2
-rw-r--r--  net/ipv4/multipath_wrandom.c | 2
-rw-r--r--  net/ipv4/netfilter/arp_tables.c | 1
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_amanda.c | 7
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_core.c | 107
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_ftp.c | 7
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_irc.c | 7
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 23
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 27
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_udp.c | 1
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_standalone.c | 23
-rw-r--r--  net/ipv4/netfilter/ip_nat_core.c | 32
-rw-r--r--  net/ipv4/netfilter/ip_nat_helper.c | 13
-rw-r--r--  net/ipv4/netfilter/ip_nat_rule.c | 4
-rw-r--r--  net/ipv4/netfilter/ip_nat_standalone.c | 5
-rw-r--r--  net/ipv4/netfilter/ip_tables.c | 1
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c | 51
-rw-r--r--  net/ipv4/netfilter/ipt_MASQUERADE.c | 10
-rw-r--r--  net/ipv4/netfilter/ipt_REJECT.c | 13
-rw-r--r--  net/ipv4/netfilter/ipt_ULOG.c | 15
-rw-r--r--  net/ipv4/netfilter/ipt_hashlimit.c | 17
-rw-r--r--  net/ipv4/netfilter/ipt_helper.c | 4
-rw-r--r--  net/ipv4/netfilter/ipt_recent.c | 10
-rw-r--r--  net/ipv4/raw.c | 22
-rw-r--r--  net/ipv4/route.c | 19
-rw-r--r--  net/ipv4/syncookies.c | 49
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c | 9
-rw-r--r--  net/ipv4/tcp.c | 86
-rw-r--r--  net/ipv4/tcp_diag.c | 37
-rw-r--r--  net/ipv4/tcp_ipv4.c | 172
-rw-r--r--  net/ipv4/tcp_minisocks.c | 68
-rw-r--r--  net/ipv4/tcp_output.c | 27
-rw-r--r--  net/ipv4/tcp_timer.c | 18
-rw-r--r--  net/ipv4/xfrm4_output.c | 8
-rw-r--r--  net/ipv4/xfrm4_state.c | 9
-rw-r--r--  net/ipv4/xfrm4_tunnel.c | 2
-rw-r--r--  net/ipv6/addrconf.c | 72
-rw-r--r--  net/ipv6/ah6.c | 2
-rw-r--r--  net/ipv6/anycast.c | 4
-rw-r--r--  net/ipv6/datagram.c | 6
-rw-r--r--  net/ipv6/esp6.c | 2
-rw-r--r--  net/ipv6/icmp.c | 14
-rw-r--r--  net/ipv6/ip6_fib.c | 19
-rw-r--r--  net/ipv6/ip6_output.c | 3
-rw-r--r--  net/ipv6/ip6_tunnel.c | 1
-rw-r--r--  net/ipv6/ipcomp6.c | 9
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 5
-rw-r--r--  net/ipv6/mcast.c | 68
-rw-r--r--  net/ipv6/ndisc.c | 4
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 1
-rw-r--r--  net/ipv6/netfilter/ip6t_LOG.c | 54
-rw-r--r--  net/ipv6/netfilter/ip6table_raw.c | 6
-rw-r--r--  net/ipv6/raw.c | 8
-rw-r--r--  net/ipv6/route.c | 79
-rw-r--r--  net/ipv6/tcp_ipv6.c | 148
-rw-r--r--  net/ipv6/udp.c | 4
-rw-r--r--  net/ipv6/xfrm6_tunnel.c | 2
-rw-r--r--  net/key/af_key.c | 385
-rw-r--r--  net/netlink/af_netlink.c | 8
-rw-r--r--  net/sched/Kconfig | 2
-rw-r--r--  net/sched/act_api.c | 13
-rw-r--r--  net/sched/cls_api.c | 5
-rw-r--r--  net/sched/cls_basic.c | 3
-rw-r--r--  net/sched/em_meta.c | 295
-rw-r--r--  net/sched/sch_api.c | 10
-rw-r--r--  net/sched/sch_dsmark.c | 357
-rw-r--r--  net/sched/sch_fifo.c | 152
-rw-r--r--  net/sched/sch_generic.c | 84
-rw-r--r--  net/sctp/associola.c | 151
-rw-r--r--  net/sctp/endpointola.c | 1
-rw-r--r--  net/sctp/input.c | 51
-rw-r--r--  net/sctp/ipv6.c | 36
-rw-r--r--  net/sctp/outqueue.c | 11
-rw-r--r--  net/sctp/proc.c | 194
-rw-r--r--  net/sctp/protocol.c | 7
-rw-r--r--  net/sctp/sm_make_chunk.c | 20
-rw-r--r--  net/sctp/sm_sideeffect.c | 105
-rw-r--r--  net/sctp/sm_statefuns.c | 148
-rw-r--r--  net/sctp/sm_statetable.c | 6
-rw-r--r--  net/sctp/socket.c | 425
-rw-r--r--  net/sctp/transport.c | 4
-rw-r--r--  net/socket.c | 12
-rw-r--r--  net/sunrpc/auth.c | 6
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 18
-rw-r--r--  net/sunrpc/clnt.c | 205
-rw-r--r--  net/sunrpc/pmap_clnt.c | 9
-rw-r--r--  net/sunrpc/sched.c | 84
-rw-r--r--  net/sunrpc/sunrpc_syms.c | 6
-rw-r--r--  net/sunrpc/svc.c | 36
-rw-r--r--  net/sunrpc/xdr.c | 298
-rw-r--r--  net/sunrpc/xprt.c | 71
-rw-r--r--  net/x25/af_x25.c | 110
-rw-r--r--  net/x25/x25_facilities.c | 34
-rw-r--r--  net/x25/x25_subr.c | 41
-rw-r--r--  net/xfrm/xfrm_policy.c | 9
-rw-r--r--  net/xfrm/xfrm_state.c | 118
-rw-r--r--  net/xfrm/xfrm_user.c | 297
140 files changed, 6681 insertions, 2098 deletions
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 54640c01b5..c34614ea5f 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -35,6 +35,7 @@
 #include <net/datalink.h>
 #include <net/psnap.h>
 #include <linux/atalk.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -462,8 +463,7 @@ void aarp_probe_network(struct atalk_iface *atif)
 		aarp_send_probe(atif->dev, &atif->address);
 
 		/* Defer 1/10th */
-		current->state = TASK_INTERRUPTIBLE;
-		schedule_timeout(HZ / 10);
+		msleep(100);
 
 		if (atif->status & ATIF_PROBE_FAIL)
 			break;
@@ -510,9 +510,8 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa)
 		aarp_send_probe(atif->dev, sa);
 
 		/* Defer 1/10th */
-		current->state = TASK_INTERRUPTIBLE;
 		write_unlock_bh(&aarp_lock);
-		schedule_timeout(HZ / 10);
+		msleep(100);
 		write_lock_bh(&aarp_lock);
 
 		if (entry->status & ATIF_PROBE_FAIL)
@@ -565,7 +564,7 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb,
 		 * numbers we just happen to need. Now put the
 		 * length in the lower two.
 		 */
-		*((__u16 *)skb->data) = htons(skb->len);
+		*((__be16 *)skb->data) = htons(skb->len);
 		ft = 1;
 	}
 	/*
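
Note on the msleep() conversion above: both idioms sleep for the same 100 ms (HZ/10 jiffies), but they are not strictly equivalent; a minimal sketch of the difference:

	/* Old idiom: an interruptible sleep -- a pending signal can wake
	 * the task before the timeout expires. */
	current->state = TASK_INTERRUPTIBLE;
	schedule_timeout(HZ / 10);

	/* New idiom: msleep() takes milliseconds directly and sleeps in
	 * TASK_UNINTERRUPTIBLE state, so the probe delay can no longer be
	 * cut short by a signal. */
	msleep(100);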
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 876dbac710..192b529f86 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -401,7 +401,7 @@ out_err:
 }
 
 /* Find a match for a specific network:node pair */
-static struct atalk_iface *atalk_find_interface(int net, int node)
+static struct atalk_iface *atalk_find_interface(__be16 net, int node)
 {
 	struct atalk_iface *iface;
 
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index ef9f2095f9..069253f830 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -57,9 +57,6 @@ int br_forward_finish(struct sk_buff *skb)
 static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
 	skb->dev = to->dev;
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug = 0;
-#endif
 	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 		br_forward_finish);
 }
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 8f5f2e7309..9a45e6279c 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -23,11 +23,7 @@ const unsigned char bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
 
 static int br_pass_frame_up_finish(struct sk_buff *skb)
 {
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug = 0;
-#endif
 	netif_receive_skb(skb);
-
 	return 0;
 }
 
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index be03d3ad26..03ae4eddda 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -102,10 +102,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 {
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug ^= (1 << NF_BR_PRE_ROUTING);
-#endif
-
 	if (nf_bridge->mask & BRNF_PKT_TYPE) {
 		skb->pkt_type = PACKET_OTHERHOST;
 		nf_bridge->mask ^= BRNF_PKT_TYPE;
@@ -182,10 +178,6 @@ static void __br_dnat_complain(void)
  * --Bart, 20021007 (updated) */
 static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
 {
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug |= (1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_FORWARD);
-#endif
-
 	if (skb->pkt_type == PACKET_OTHERHOST) {
 		skb->pkt_type = PACKET_HOST;
 		skb->nf_bridge->mask |= BRNF_PKT_TYPE;
@@ -207,10 +199,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 	struct iphdr *iph = skb->nh.iph;
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug ^= (1 << NF_BR_PRE_ROUTING);
-#endif
-
 	if (nf_bridge->mask & BRNF_PKT_TYPE) {
 		skb->pkt_type = PACKET_OTHERHOST;
 		nf_bridge->mask ^= BRNF_PKT_TYPE;
@@ -382,9 +370,6 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
 		goto inhdr_error;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug ^= (1 << NF_IP6_PRE_ROUTING);
-#endif
 	if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
 		return NF_DROP;
 	setup_pre_routing(skb);
@@ -468,9 +453,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 		skb->ip_summed = CHECKSUM_NONE;
 	}
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug ^= (1 << NF_IP_PRE_ROUTING);
-#endif
 	if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
 		return NF_DROP;
 	setup_pre_routing(skb);
@@ -517,10 +499,6 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 	struct net_device *in;
 	struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug ^= (1 << NF_BR_FORWARD);
-#endif
-
 	if (skb->protocol != __constant_htons(ETH_P_ARP) && !IS_VLAN_ARP) {
 		in = nf_bridge->physindev;
 		if (nf_bridge->mask & BRNF_PKT_TYPE) {
@@ -566,9 +544,6 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
 		(*pskb)->nh.raw += VLAN_HLEN;
 	}
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug ^= (1 << NF_BR_FORWARD);
-#endif
 	nf_bridge = skb->nf_bridge;
 	if (skb->pkt_type == PACKET_OTHERHOST) {
 		skb->pkt_type = PACKET_HOST;
@@ -605,10 +580,6 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
 		(*pskb)->nh.raw += VLAN_HLEN;
 	}
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug ^= (1 << NF_BR_FORWARD);
-#endif
-
 	if (skb->nh.arph->ar_pln != 4) {
 		if (IS_VLAN_ARP) {
 			skb_push(*pskb, VLAN_HLEN);
@@ -627,9 +598,6 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
 /* PF_BRIDGE/LOCAL_OUT ***********************************************/
 static int br_nf_local_out_finish(struct sk_buff *skb)
 {
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug &= ~(1 << NF_BR_LOCAL_OUT);
-#endif
 	if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
 		skb->nh.raw -= VLAN_HLEN;
@@ -731,10 +699,6 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
 				       realoutdev, br_nf_local_out_finish,
 				       NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
 	} else {
-#ifdef CONFIG_NETFILTER_DEBUG
-		skb->nf_debug ^= (1 << NF_IP_LOCAL_OUT);
-#endif
-
 		NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
 			       realoutdev, br_nf_local_out_finish,
 			       NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
@@ -779,8 +743,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 		printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
 		goto print_error;
 	}
-
-	skb->nf_debug ^= (1 << NF_IP_POST_ROUTING);
 #endif
 
 	/* We assume any code from br_dev_queue_push_xmit onwards doesn't care
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 18ebc66476..c4540144f0 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -859,8 +859,7 @@ static int translate_table(struct ebt_replace *repl,
 		if (repl->valid_hooks & (1 << i))
 			if (check_chainloops(newinfo->hook_entry[i],
 			    cl_s, udc_cnt, i, newinfo->entries)) {
-				if (cl_s)
-					vfree(cl_s);
+				vfree(cl_s);
 				return -EINVAL;
 			}
 
@@ -883,8 +882,7 @@ static int translate_table(struct ebt_replace *repl,
 		EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
 				  ebt_cleanup_entry, &i);
 	}
-	if (cl_s)
-		vfree(cl_s);
+	vfree(cl_s);
 	return ret;
 }
 
@@ -1030,8 +1028,7 @@ static int do_replace(void __user *user, unsigned int len)
 	}
 	vfree(table);
 
-	if (counterstmp)
-		vfree(counterstmp);
+	vfree(counterstmp);
 	return ret;
 
 free_unlock:
@@ -1040,8 +1037,7 @@ free_iterate:
 	EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
 			  ebt_cleanup_entry, NULL);
 free_counterstmp:
-	if (counterstmp)
-		vfree(counterstmp);
+	vfree(counterstmp);
 	/* can be initialized in translate_table() */
 	if (newinfo->chainstack) {
 		for (i = 0; i < num_possible_cpus(); i++)
@@ -1049,11 +1045,9 @@ free_counterstmp:
 		vfree(newinfo->chainstack);
 	}
 free_entries:
-	if (newinfo->entries)
-		vfree(newinfo->entries);
+	vfree(newinfo->entries);
 free_newinfo:
-	if (newinfo)
-		vfree(newinfo);
+	vfree(newinfo);
 	return ret;
 }
 
@@ -1213,8 +1207,7 @@ void ebt_unregister_table(struct ebt_table *table)
 	down(&ebt_mutex);
 	LIST_DELETE(&ebt_tables, table);
 	up(&ebt_mutex);
-	if (table->private->entries)
-		vfree(table->private->entries);
+	vfree(table->private->entries);
 	if (table->private->chainstack) {
 		for (i = 0; i < num_possible_cpus(); i++)
 			vfree(table->private->chainstack[i]);
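
Note: the ebtables.c cleanups above rely on vfree() being a no-op when passed NULL, just as kfree() is, which makes the guard redundant; a minimal sketch of the pattern, using cl_s from translate_table() above:

	/* Before: defensive guard around the free. */
	if (cl_s)
		vfree(cl_s);

	/* After: vfree(NULL) is a no-op, so the bare call behaves the same. */
	vfree(cl_s);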
diff --git a/net/core/Makefile b/net/core/Makefile
index 81f03243fe..5e0c56b7f6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -2,7 +2,8 @@
 # Makefile for the Linux networking core.
 #
 
-obj-y := sock.o skbuff.o iovec.o datagram.o stream.o scm.o gen_stats.o gen_estimator.o
+obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
+	 gen_stats.o gen_estimator.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
diff --git a/net/core/dev.c b/net/core/dev.c
index f15a3ffff6..ab935778ce 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1744,6 +1744,7 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
 	unsigned long start_time = jiffies;
 
+	backlog_dev->weight = weight_p;
 	for (;;) {
 		struct sk_buff *skb;
 		struct net_device *dev;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 43bdc521e2..f6bdcad47d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1276,9 +1276,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 	INIT_RCU_HEAD(&p->rcu_head);
 	p->reachable_time =
 			neigh_rand_reach_time(p->base_reachable_time);
-	if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) {
-		kfree(p);
-		return NULL;
+	if (dev) {
+		if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
+			kfree(p);
+			return NULL;
+		}
+
+		dev_hold(dev);
+		p->dev = dev;
 	}
 	p->sysctl_table = NULL;
 	write_lock_bh(&tbl->lock);
@@ -1309,6 +1314,8 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
 			*p = parms->next;
 			parms->dead = 1;
 			write_unlock_bh(&tbl->lock);
+			if (parms->dev)
+				dev_put(parms->dev);
 			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
 			return;
 		}
@@ -1546,20 +1553,323 @@ out:
 	return err;
 }
 
+static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
+{
+	struct rtattr *nest = NULL;
+
+	nest = RTA_NEST(skb, NDTA_PARMS);
+
+	if (parms->dev)
+		RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
+
+	RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
+	RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+	RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
+	RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
+	RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
+	RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
+	RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
+	RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+		      parms->base_reachable_time);
+	RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
+	RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
+	RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
+	RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
+	RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
+	RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
+
+	return RTA_NEST_END(skb, nest);
+
+rtattr_failure:
+	return RTA_NEST_CANCEL(skb, nest);
+}
+
+static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb,
+			      struct netlink_callback *cb)
+{
+	struct nlmsghdr *nlh;
+	struct ndtmsg *ndtmsg;
+
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
+			       NLM_F_MULTI);
+
+	ndtmsg = NLMSG_DATA(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+
+	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+	RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
+	RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
+	RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
+	RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
+
+	{
+		unsigned long now = jiffies;
+		unsigned int flush_delta = now - tbl->last_flush;
+		unsigned int rand_delta = now - tbl->last_rand;
+
+		struct ndt_config ndc = {
+			.ndtc_key_len		= tbl->key_len,
+			.ndtc_entry_size	= tbl->entry_size,
+			.ndtc_entries		= atomic_read(&tbl->entries),
+			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
+			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
+			.ndtc_hash_rnd		= tbl->hash_rnd,
+			.ndtc_hash_mask		= tbl->hash_mask,
+			.ndtc_hash_chain_gc	= tbl->hash_chain_gc,
+			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
+		};
+
+		RTA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
+	}
+
+	{
+		int cpu;
+		struct ndt_stats ndst;
+
+		memset(&ndst, 0, sizeof(ndst));
+
+		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+			struct neigh_statistics	*st;
+
+			if (!cpu_possible(cpu))
+				continue;
+
+			st = per_cpu_ptr(tbl->stats, cpu);
+			ndst.ndts_allocs		+= st->allocs;
+			ndst.ndts_destroys		+= st->destroys;
+			ndst.ndts_hash_grows		+= st->hash_grows;
+			ndst.ndts_res_failed		+= st->res_failed;
+			ndst.ndts_lookups		+= st->lookups;
+			ndst.ndts_hits			+= st->hits;
+			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
+			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
+			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
+			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
+		}
+
+		RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
+	}
+
+	BUG_ON(tbl->parms.dev);
+	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
+		goto rtattr_failure;
+
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_END(skb, nlh);
+
+rtattr_failure:
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_CANCEL(skb, nlh);
+
+nlmsg_failure:
+	return -1;
+}
+
+static int neightbl_fill_param_info(struct neigh_table *tbl,
+				    struct neigh_parms *parms,
+				    struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	struct ndtmsg *ndtmsg;
+	struct nlmsghdr *nlh;
+
+	nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg),
+			       NLM_F_MULTI);
+
+	ndtmsg = NLMSG_DATA(nlh);
+
+	read_lock_bh(&tbl->lock);
+	ndtmsg->ndtm_family = tbl->family;
+	RTA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+
+	if (neightbl_fill_parms(skb, parms) < 0)
+		goto rtattr_failure;
+
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_END(skb, nlh);
+
+rtattr_failure:
+	read_unlock_bh(&tbl->lock);
+	return NLMSG_CANCEL(skb, nlh);
+
+nlmsg_failure:
+	return -1;
+}
+
+static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+						      int ifindex)
+{
+	struct neigh_parms *p;
+
+	for (p = &tbl->parms; p; p = p->next)
+		if ((p->dev && p->dev->ifindex == ifindex) ||
+		    (!p->dev && !ifindex))
+			return p;
+
+	return NULL;
+}
+
+int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+	struct neigh_table *tbl;
+	struct ndtmsg *ndtmsg = NLMSG_DATA(nlh);
+	struct rtattr **tb = arg;
+	int err = -EINVAL;
+
+	if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1]))
+		return -EINVAL;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
+			continue;
+
+		if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id))
+			break;
+	}
+
+	if (tbl == NULL) {
+		err = -ENOENT;
+		goto errout;
+	}
+
+	/*
+	 * We acquire tbl->lock to be nice to the periodic timers and
+	 * make sure they always see a consistent set of values.
+	 */
+	write_lock_bh(&tbl->lock);
+
+	if (tb[NDTA_THRESH1 - 1])
+		tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]);
+
+	if (tb[NDTA_THRESH2 - 1])
+		tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]);
+
+	if (tb[NDTA_THRESH3 - 1])
+		tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]);
+
+	if (tb[NDTA_GC_INTERVAL - 1])
+		tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]);
+
+	if (tb[NDTA_PARMS - 1]) {
+		struct rtattr *tbp[NDTPA_MAX];
+		struct neigh_parms *p;
+		u32 ifindex = 0;
+
+		if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0)
+			goto rtattr_failure;
+
+		if (tbp[NDTPA_IFINDEX - 1])
+			ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]);
+
+		p = lookup_neigh_params(tbl, ifindex);
+		if (p == NULL) {
+			err = -ENOENT;
+			goto rtattr_failure;
+		}
+
+		if (tbp[NDTPA_QUEUE_LEN - 1])
+			p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]);
+
+		if (tbp[NDTPA_PROXY_QLEN - 1])
+			p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]);
+
+		if (tbp[NDTPA_APP_PROBES - 1])
+			p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]);
+
+		if (tbp[NDTPA_UCAST_PROBES - 1])
+			p->ucast_probes =
+				RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]);
+
+		if (tbp[NDTPA_MCAST_PROBES - 1])
+			p->mcast_probes =
+				RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]);
+
+		if (tbp[NDTPA_BASE_REACHABLE_TIME - 1])
+			p->base_reachable_time =
+				RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]);
+
+		if (tbp[NDTPA_GC_STALETIME - 1])
+			p->gc_staletime =
+				RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]);
+
+		if (tbp[NDTPA_DELAY_PROBE_TIME - 1])
+			p->delay_probe_time =
+				RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]);
+
+		if (tbp[NDTPA_RETRANS_TIME - 1])
+			p->retrans_time =
+				RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]);
+
+		if (tbp[NDTPA_ANYCAST_DELAY - 1])
+			p->anycast_delay =
+				RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]);
+
+		if (tbp[NDTPA_PROXY_DELAY - 1])
+			p->proxy_delay =
+				RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]);
+
+		if (tbp[NDTPA_LOCKTIME - 1])
+			p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]);
+	}
+
+	err = 0;
+
+rtattr_failure:
+	write_unlock_bh(&tbl->lock);
+errout:
+	read_unlock(&neigh_tbl_lock);
+	return err;
+}
+
+int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx, family;
+	int s_idx = cb->args[0];
+	struct neigh_table *tbl;
+
+	family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+
+	read_lock(&neigh_tbl_lock);
+	for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) {
+		struct neigh_parms *p;
+
+		if (idx < s_idx || (family && tbl->family != family))
+			continue;
+
+		if (neightbl_fill_info(tbl, skb, cb) <= 0)
+			break;
+
+		for (++idx, p = tbl->parms.next; p; p = p->next, idx++) {
+			if (idx < s_idx)
+				continue;
+
+			if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0)
+				goto out;
+		}
+
+	}
+out:
+	read_unlock(&neigh_tbl_lock);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
 
 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
-			   u32 pid, u32 seq, int event)
+			   u32 pid, u32 seq, int event, unsigned int flags)
 {
 	unsigned long now = jiffies;
 	unsigned char *b = skb->tail;
 	struct nda_cacheinfo ci;
 	int locked = 0;
 	u32 probes;
-	struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event,
-					 sizeof(struct ndmsg));
+	struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event,
					 sizeof(struct ndmsg), flags);
 	struct ndmsg *ndm = NLMSG_DATA(nlh);
 
-	nlh->nlmsg_flags = pid ? NLM_F_MULTI : 0;
 	ndm->ndm_family	 = n->ops->family;
 	ndm->ndm_flags	 = n->flags;
 	ndm->ndm_type	 = n->type;
@@ -1609,7 +1919,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 			continue;
 		if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
 				    cb->nlh->nlmsg_seq,
-				    RTM_NEWNEIGH) <= 0) {
+				    RTM_NEWNEIGH,
+				    NLM_F_MULTI) <= 0) {
 			read_unlock_bh(&tbl->lock);
 			rc = -1;
 			goto out;
@@ -2018,7 +2329,7 @@ void neigh_app_ns(struct neighbour *n)
 	if (!skb)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -2037,7 +2348,7 @@ static void neigh_app_notify(struct neighbour *n)
 	if (!skb)
 		return;
 
-	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
+	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -2352,6 +2663,8 @@ EXPORT_SYMBOL(neigh_update);
 EXPORT_SYMBOL(neigh_update_hhs);
 EXPORT_SYMBOL(pneigh_enqueue);
 EXPORT_SYMBOL(pneigh_lookup);
+EXPORT_SYMBOL(neightbl_dump_info);
+EXPORT_SYMBOL(neightbl_set);
 
 #ifdef CONFIG_ARPD
 EXPORT_SYMBOL(neigh_app_ns);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 910eb4c05a..e2137f3e48 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -185,6 +185,22 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, siz
 static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
 			 store_tx_queue_len);
 
+NETDEVICE_SHOW(weight, fmt_dec);
+
+static int change_weight(struct net_device *net, unsigned long new_weight)
+{
+	net->weight = new_weight;
+	return 0;
+}
+
+static ssize_t store_weight(struct class_device *dev, const char *buf, size_t len)
+{
+	return netdev_store(dev, buf, len, change_weight);
+}
+
+static CLASS_DEVICE_ATTR(weight, S_IRUGO | S_IWUSR, show_weight,
+			 store_weight);
+
 
 static struct class_device_attribute *net_class_attributes[] = {
 	&class_device_attr_ifindex,
@@ -194,6 +210,7 @@ static struct class_device_attribute *net_class_attributes[] = {
 	&class_device_attr_features,
 	&class_device_attr_mtu,
 	&class_device_attr_flags,
+	&class_device_attr_weight,
 	&class_device_attr_type,
 	&class_device_attr_address,
 	&class_device_attr_broadcast,
diff --git a/net/core/netfilter.c b/net/core/netfilter.c
index 22a8f127c4..076c156d5e 100644
--- a/net/core/netfilter.c
+++ b/net/core/netfilter.c
@@ -141,136 +141,6 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
 	up(&nf_sockopt_mutex);
 }
 
-#ifdef CONFIG_NETFILTER_DEBUG
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4.h>
-
-static void debug_print_hooks_ip(unsigned int nf_debug)
-{
-	if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
-		printk("PRE_ROUTING ");
-		nf_debug ^= (1 << NF_IP_PRE_ROUTING);
-	}
-	if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
-		printk("LOCAL_IN ");
-		nf_debug ^= (1 << NF_IP_LOCAL_IN);
-	}
-	if (nf_debug & (1 << NF_IP_FORWARD)) {
-		printk("FORWARD ");
-		nf_debug ^= (1 << NF_IP_FORWARD);
-	}
-	if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
-		printk("LOCAL_OUT ");
-		nf_debug ^= (1 << NF_IP_LOCAL_OUT);
-	}
-	if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
-		printk("POST_ROUTING ");
-		nf_debug ^= (1 << NF_IP_POST_ROUTING);
-	}
-	if (nf_debug)
-		printk("Crap bits: 0x%04X", nf_debug);
-	printk("\n");
-}
-
-static void nf_dump_skb(int pf, struct sk_buff *skb)
-{
-	printk("skb: pf=%i %s dev=%s len=%u\n",
-	       pf,
-	       skb->sk ? "(owned)" : "(unowned)",
-	       skb->dev ? skb->dev->name : "(no dev)",
-	       skb->len);
-	switch (pf) {
-	case PF_INET: {
-		const struct iphdr *ip = skb->nh.iph;
-		__u32 *opt = (__u32 *) (ip + 1);
-		int opti;
-		__u16 src_port = 0, dst_port = 0;
-
-		if (ip->protocol == IPPROTO_TCP
-		    || ip->protocol == IPPROTO_UDP) {
-			struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
-			src_port = ntohs(tcp->source);
-			dst_port = ntohs(tcp->dest);
-		}
-
-		printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
-		       " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
-		       ip->protocol, NIPQUAD(ip->saddr),
-		       src_port, NIPQUAD(ip->daddr),
-		       dst_port,
-		       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
-		       ntohs(ip->frag_off), ip->ttl);
-
-		for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
-			printk(" O=0x%8.8X", *opt++);
-		printk("\n");
-	}
-	}
-}
-
-void nf_debug_ip_local_deliver(struct sk_buff *skb)
-{
-	/* If it's a loopback packet, it must have come through
-	 * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
-	 * NF_IP_LOCAL_IN.  Otherwise, must have gone through
-	 * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING.  */
-	if (!skb->dev) {
-		printk("ip_local_deliver: skb->dev is NULL.\n");
-	} else {
-		if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
-				      | (1<<NF_IP_LOCAL_IN))) {
-			printk("ip_local_deliver: bad skb: ");
-			debug_print_hooks_ip(skb->nf_debug);
-			nf_dump_skb(PF_INET, skb);
-		}
-	}
-}
-
-void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
-{
-	if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
-				 | (1 << NF_IP_POST_ROUTING))) {
-		printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
-		       newskb);
-		debug_print_hooks_ip(newskb->nf_debug);
-		nf_dump_skb(PF_INET, newskb);
-	}
-}
-
-void nf_debug_ip_finish_output2(struct sk_buff *skb)
-{
-	/* If it's owned, it must have gone through the
-	 * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
-	 * Otherwise, must have gone through
-	 * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
-	 */
-	if (skb->sk) {
-		if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
-				      | (1 << NF_IP_POST_ROUTING))) {
-			printk("ip_finish_output: bad owned skb = %p: ", skb);
-			debug_print_hooks_ip(skb->nf_debug);
-			nf_dump_skb(PF_INET, skb);
-		}
-	} else {
-		if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
-				      | (1 << NF_IP_FORWARD)
-				      | (1 << NF_IP_POST_ROUTING))) {
-			/* Fragments, entunnelled packets, TCP RSTs
-			   generated by ipt_REJECT will have no
-			   owners, but still may be local */
-			if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
-					      | (1 << NF_IP_POST_ROUTING))){
-				printk("ip_finish_output:"
-				       " bad unowned skb = %p: ",skb);
-				debug_print_hooks_ip(skb->nf_debug);
-				nf_dump_skb(PF_INET, skb);
-			}
-		}
-	}
-}
-#endif /*CONFIG_NETFILTER_DEBUG*/
-
 /* Call get/setsockopt() */
 static int nf_sockopt(struct sock *sk, int pf, int val,
 		      char __user *opt, int *len, int get)
@@ -488,14 +358,6 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
 	/* We may already have this, but read-locks nest anyway */
 	rcu_read_lock();
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	if (unlikely((*pskb)->nf_debug & (1 << hook))) {
-		printk("nf_hook: hook %i already set.\n", hook);
-		nf_dump_skb(pf, *pskb);
-	}
-	(*pskb)->nf_debug |= (1 << hook);
-#endif
-
 	elem = &nf_hooks[pf][hook];
 next_hook:
 	verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a119696d55..c327c9edad 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -130,19 +130,20 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh,
  */
 static void poll_napi(struct netpoll *np)
 {
+	struct netpoll_info *npinfo = np->dev->npinfo;
 	int budget = 16;
 
 	if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
-	    np->poll_owner != smp_processor_id() &&
-	    spin_trylock(&np->poll_lock)) {
-		np->rx_flags |= NETPOLL_RX_DROP;
+	    npinfo->poll_owner != smp_processor_id() &&
+	    spin_trylock(&npinfo->poll_lock)) {
+		npinfo->rx_flags |= NETPOLL_RX_DROP;
 		atomic_inc(&trapped);
 
 		np->dev->poll(np->dev, &budget);
 
 		atomic_dec(&trapped);
-		np->rx_flags &= ~NETPOLL_RX_DROP;
-		spin_unlock(&np->poll_lock);
+		npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+		spin_unlock(&npinfo->poll_lock);
 	}
 }
 
@@ -245,6 +246,7 @@ repeat:
 static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	int status;
+	struct netpoll_info *npinfo;
 
 repeat:
 	if(!np || !np->dev || !netif_running(np->dev)) {
@@ -253,8 +255,9 @@ repeat:
 	}
 
 	/* avoid recursion */
-	if(np->poll_owner == smp_processor_id() ||
-	   np->dev->xmit_lock_owner == smp_processor_id()) {
+	npinfo = np->dev->npinfo;
+	if (npinfo->poll_owner == smp_processor_id() ||
+	    np->dev->xmit_lock_owner == smp_processor_id()) {
 		if (np->drop)
 			np->drop(skb);
 		else
@@ -341,14 +344,22 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 
 static void arp_reply(struct sk_buff *skb)
 {
+	struct netpoll_info *npinfo = skb->dev->npinfo;
 	struct arphdr *arp;
 	unsigned char *arp_ptr;
 	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
 	u32 sip, tip;
+	unsigned long flags;
 	struct sk_buff *send_skb;
-	struct netpoll *np = skb->dev->np;
+	struct netpoll *np = NULL;
+
+	spin_lock_irqsave(&npinfo->rx_lock, flags);
+	if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev)
+		np = npinfo->rx_np;
+	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
-	if (!np) return;
+	if (!np)
+		return;
 
 	/* No arp on this interface */
 	if (skb->dev->flags & IFF_NOARP)
@@ -429,9 +440,9 @@ int __netpoll_rx(struct sk_buff *skb)
 	int proto, len, ulen;
 	struct iphdr *iph;
 	struct udphdr *uh;
-	struct netpoll *np = skb->dev->np;
+	struct netpoll *np = skb->dev->npinfo->rx_np;
 
-	if (!np->rx_hook)
+	if (!np)
 		goto out;
 	if (skb->dev->type != ARPHRD_ETHER)
 		goto out;
@@ -611,9 +622,8 @@ int netpoll_setup(struct netpoll *np)
 {
 	struct net_device *ndev = NULL;
 	struct in_device *in_dev;
-
-	np->poll_lock = SPIN_LOCK_UNLOCKED;
-	np->poll_owner = -1;
+	struct netpoll_info *npinfo;
+	unsigned long flags;
 
 	if (np->dev_name)
 		ndev = dev_get_by_name(np->dev_name);
@@ -624,7 +634,17 @@ int netpoll_setup(struct netpoll *np)
 	}
 
 	np->dev = ndev;
-	ndev->np = np;
+	if (!ndev->npinfo) {
+		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+		if (!npinfo)
+			goto release;
+
+		npinfo->rx_np = NULL;
+		npinfo->poll_lock = SPIN_LOCK_UNLOCKED;
+		npinfo->poll_owner = -1;
+		npinfo->rx_lock = SPIN_LOCK_UNLOCKED;
+	} else
+		npinfo = ndev->npinfo;
 
 	if (!ndev->poll_controller) {
 		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
@@ -692,13 +712,20 @@ int netpoll_setup(struct netpoll *np)
 		       np->name, HIPQUAD(np->local_ip));
 	}
 
-	if(np->rx_hook)
-		np->rx_flags = NETPOLL_RX_ENABLED;
+	if (np->rx_hook) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
+		npinfo->rx_np = np;
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	}
+	/* last thing to do is link it to the net device structure */
+	ndev->npinfo = npinfo;
 
 	return 0;
 
  release:
-	ndev->np = NULL;
+	if (!ndev->npinfo)
+		kfree(npinfo);
 	np->dev = NULL;
 	dev_put(ndev);
 	return -1;
@@ -706,9 +733,20 @@ int netpoll_setup(struct netpoll *np)
 
 void netpoll_cleanup(struct netpoll *np)
 {
-	if (np->dev)
-		np->dev->np = NULL;
-	dev_put(np->dev);
+	struct netpoll_info *npinfo;
+	unsigned long flags;
+
+	if (np->dev) {
+		npinfo = np->dev->npinfo;
+		if (npinfo && npinfo->rx_np == np) {
+			spin_lock_irqsave(&npinfo->rx_lock, flags);
+			npinfo->rx_np = NULL;
+			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+			spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+		}
+		dev_put(np->dev);
+	}
+
 	np->dev = NULL;
 }
 
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
new file mode 100644
index 0000000000..bb55675f06
--- /dev/null
+++ b/net/core/request_sock.c
@@ -0,0 +1,64 @@
+/*
+ * NET		Generic infrastructure for Network protocols.
+ *
+ * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *		From code originally in include/net/tcp.h
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <net/request_sock.h>
+
+/*
+ * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
+ * One SYN_RECV socket costs about 80bytes on a 32bit machine.
+ * It would be better to replace it with a global counter for all sockets
+ * but then some measure against one socket starving all other sockets
+ * would be needed.
+ *
+ * It was 128 by default. Experiments with real servers show, that
+ * it is absolutely not enough even at 100conn/sec. 256 cures most
+ * of problems. This value is adjusted to 128 for very small machines
+ * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
+ * Further increasing requires to change hash table size.
+ */
+int sysctl_max_syn_backlog = 256;
+EXPORT_SYMBOL(sysctl_max_syn_backlog);
+
+int reqsk_queue_alloc(struct request_sock_queue *queue,
+		      const int nr_table_entries)
+{
+	const int lopt_size = sizeof(struct listen_sock) +
+			      nr_table_entries * sizeof(struct request_sock *);
+	struct listen_sock *lopt = kmalloc(lopt_size, GFP_KERNEL);
+
+	if (lopt == NULL)
+		return -ENOMEM;
+
+	memset(lopt, 0, lopt_size);
+
+	for (lopt->max_qlen_log = 6;
+	     (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
+	     lopt->max_qlen_log++);
+
+	get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
+	rwlock_init(&queue->syn_wait_lock);
+	queue->rskq_accept_head = queue->rskq_accept_head = NULL;
+
+	write_lock_bh(&queue->syn_wait_lock);
+	queue->listen_opt = lopt;
+	write_unlock_bh(&queue->syn_wait_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(reqsk_queue_alloc);
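
Note: the max_qlen_log loop in reqsk_queue_alloc() above picks the smallest power of two that covers sysctl_max_syn_backlog, with a floor of 2^6 = 64; a standalone userspace sketch (hypothetical values, not part of the kernel code) of the same computation:

	#include <stdio.h>

	int main(void)
	{
		int sysctl_max_syn_backlog = 256;	/* default set above */
		int max_qlen_log;

		/* Smallest max_qlen_log >= 6 with (1 << max_qlen_log) >= backlog. */
		for (max_qlen_log = 6;
		     (1 << max_qlen_log) < sysctl_max_syn_backlog;
		     max_qlen_log++)
			;

		printf("backlog=%d -> max_qlen_log=%d (queue length cap %d)\n",
		       sysctl_max_syn_backlog, max_qlen_log, 1 << max_qlen_log);
		return 0;
	}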
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 00caf4b318..e013d836a7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -100,6 +100,7 @@ static const int rtm_min[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWPREFIX)]    = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
 	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
+	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NLMSG_LENGTH(sizeof(struct ndtmsg)),
 };
 
 static const int rta_max[RTM_NR_FAMILIES] =
@@ -113,6 +114,7 @@ static const int rta_max[RTM_NR_FAMILIES] =
 	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX,
 	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX,
 	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX,
+	[RTM_FAM(RTM_NEWNEIGHTBL)]  = NDTA_MAX,
 };
 
 void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
@@ -176,14 +178,14 @@ rtattr_failure:
 
 
 static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
-				 int type, u32 pid, u32 seq, u32 change)
+				 int type, u32 pid, u32 seq, u32 change,
+				 unsigned int flags)
 {
 	struct ifinfomsg *r;
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb->tail;
 
-	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*r));
-	if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
+	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
 	r->ifi_family = AF_UNSPEC;
 	r->ifi_type = dev->type;
@@ -273,7 +275,10 @@ static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *c
 	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
-		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
+		if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK,
+					  NETLINK_CB(cb->skb).pid,
+					  cb->nlh->nlmsg_seq, 0,
+					  NLM_F_MULTI) <= 0)
 			break;
 	}
 	read_unlock(&dev_base_lock);
@@ -447,7 +452,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 	if (!skb)
 		return;
 
-	if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change) < 0) {
+	if (rtnetlink_fill_ifinfo(skb, dev, type, current->pid, 0, change, 0) < 0) {
 		kfree_skb(skb);
 		return;
 	}
@@ -649,14 +654,16 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 
 static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
 	[RTM_GETLINK     - RTM_BASE] = { .dumpit = rtnetlink_dump_ifinfo },
 	[RTM_SETLINK     - RTM_BASE] = { .doit   = do_setlink            },
 	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
 	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
 	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add             },
 	[RTM_DELNEIGH    - RTM_BASE] = { .doit   = neigh_delete          },
 	[RTM_GETNEIGH    - RTM_BASE] = { .dumpit = neigh_dump_info       },
 	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnetlink_dump_all    },
+	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info    },
+	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set          },
 };
 
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f65b3de590..6d68c03bc0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -365,9 +365,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
 	C(nfct);
 	nf_conntrack_get(skb->nfct);
 	C(nfctinfo);
-#ifdef CONFIG_NETFILTER_DEBUG
-	C(nf_debug);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	C(nf_bridge);
 	nf_bridge_get(skb->nf_bridge);
@@ -432,9 +429,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->nfct	= old->nfct;
 	nf_conntrack_get(old->nfct);
 	new->nfctinfo	= old->nfctinfo;
-#ifdef CONFIG_NETFILTER_DEBUG
-	new->nf_debug	= old->nf_debug;
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	new->nf_bridge	= old->nf_bridge;
 	nf_bridge_get(old->nf_bridge);
diff --git a/net/core/sock.c b/net/core/sock.c
index 96e00b0869..a6ec3ada7f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -118,6 +118,7 @@
 #include <linux/netdevice.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
+#include <net/request_sock.h>
 #include <net/sock.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
@@ -1363,6 +1364,7 @@ static LIST_HEAD(proto_list);
 
 int proto_register(struct proto *prot, int alloc_slab)
 {
+	char *request_sock_slab_name;
 	int rc = -ENOBUFS;
 
 	if (alloc_slab) {
@@ -1374,6 +1376,25 @@ int proto_register(struct proto *prot, int alloc_slab)
 			       prot->name);
 			goto out;
 		}
+
+		if (prot->rsk_prot != NULL) {
+			static const char mask[] = "request_sock_%s";
+
+			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
+			if (request_sock_slab_name == NULL)
+				goto out_free_sock_slab;
+
+			sprintf(request_sock_slab_name, mask, prot->name);
+			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
+								 prot->rsk_prot->obj_size, 0,
+								 SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+			if (prot->rsk_prot->slab == NULL) {
+				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
+				       prot->name);
+				goto out_free_request_sock_slab_name;
+			}
+		}
 	}
 
 	write_lock(&proto_list_lock);
@@ -1382,6 +1403,12 @@ int proto_register(struct proto *prot, int alloc_slab)
 	rc = 0;
 out:
 	return rc;
+out_free_request_sock_slab_name:
+	kfree(request_sock_slab_name);
+out_free_sock_slab:
+	kmem_cache_destroy(prot->slab);
+	prot->slab = NULL;
+	goto out;
 }
 
 EXPORT_SYMBOL(proto_register);
@@ -1395,6 +1422,14 @@ void proto_unregister(struct proto *prot)
 		prot->slab = NULL;
 	}
 
+	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
+		const char *name = kmem_cache_name(prot->rsk_prot->slab);
+
+		kmem_cache_destroy(prot->rsk_prot->slab);
+		kfree(name);
+		prot->rsk_prot->slab = NULL;
+	}
+
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 }
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 750cc5daeb..b2fe378dfb 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -2,7 +2,7 @@
  * This file implement the Wireless Extensions APIs.
  *
  * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2004 Jean Tourrilhes, All Rights Reserved.
+ * Copyright (c) 1997-2005 Jean Tourrilhes, All Rights Reserved.
  *
  * (As all part of the Linux kernel, this file is GPL)
  */
@@ -187,6 +187,12 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
+	[SIOCSIWMLME	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_mlme),
+		.max_tokens	= sizeof(struct iw_mlme),
+	},
 	[SIOCGIWAPLIST	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct sockaddr) +
@@ -195,7 +201,10 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 		.flags		= IW_DESCR_FLAG_NOMAX,
 	},
 	[SIOCSIWSCAN	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= 0,
+		.max_tokens	= sizeof(struct iw_scan_req),
 	},
 	[SIOCGIWSCAN	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
@@ -273,6 +282,42 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 	[SIOCGIWPOWER	- SIOCIWFIRST] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
+	[SIOCSIWGENIE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[SIOCGIWGENIE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[SIOCSIWAUTH	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWAUTH	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWENCODEEXT - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_encode_ext),
+		.max_tokens	= sizeof(struct iw_encode_ext) +
+				  IW_ENCODING_TOKEN_MAX,
+	},
+	[SIOCGIWENCODEEXT - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_encode_ext),
+		.max_tokens	= sizeof(struct iw_encode_ext) +
+				  IW_ENCODING_TOKEN_MAX,
+	},
+	[SIOCSIWPMKSA	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_pmksa),
+		.max_tokens	= sizeof(struct iw_pmksa),
+	},
 };
 static const int standard_ioctl_num = (sizeof(standard_ioctl) /
 				       sizeof(struct iw_ioctl_description));
@@ -299,6 +344,31 @@ static const struct iw_ioctl_description standard_event[] = {
 	[IWEVEXPIRED	- IWEVFIRST] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 	},
+	[IWEVGENIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVMICHAELMICFAILURE - IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_michaelmicfailure),
+	},
+	[IWEVASSOCREQIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVASSOCRESPIE - IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVPMKIDCAND	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_pmkid_cand),
371 },
302}; 372};
303static const int standard_event_num = (sizeof(standard_event) / 373static const int standard_event_num = (sizeof(standard_event) /
304 sizeof(struct iw_ioctl_description)); 374 sizeof(struct iw_ioctl_description));
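
The descriptors added above drive generic argument checking for SIOCSIWMLME, SIOCSIWGENIE and the other WPA ioctls: a POINT argument carries a number of tokens of token_size bytes each, and a request is rejected when its length falls outside [min_tokens, max_tokens] unless IW_DESCR_FLAG_NOMAX lifts the upper bound. A standalone sketch of that bounds check, with a pared-down stand-in for iw_ioctl_description and an invented size:

#include <stdio.h>

struct iw_descr {                 /* pared-down iw_ioctl_description */
    unsigned int min_tokens;
    unsigned int max_tokens;
    int nomax;                    /* stand-in for IW_DESCR_FLAG_NOMAX */
};

/* A POINT request carrying 'length' tokens passes only if it fits the
 * descriptor's bounds; NOMAX descriptors skip the upper check. */
static int iw_point_len_ok(const struct iw_descr *d, unsigned int length)
{
    if (length < d->min_tokens)
        return 0;
    if (!d->nomax && length > d->max_tokens)
        return 0;
    return 1;
}

int main(void)
{
    struct iw_descr mlme = { 28, 28, 0 };  /* invented size for iw_mlme */

    printf("%d %d\n", iw_point_len_ok(&mlme, 28), iw_point_len_ok(&mlme, 4));
    return 0;
}
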
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index ee7bf46eb7..00233ecbc9 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -716,13 +716,13 @@ static int dn_dev_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *a
716} 716}
717 717
718static int dn_dev_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, 718static int dn_dev_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
719 u32 pid, u32 seq, int event) 719 u32 pid, u32 seq, int event, unsigned int flags)
720{ 720{
721 struct ifaddrmsg *ifm; 721 struct ifaddrmsg *ifm;
722 struct nlmsghdr *nlh; 722 struct nlmsghdr *nlh;
723 unsigned char *b = skb->tail; 723 unsigned char *b = skb->tail;
724 724
725 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 725 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
726 ifm = NLMSG_DATA(nlh); 726 ifm = NLMSG_DATA(nlh);
727 727
728 ifm->ifa_family = AF_DECnet; 728 ifm->ifa_family = AF_DECnet;
@@ -755,7 +755,7 @@ static void rtmsg_ifa(int event, struct dn_ifaddr *ifa)
755 netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS); 755 netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS);
756 return; 756 return;
757 } 757 }
758 if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { 758 if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) {
759 kfree_skb(skb); 759 kfree_skb(skb);
760 netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL); 760 netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL);
761 return; 761 return;
@@ -790,7 +790,8 @@ static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
790 if (dn_dev_fill_ifaddr(skb, ifa, 790 if (dn_dev_fill_ifaddr(skb, ifa,
791 NETLINK_CB(cb->skb).pid, 791 NETLINK_CB(cb->skb).pid,
792 cb->nlh->nlmsg_seq, 792 cb->nlh->nlmsg_seq,
793 RTM_NEWADDR) <= 0) 793 RTM_NEWADDR,
794 NLM_F_MULTI) <= 0)
794 goto done; 795 goto done;
795 } 796 }
796 } 797 }
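
The recurring change in this and the following DECnet/IPv4 hunks is the same: NLMSG_PUT is replaced by NLMSG_NEW, which takes the netlink flags as an argument, so dump callbacks can mark every message NLM_F_MULTI up front instead of patching nlmsg_flags afterwards. A small userspace illustration of the header such a dump produces (constants come from the uapi headers; the message type is just an example):

#include <stdio.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
    struct nlmsghdr nlh = {
        .nlmsg_len   = NLMSG_LENGTH(0),
        .nlmsg_type  = RTM_NEWADDR,      /* example dump message type */
        .nlmsg_flags = NLM_F_MULTI,      /* part of a multipart dump */
    };

    printf("type=%u flags=%#x len=%u\n",
           nlh.nlmsg_type, nlh.nlmsg_flags, nlh.nlmsg_len);
    return 0;
}
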
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index f6dfe96f45..f32dba9e26 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -101,7 +101,6 @@ struct neigh_table dn_neigh_table = {
101 .id = "dn_neigh_cache", 101 .id = "dn_neigh_cache",
102 .parms ={ 102 .parms ={
103 .tbl = &dn_neigh_table, 103 .tbl = &dn_neigh_table,
104 .entries = 0,
105 .base_reachable_time = 30 * HZ, 104 .base_reachable_time = 30 * HZ,
106 .retrans_time = 1 * HZ, 105 .retrans_time = 1 * HZ,
107 .gc_staletime = 60 * HZ, 106 .gc_staletime = 60 * HZ,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 1e7b5c3ea2..2399fa8a3f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1465,7 +1465,8 @@ int dn_route_input(struct sk_buff *skb)
1465 return dn_route_input_slow(skb); 1465 return dn_route_input_slow(skb);
1466} 1466}
1467 1467
1468static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait) 1468static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1469 int event, int nowait, unsigned int flags)
1469{ 1470{
1470 struct dn_route *rt = (struct dn_route *)skb->dst; 1471 struct dn_route *rt = (struct dn_route *)skb->dst;
1471 struct rtmsg *r; 1472 struct rtmsg *r;
@@ -1473,9 +1474,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, int
1473 unsigned char *b = skb->tail; 1474 unsigned char *b = skb->tail;
1474 struct rta_cacheinfo ci; 1475 struct rta_cacheinfo ci;
1475 1476
1476 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r)); 1477 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
1477 r = NLMSG_DATA(nlh); 1478 r = NLMSG_DATA(nlh);
1478 nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
1479 r->rtm_family = AF_DECnet; 1479 r->rtm_family = AF_DECnet;
1480 r->rtm_dst_len = 16; 1480 r->rtm_dst_len = 16;
1481 r->rtm_src_len = 0; 1481 r->rtm_src_len = 0;
@@ -1596,7 +1596,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
1596 1596
1597 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; 1597 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1598 1598
1599 err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0); 1599 err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
1600 1600
1601 if (err == 0) 1601 if (err == 0)
1602 goto out_free; 1602 goto out_free;
@@ -1644,7 +1644,8 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
1644 continue; 1644 continue;
1645 skb->dst = dst_clone(&rt->u.dst); 1645 skb->dst = dst_clone(&rt->u.dst);
1646 if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 1646 if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
1647 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1) <= 0) { 1647 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
1648 1, NLM_F_MULTI) <= 0) {
1648 dst_release(xchg(&skb->dst, NULL)); 1649 dst_release(xchg(&skb->dst, NULL));
1649 rcu_read_unlock_bh(); 1650 rcu_read_unlock_bh();
1650 goto done; 1651 goto done;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 597587d170..1060de70bc 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -342,14 +342,15 @@ static struct notifier_block dn_fib_rules_notifier = {
342 .notifier_call = dn_fib_rules_event, 342 .notifier_call = dn_fib_rules_event,
343}; 343};
344 344
345static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, struct netlink_callback *cb) 345static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r,
346 struct netlink_callback *cb, unsigned int flags)
346{ 347{
347 struct rtmsg *rtm; 348 struct rtmsg *rtm;
348 struct nlmsghdr *nlh; 349 struct nlmsghdr *nlh;
349 unsigned char *b = skb->tail; 350 unsigned char *b = skb->tail;
350 351
351 352
352 nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm)); 353 nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
353 rtm = NLMSG_DATA(nlh); 354 rtm = NLMSG_DATA(nlh);
354 rtm->rtm_family = AF_DECnet; 355 rtm->rtm_family = AF_DECnet;
355 rtm->rtm_dst_len = r->r_dst_len; 356 rtm->rtm_dst_len = r->r_dst_len;
@@ -394,7 +395,7 @@ int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
394 for(r = dn_fib_rules, idx = 0; r; r = r->r_next, idx++) { 395 for(r = dn_fib_rules, idx = 0; r; r = r->r_next, idx++) {
395 if (idx < s_idx) 396 if (idx < s_idx)
396 continue; 397 continue;
397 if (dn_fib_fill_rule(skb, r, cb) < 0) 398 if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
398 break; 399 break;
399 } 400 }
400 read_unlock(&dn_fib_rules_lock); 401 read_unlock(&dn_fib_rules_lock);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index dad5603912..28ba5777a2 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -270,13 +270,13 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
270 270
271static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 271static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
272 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, 272 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len,
273 struct dn_fib_info *fi) 273 struct dn_fib_info *fi, unsigned int flags)
274{ 274{
275 struct rtmsg *rtm; 275 struct rtmsg *rtm;
276 struct nlmsghdr *nlh; 276 struct nlmsghdr *nlh;
277 unsigned char *b = skb->tail; 277 unsigned char *b = skb->tail;
278 278
279 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm)); 279 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
280 rtm = NLMSG_DATA(nlh); 280 rtm = NLMSG_DATA(nlh);
281 rtm->rtm_family = AF_DECnet; 281 rtm->rtm_family = AF_DECnet;
282 rtm->rtm_dst_len = dst_len; 282 rtm->rtm_dst_len = dst_len;
@@ -345,7 +345,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id,
345 345
346 if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, 346 if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
347 f->fn_type, f->fn_scope, &f->fn_key, z, 347 f->fn_type, f->fn_scope, &f->fn_key, z,
348 DN_FIB_INFO(f)) < 0) { 348 DN_FIB_INFO(f), 0) < 0) {
349 kfree_skb(skb); 349 kfree_skb(skb);
350 return; 350 return;
351 } 351 }
@@ -377,7 +377,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
377 tb->n, 377 tb->n,
378 (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type, 378 (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
379 f->fn_scope, &f->fn_key, dz->dz_order, 379 f->fn_scope, &f->fn_key, dz->dz_order,
380 f->fn_info) < 0) { 380 f->fn_info, NLM_F_MULTI) < 0) {
381 cb->args[3] = i; 381 cb->args[3] = i;
382 return -1; 382 return -1;
383 } 383 }
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 6d3e8b1bd1..567b03b1c3 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -1,6 +1,32 @@
1# 1#
2# IP configuration 2# IP configuration
3# 3#
4choice
5 prompt "Choose IP: FIB lookup"
6 depends on INET
7 default IP_FIB_HASH
8
9config IP_FIB_HASH
10 bool "FIB_HASH"
11 ---help---
12 The current FIB is well proven and good enough for most users.
13
14config IP_FIB_TRIE
15 bool "FIB_TRIE"
16 ---help---
17 Use the new experimental LC-trie as the FIB lookup algorithm.
18 This improves lookup performance.
19
20 LC-trie is described in:
21
22 IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 An experimental study of compression methods for dynamic tries
25 Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
26 http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
27
28endchoice
29
4config IP_MULTICAST 30config IP_MULTICAST
5 bool "IP: multicasting" 31 bool "IP: multicasting"
6 depends on INET 32 depends on INET
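
Because the two options form a Kconfig choice, a configuration selects exactly one FIB algorithm; a hypothetical .config fragment for trying the trie would look like:

CONFIG_IP_FIB_TRIE=y
# CONFIG_IP_FIB_HASH is not set
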
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 8b379627eb..65d57d8e1a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -7,8 +7,10 @@ obj-y := utils.o route.o inetpeer.o protocol.o \
7 ip_output.o ip_sockglue.o \ 7 ip_output.o ip_sockglue.o \
8 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o tcp_minisocks.o \ 8 tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o tcp_minisocks.o \
9 datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ 9 datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
10 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o 10 sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
11 11
12obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
13obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
12obj-$(CONFIG_PROC_FS) += proc.o 14obj-$(CONFIG_PROC_FS) += proc.o
13obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o 15obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
14obj-$(CONFIG_IP_MROUTE) += ipmr.o 16obj-$(CONFIG_IP_MROUTE) += ipmr.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b3cb49ce5f..658e797792 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1119,6 +1119,10 @@ module_init(inet_init);
1119#ifdef CONFIG_PROC_FS 1119#ifdef CONFIG_PROC_FS
1120extern int fib_proc_init(void); 1120extern int fib_proc_init(void);
1121extern void fib_proc_exit(void); 1121extern void fib_proc_exit(void);
1122#ifdef CONFIG_IP_FIB_TRIE
1123extern int fib_stat_proc_init(void);
1124extern void fib_stat_proc_exit(void);
1125#endif
1122extern int ip_misc_proc_init(void); 1126extern int ip_misc_proc_init(void);
1123extern int raw_proc_init(void); 1127extern int raw_proc_init(void);
1124extern void raw_proc_exit(void); 1128extern void raw_proc_exit(void);
@@ -1139,11 +1143,19 @@ static int __init ipv4_proc_init(void)
1139 goto out_udp; 1143 goto out_udp;
1140 if (fib_proc_init()) 1144 if (fib_proc_init())
1141 goto out_fib; 1145 goto out_fib;
1146#ifdef CONFIG_IP_FIB_TRIE
1147 if (fib_stat_proc_init())
1148 goto out_fib_stat;
1149 #endif
1142 if (ip_misc_proc_init()) 1150 if (ip_misc_proc_init())
1143 goto out_misc; 1151 goto out_misc;
1144out: 1152out:
1145 return rc; 1153 return rc;
1146out_misc: 1154out_misc:
1155#ifdef CONFIG_IP_FIB_TRIE
1156 fib_stat_proc_exit();
1157out_fib_stat:
1158#endif
1147 fib_proc_exit(); 1159 fib_proc_exit();
1148out_fib: 1160out_fib:
1149 udp4_proc_exit(); 1161 udp4_proc_exit();
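
The error unwinding extended above follows the usual fall-through-label idiom: each successful init step gets a matching exit label, and a failure jumps to the label that undoes everything initialised so far, in reverse order. A self-contained sketch with invented step names:

#include <stdio.h>

static int fib_init(void)       { puts("fib init");  return 0; }
static void fib_exit(void)      { puts("fib exit");  }
static int fib_stat_init(void)  { puts("stat init"); return 0; }
static void fib_stat_exit(void) { puts("stat exit"); }
static int misc_init(void)      { return -1; /* pretend this fails */ }

int main(void)
{
    int rc = -1;

    if (fib_init())
        goto out;
    if (fib_stat_init())
        goto out_fib;
    if (misc_init())
        goto out_fib_stat;
    rc = 0;
out:
    return rc;
out_fib_stat:                     /* labels fall through in reverse order */
    fib_stat_exit();
out_fib:
    fib_exit();
    goto out;
}
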
@@ -1181,6 +1193,7 @@ EXPORT_SYMBOL(inet_stream_connect);
1181EXPORT_SYMBOL(inet_stream_ops); 1193EXPORT_SYMBOL(inet_stream_ops);
1182EXPORT_SYMBOL(inet_unregister_protosw); 1194EXPORT_SYMBOL(inet_unregister_protosw);
1183EXPORT_SYMBOL(net_statistics); 1195EXPORT_SYMBOL(net_statistics);
1196EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
1184 1197
1185#ifdef INET_REFCNT_DEBUG 1198#ifdef INET_REFCNT_DEBUG
1186EXPORT_SYMBOL(inet_sock_nr); 1199EXPORT_SYMBOL(inet_sock_nr);
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 0e98f2235b..514c85b263 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -200,7 +200,7 @@ static void ah4_err(struct sk_buff *skb, u32 info)
200 xfrm_state_put(x); 200 xfrm_state_put(x);
201} 201}
202 202
203static int ah_init_state(struct xfrm_state *x, void *args) 203static int ah_init_state(struct xfrm_state *x)
204{ 204{
205 struct ah_data *ahp = NULL; 205 struct ah_data *ahp = NULL;
206 struct xfrm_algo_desc *aalg_desc; 206 struct xfrm_algo_desc *aalg_desc;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 478a30179a..650dcb12d9 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1030,14 +1030,13 @@ static struct notifier_block ip_netdev_notifier = {
1030}; 1030};
1031 1031
1032static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, 1032static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1033 u32 pid, u32 seq, int event) 1033 u32 pid, u32 seq, int event, unsigned int flags)
1034{ 1034{
1035 struct ifaddrmsg *ifm; 1035 struct ifaddrmsg *ifm;
1036 struct nlmsghdr *nlh; 1036 struct nlmsghdr *nlh;
1037 unsigned char *b = skb->tail; 1037 unsigned char *b = skb->tail;
1038 1038
1039 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 1039 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
1040 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
1041 ifm = NLMSG_DATA(nlh); 1040 ifm = NLMSG_DATA(nlh);
1042 ifm->ifa_family = AF_INET; 1041 ifm->ifa_family = AF_INET;
1043 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1042 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
@@ -1090,7 +1089,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1090 continue; 1089 continue;
1091 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, 1090 if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1092 cb->nlh->nlmsg_seq, 1091 cb->nlh->nlmsg_seq,
1093 RTM_NEWADDR) <= 0) { 1092 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1094 rcu_read_unlock(); 1093 rcu_read_unlock();
1095 goto done; 1094 goto done;
1096 } 1095 }
@@ -1113,7 +1112,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa)
1113 1112
1114 if (!skb) 1113 if (!skb)
1115 netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS); 1114 netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS);
1116 else if (inet_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { 1115 else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
1117 kfree_skb(skb); 1116 kfree_skb(skb);
1118 netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL); 1117 netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL);
1119 } else { 1118 } else {
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index eae84cc39d..ba57446d5d 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -362,7 +362,7 @@ static void esp_destroy(struct xfrm_state *x)
362 kfree(esp); 362 kfree(esp);
363} 363}
364 364
365static int esp_init_state(struct xfrm_state *x, void *args) 365static int esp_init_state(struct xfrm_state *x)
366{ 366{
367 struct esp_data *esp = NULL; 367 struct esp_data *esp = NULL;
368 368
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 563e7d6127..cd8e45ab95 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -516,6 +516,60 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
516#undef BRD1_OK 516#undef BRD1_OK
517} 517}
518 518
519static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
520{
521
522 struct fib_result res;
523 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
524 .fwmark = frn->fl_fwmark,
525 .tos = frn->fl_tos,
526 .scope = frn->fl_scope } } };
527 if (tb) {
528 local_bh_disable();
529
530 frn->tb_id = tb->tb_id;
531 frn->err = tb->tb_lookup(tb, &fl, &res);
532
533 if (!frn->err) {
534 frn->prefixlen = res.prefixlen;
535 frn->nh_sel = res.nh_sel;
536 frn->type = res.type;
537 frn->scope = res.scope;
538 }
539 local_bh_enable();
540 }
541}
542
543static void nl_fib_input(struct sock *sk, int len)
544{
545 struct sk_buff *skb = NULL;
546 struct nlmsghdr *nlh = NULL;
547 struct fib_result_nl *frn;
548 int err;
549 u32 pid;
550 struct fib_table *tb;
551
552 skb = skb_recv_datagram(sk, 0, 0, &err);
553 nlh = (struct nlmsghdr *)skb->data;
554
555 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
556 tb = fib_get_table(frn->tb_id_in);
557
558 nl_fib_lookup(frn, tb);
559
560 pid = nlh->nlmsg_pid; /*pid of sending process */
561 NETLINK_CB(skb).groups = 0; /* not in mcast group */
562 NETLINK_CB(skb).pid = 0; /* from kernel */
563 NETLINK_CB(skb).dst_pid = pid;
564 NETLINK_CB(skb).dst_groups = 0; /* unicast */
565 netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
566}
567
568static void nl_fib_lookup_init(void)
569{
570 netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input);
571}
572
519static void fib_disable_ip(struct net_device *dev, int force) 573static void fib_disable_ip(struct net_device *dev, int force)
520{ 574{
521 if (fib_sync_down(0, dev, force)) 575 if (fib_sync_down(0, dev, force))
@@ -604,6 +658,7 @@ void __init ip_fib_init(void)
604 658
605 register_netdevice_notifier(&fib_netdev_notifier); 659 register_netdevice_notifier(&fib_netdev_notifier);
606 register_inetaddr_notifier(&fib_inetaddr_notifier); 660 register_inetaddr_notifier(&fib_inetaddr_notifier);
661 nl_fib_lookup_init();
607} 662}
608 663
609EXPORT_SYMBOL(inet_addr_type); 664EXPORT_SYMBOL(inet_addr_type);
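
nl_fib_input() reuses the request skb for the reply: the kernel fills in the result fields of the embedded fib_result_nl and unicasts the buffer back to the sender's pid. A hedged userspace sketch of a client follows; the struct layout below is only reconstructed from the fields the code above touches and is an assumption, so real programs must use the kernel's own definition:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

/* ASSUMED layout of struct fib_result_nl, guessed from the fields used
 * above; include the kernel header instead for anything real. */
struct fib_result_nl_guess {
    unsigned int  fl_addr;            /* address to look up */
    unsigned int  fl_fwmark;
    unsigned char fl_tos, fl_scope;
    unsigned char tb_id_in;           /* table to query, e.g. 254 = main */
    unsigned char tb_id, prefixlen, nh_sel, type, scope;  /* results */
    int           err;
};

int main(void)
{
    struct {
        struct nlmsghdr nlh;
        struct fib_result_nl_guess frn;
    } req;
    struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
    int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_FIB_LOOKUP);

    if (fd < 0)
        return 1;
    memset(&req, 0, sizeof(req));
    req.nlh.nlmsg_len = sizeof(req);
    req.nlh.nlmsg_pid = getpid();     /* reply is unicast back to this pid */
    req.frn.fl_addr   = 0x0100007f;   /* 127.0.0.1 as little-endian constant */
    req.frn.tb_id_in  = 254;
    sendto(fd, &req, sizeof(req), 0,
           (struct sockaddr *)&kernel, sizeof(kernel));
    recv(fd, &req, sizeof(req), 0);
    printf("err=%d prefixlen=%u\n", req.frn.err, req.frn.prefixlen);
    close(fd);
    return 0;
}
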
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 6506dcc01b..b10d6bb5ef 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -703,7 +703,8 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
703 &f->fn_key, 703 &f->fn_key,
704 fz->fz_order, 704 fz->fz_order,
705 fa->fa_tos, 705 fa->fa_tos,
706 fa->fa_info) < 0) { 706 fa->fa_info,
707 NLM_F_MULTI) < 0) {
707 cb->args[3] = i; 708 cb->args[3] = i;
708 return -1; 709 return -1;
709 } 710 }
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index ac4485f75e..b729d97cfa 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -30,7 +30,8 @@ extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *,
30 struct kern_rta *rta, struct fib_info *fi); 30 struct kern_rta *rta, struct fib_info *fi);
31extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 31extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
32 u8 tb_id, u8 type, u8 scope, void *dst, 32 u8 tb_id, u8 type, u8 scope, void *dst,
33 int dst_len, u8 tos, struct fib_info *fi); 33 int dst_len, u8 tos, struct fib_info *fi,
34 unsigned int);
34extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, 35extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
35 int z, int tb_id, 36 int z, int tb_id,
36 struct nlmsghdr *n, struct netlink_skb_parms *req); 37 struct nlmsghdr *n, struct netlink_skb_parms *req);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 39d0aadb9a..0b298bbc15 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -367,13 +367,14 @@ static struct notifier_block fib_rules_notifier = {
367 367
368static __inline__ int inet_fill_rule(struct sk_buff *skb, 368static __inline__ int inet_fill_rule(struct sk_buff *skb,
369 struct fib_rule *r, 369 struct fib_rule *r,
370 struct netlink_callback *cb) 370 struct netlink_callback *cb,
371 unsigned int flags)
371{ 372{
372 struct rtmsg *rtm; 373 struct rtmsg *rtm;
373 struct nlmsghdr *nlh; 374 struct nlmsghdr *nlh;
374 unsigned char *b = skb->tail; 375 unsigned char *b = skb->tail;
375 376
376 nlh = NLMSG_PUT(skb, NETLINK_CREDS(cb->skb)->pid, cb->nlh->nlmsg_seq, RTM_NEWRULE, sizeof(*rtm)); 377 nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags);
377 rtm = NLMSG_DATA(nlh); 378 rtm = NLMSG_DATA(nlh);
378 rtm->rtm_family = AF_INET; 379 rtm->rtm_family = AF_INET;
379 rtm->rtm_dst_len = r->r_dst_len; 380 rtm->rtm_dst_len = r->r_dst_len;
@@ -422,7 +423,7 @@ int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
422 for (r=fib_rules, idx=0; r; r = r->r_next, idx++) { 423 for (r=fib_rules, idx=0; r; r = r->r_next, idx++) {
423 if (idx < s_idx) 424 if (idx < s_idx)
424 continue; 425 continue;
425 if (inet_fill_rule(skb, r, cb) < 0) 426 if (inet_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
426 break; 427 break;
427 } 428 }
428 read_unlock(&fib_rules_lock); 429 read_unlock(&fib_rules_lock);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 029362d661..c886b28ba9 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -276,7 +276,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
276 struct nlmsghdr *n, struct netlink_skb_parms *req) 276 struct nlmsghdr *n, struct netlink_skb_parms *req)
277{ 277{
278 struct sk_buff *skb; 278 struct sk_buff *skb;
279 u32 pid = req ? req->pid : 0; 279 u32 pid = req ? req->pid : n->nlmsg_pid;
280 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); 280 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
281 281
282 skb = alloc_skb(size, GFP_KERNEL); 282 skb = alloc_skb(size, GFP_KERNEL);
@@ -286,7 +286,7 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
286 if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, 286 if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
287 fa->fa_type, fa->fa_scope, &key, z, 287 fa->fa_type, fa->fa_scope, &key, z,
288 fa->fa_tos, 288 fa->fa_tos,
289 fa->fa_info) < 0) { 289 fa->fa_info, 0) < 0) {
290 kfree_skb(skb); 290 kfree_skb(skb);
291 return; 291 return;
292 } 292 }
@@ -932,13 +932,13 @@ u32 __fib_res_prefsrc(struct fib_result *res)
932int 932int
933fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 933fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
934 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, 934 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
935 struct fib_info *fi) 935 struct fib_info *fi, unsigned int flags)
936{ 936{
937 struct rtmsg *rtm; 937 struct rtmsg *rtm;
938 struct nlmsghdr *nlh; 938 struct nlmsghdr *nlh;
939 unsigned char *b = skb->tail; 939 unsigned char *b = skb->tail;
940 940
941 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm)); 941 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
942 rtm = NLMSG_DATA(nlh); 942 rtm = NLMSG_DATA(nlh);
943 rtm->rtm_family = AF_INET; 943 rtm->rtm_family = AF_INET;
944 rtm->rtm_dst_len = dst_len; 944 rtm->rtm_dst_len = dst_len;
@@ -1035,7 +1035,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1035 } 1035 }
1036 1036
1037 nl->nlmsg_flags = NLM_F_REQUEST; 1037 nl->nlmsg_flags = NLM_F_REQUEST;
1038 nl->nlmsg_pid = 0; 1038 nl->nlmsg_pid = current->pid;
1039 nl->nlmsg_seq = 0; 1039 nl->nlmsg_seq = 0;
1040 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); 1040 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1041 if (cmd == SIOCDELRT) { 1041 if (cmd == SIOCDELRT) {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
new file mode 100644
index 0000000000..0671569ee6
--- /dev/null
+++ b/net/ipv4/fib_trie.c
@@ -0,0 +1,2454 @@
1/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
12 *
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie, which was originally described in:
16 *
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
44 */
45
46#define VERSION "0.323"
47
48#include <linux/config.h>
49#include <asm/uaccess.h>
50#include <asm/system.h>
51#include <asm/bitops.h>
52#include <linux/types.h>
53#include <linux/kernel.h>
54#include <linux/sched.h>
55#include <linux/mm.h>
56#include <linux/string.h>
57#include <linux/socket.h>
58#include <linux/sockios.h>
59#include <linux/errno.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/if_arp.h>
64#include <linux/proc_fs.h>
65#include <linux/skbuff.h>
66#include <linux/netlink.h>
67#include <linux/init.h>
68#include <linux/list.h>
69#include <net/ip.h>
70#include <net/protocol.h>
71#include <net/route.h>
72#include <net/tcp.h>
73#include <net/sock.h>
74#include <net/ip_fib.h>
75#include "fib_lookup.h"
76
77#undef CONFIG_IP_FIB_TRIE_STATS
78#define MAX_CHILDS 16384
79
80#define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n)))
81#define KEYLENGTH (8*sizeof(t_key))
82#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
83#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
84
85static DEFINE_RWLOCK(fib_lock);
86
87typedef unsigned int t_key;
88
89#define T_TNODE 0
90#define T_LEAF 1
91#define NODE_TYPE_MASK 0x1UL
92#define NODE_PARENT(_node) \
93((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK))
94#define NODE_SET_PARENT(_node, _ptr) \
95((_node)->_parent = (((unsigned long)(_ptr)) | \
96 ((_node)->_parent & NODE_TYPE_MASK)))
97#define NODE_INIT_PARENT(_node, _type) \
98((_node)->_parent = (_type))
99#define NODE_TYPE(_node) \
100((_node)->_parent & NODE_TYPE_MASK)
101
102#define IS_TNODE(n) (!(n->_parent & T_LEAF))
103#define IS_LEAF(n) (n->_parent & T_LEAF)
104
105struct node {
106 t_key key;
107 unsigned long _parent;
108};
109
110struct leaf {
111 t_key key;
112 unsigned long _parent;
113 struct hlist_head list;
114};
115
116struct leaf_info {
117 struct hlist_node hlist;
118 int plen;
119 struct list_head falh;
120};
121
122struct tnode {
123 t_key key;
124 unsigned long _parent;
125 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
126 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
127 unsigned short full_children; /* KEYLENGTH bits needed */
128 unsigned short empty_children; /* KEYLENGTH bits needed */
129 struct node *child[0];
130};
131
132#ifdef CONFIG_IP_FIB_TRIE_STATS
133struct trie_use_stats {
134 unsigned int gets;
135 unsigned int backtrack;
136 unsigned int semantic_match_passed;
137 unsigned int semantic_match_miss;
138 unsigned int null_node_hit;
139};
140#endif
141
142struct trie_stat {
143 unsigned int totdepth;
144 unsigned int maxdepth;
145 unsigned int tnodes;
146 unsigned int leaves;
147 unsigned int nullpointers;
148 unsigned int nodesizes[MAX_CHILDS];
149};
150
151struct trie {
152 struct node *trie;
153#ifdef CONFIG_IP_FIB_TRIE_STATS
154 struct trie_use_stats stats;
155#endif
156 int size;
157 unsigned int revision;
158};
159
160static int trie_debug = 0;
161
162static int tnode_full(struct tnode *tn, struct node *n);
163static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
164static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
165static int tnode_child_length(struct tnode *tn);
166static struct node *resize(struct trie *t, struct tnode *tn);
167static struct tnode *inflate(struct trie *t, struct tnode *tn);
168static struct tnode *halve(struct trie *t, struct tnode *tn);
169static void tnode_free(struct tnode *tn);
170static void trie_dump_seq(struct seq_file *seq, struct trie *t);
171extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
172extern int fib_detect_death(struct fib_info *fi, int order,
173 struct fib_info **last_resort, int *last_idx, int *dflt);
174
175extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id,
176 struct nlmsghdr *n, struct netlink_skb_parms *req);
177
178static kmem_cache_t *fn_alias_kmem;
179static struct trie *trie_local = NULL, *trie_main = NULL;
180
181static void trie_bug(char *err)
182{
183 printk("Trie Bug: %s\n", err);
184 BUG();
185}
186
187static inline struct node *tnode_get_child(struct tnode *tn, int i)
188{
189 if (i >= 1<<tn->bits)
190 trie_bug("tnode_get_child");
191
192 return tn->child[i];
193}
194
195static inline int tnode_child_length(struct tnode *tn)
196{
197 return 1<<tn->bits;
198}
199
200/*
201 _________________________________________________________________
202 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
203 ----------------------------------------------------------------
204 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
205
206 _________________________________________________________________
207 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
208 -----------------------------------------------------------------
209 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
210
211 tp->pos = 7
212 tp->bits = 3
213 n->pos = 15
214 n->bits=4
215 KEYLENGTH=32
216*/
217
218static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
219{
220 if (offset < KEYLENGTH)
221 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
222 else
223 return 0;
224}
225
226static inline int tkey_equals(t_key a, t_key b)
227{
228 return a == b;
229}
230
231static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
232{
233 if (bits == 0 || offset >= KEYLENGTH)
234 return 1;
235 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
236 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
237}
238
239static inline int tkey_mismatch(t_key a, int offset, t_key b)
240{
241 t_key diff = a ^ b;
242 int i = offset;
243
244 if(!diff)
245 return 0;
246 while((diff << i) >> (KEYLENGTH-1) == 0)
247 i++;
248 return i;
249}
250
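
tkey_extract_bits() treats the key MSB-first: shifting left by offset discards the bits already consumed, and the right shift keeps only the requested width. A standalone check of that arithmetic:

#include <stdio.h>

typedef unsigned int t_key;
#define KEYLENGTH (8 * sizeof(t_key))

static t_key extract(t_key a, int offset, int bits)
{
    if (offset < (int)KEYLENGTH)
        return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
    return 0;
}

int main(void)
{
    t_key key = 0xC0A80000;   /* 192.168.0.0 as a host-order u32 */

    /* top 16 bits -> c0a8; 4 bits starting at offset 8 -> a */
    printf("%x %x\n", extract(key, 0, 16), extract(key, 8, 4));
    return 0;
}
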
251/* Candidate for fib_semantics */
252
253static void fn_free_alias(struct fib_alias *fa)
254{
255 fib_release_info(fa->fa_info);
256 kmem_cache_free(fn_alias_kmem, fa);
257}
258
259/*
260 To understand this stuff, an understanding of keys and all their bits is
261 necessary. Every node in the trie has a key associated with it, but not
262 all of the bits in that key are significant.
263
264 Consider a node 'n' and its parent 'tp'.
265
266 If n is a leaf, every bit in its key is significant. Its presence is
267 necessitated by path compression, since during a tree traversal (when
268 searching for a leaf - unless we are doing an insertion) we will completely
269 ignore all skipped bits we encounter. Thus we need to verify, at the end of
270 a potentially successful search, that we have indeed been walking the
271 correct key path.
272
273 Note that we can never "miss" the correct key in the tree if present by
274 following the wrong path. Path compression ensures that segments of the key
275 that are the same for all keys with a given prefix are skipped, but the
276 skipped part *is* identical for each node in the subtrie below the skipped
277 bit! trie_insert() in this implementation takes care of that - note the
278 call to tkey_sub_equals() in trie_insert().
279
280 if n is an internal node - a 'tnode' here, the various parts of its key
281 have many different meanings.
282
283 Example:
284 _________________________________________________________________
285 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
286 -----------------------------------------------------------------
287 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
288
289 _________________________________________________________________
290 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
291 -----------------------------------------------------------------
292 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
293
294 tp->pos = 7
295 tp->bits = 3
296 n->pos = 15
297 n->bits=4
298
299 First, let's just ignore the bits that come before the parent tp, that is
300 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
301 not use them for anything.
302
303 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
304 index into the parent's child array. That is, they will be used to find
305 'n' among tp's children.
306
307 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
308 for the node n.
309
310 All the bits we have seen so far are significant to the node n. The rest
311 of the bits are really not needed or indeed known in n->key.
312
313 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
314 n's child array, and will of course be different for each child.
315
316 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
317 at this point.
318
319*/
320
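
Plugging the example values from the comment above (tp->pos = 7, tp->bits = 3, n->pos = 15, n->bits = 4) into the same extraction gives the "N" index that locates n in tp's child array and the "C" index into n's own children; the key value below is arbitrary:

#include <stdio.h>

int main(void)
{
    unsigned int key = 0x12345678;                    /* arbitrary key */
    unsigned int n_index = (key << 7)  >> (32 - 3);   /* "N": bits 7..9 */
    unsigned int c_index = (key << 15) >> (32 - 4);   /* "C": bits 15..18 */

    printf("N=%u C=%u\n", n_index, c_index);
    return 0;
}
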
321static void check_tnode(struct tnode *tn)
322{
323 if(tn && tn->pos+tn->bits > 32) {
324 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
325 }
326}
327
328static int halve_threshold = 25;
329static int inflate_threshold = 50;
330
331static struct leaf *leaf_new(void)
332{
333 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
334 if(l) {
335 NODE_INIT_PARENT(l, T_LEAF);
336 INIT_HLIST_HEAD(&l->list);
337 }
338 return l;
339}
340
341static struct leaf_info *leaf_info_new(int plen)
342{
343 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
344 li->plen = plen;
345 INIT_LIST_HEAD(&li->falh);
346 return li;
347}
348
349static inline void free_leaf(struct leaf *l)
350{
351 kfree(l);
352}
353
354static inline void free_leaf_info(struct leaf_info *li)
355{
356 kfree(li);
357}
358
359static struct tnode* tnode_new(t_key key, int pos, int bits)
360{
361 int nchildren = 1<<bits;
362 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
363 struct tnode *tn = kmalloc(sz, GFP_KERNEL);
364
365 if(tn) {
366 memset(tn, 0, sz);
367 NODE_INIT_PARENT(tn, T_TNODE);
368 tn->pos = pos;
369 tn->bits = bits;
370 tn->key = key;
371 tn->full_children = 0;
372 tn->empty_children = 1<<bits;
373 }
374 if(trie_debug > 0)
375 printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
376 (unsigned int) (sizeof(struct node) * 1<<bits));
377 return tn;
378}
379
380static void tnode_free(struct tnode *tn)
381{
382 if(!tn) {
383 trie_bug("tnode_free\n");
384 }
385 if(IS_LEAF(tn)) {
386 free_leaf((struct leaf *)tn);
387 if(trie_debug > 0 )
388 printk("FL %p \n", tn);
389 }
390 else if(IS_TNODE(tn)) {
391 kfree(tn);
392 if(trie_debug > 0 )
393 printk("FT %p \n", tn);
394 }
395 else {
396 trie_bug("tnode_free\n");
397 }
398}
399
400/*
401 * Check whether a tnode 'n' is "full", i.e. it is an internal node
402 * and no bits are skipped. See discussion in dyntree paper p. 6
403 */
404
405static inline int tnode_full(struct tnode *tn, struct node *n)
406{
407 if(n == NULL || IS_LEAF(n))
408 return 0;
409
410 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
411}
412
413static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
414{
415 tnode_put_child_reorg(tn, i, n, -1);
416}
417
418 /*
419 * Add a child at position i overwriting the old value.
420 * Update the value of full_children and empty_children.
421 */
422
423static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
424{
425 struct node *chi;
426 int isfull;
427
428 if(i >= 1<<tn->bits) {
429 printk("bits=%d, i=%d\n", tn->bits, i);
430 trie_bug("tnode_put_child_reorg bits");
431 }
432 write_lock_bh(&fib_lock);
433 chi = tn->child[i];
434
435 /* update emptyChildren */
436 if (n == NULL && chi != NULL)
437 tn->empty_children++;
438 else if (n != NULL && chi == NULL)
439 tn->empty_children--;
440
441 /* update fullChildren */
442 if (wasfull == -1)
443 wasfull = tnode_full(tn, chi);
444
445 isfull = tnode_full(tn, n);
446 if (wasfull && !isfull)
447 tn->full_children--;
448
449 else if (!wasfull && isfull)
450 tn->full_children++;
451 if(n)
452 NODE_SET_PARENT(n, tn);
453
454 tn->child[i] = n;
455 write_unlock_bh(&fib_lock);
456}
457
458static struct node *resize(struct trie *t, struct tnode *tn)
459{
460 int i;
461
462 if (!tn)
463 return NULL;
464
465 if(trie_debug)
466 printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
467 tn, inflate_threshold, halve_threshold);
468
469 /* No children */
470 if (tn->empty_children == tnode_child_length(tn)) {
471 tnode_free(tn);
472 return NULL;
473 }
474 /* One child */
475 if (tn->empty_children == tnode_child_length(tn) - 1)
476 for (i = 0; i < tnode_child_length(tn); i++) {
477
478 write_lock_bh(&fib_lock);
479 if (tn->child[i] != NULL) {
480
481 /* compress one level */
482 struct node *n = tn->child[i];
483 if(n)
484 NODE_INIT_PARENT(n, NODE_TYPE(n));
485
486 write_unlock_bh(&fib_lock);
487 tnode_free(tn);
488 return n;
489 }
490 write_unlock_bh(&fib_lock);
491 }
492 /*
493 * Double as long as the resulting node has a number of
494 * nonempty nodes that are above the threshold.
495 */
496
497 /*
498 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
499 * the Helsinki University of Technology and Matti Tikkanen of Nokia
500 * Telecommunications, page 6:
501 * "A node is doubled if the ratio of non-empty children to all
502 * children in the *doubled* node is at least 'high'."
503 *
504 * 'high' in this instance is the variable 'inflate_threshold'. It
505 * is expressed as a percentage, so we multiply it with
506 * tnode_child_length() and instead of multiplying by 2 (since the
507 * child array will be doubled by inflate()) and multiplying
508 * the left-hand side by 100 (to handle the percentage thing) we
509 * multiply the left-hand side by 50.
510 *
511 * The left-hand side may look a bit weird: tnode_child_length(tn)
512 * - tn->empty_children is of course the number of non-null children
513 * in the current node. tn->full_children is the number of "full"
514 * children, that is non-null tnodes with a skip value of 0.
515 * All of those will be doubled in the resulting inflated tnode, so
516 * we just count them one extra time here.
517 *
518 * A clearer way to write this would be:
519 *
520 * to_be_doubled = tn->full_children;
521 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
522 * tn->full_children;
523 *
524 * new_child_length = tnode_child_length(tn) * 2;
525 *
526 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
527 * new_child_length;
528 * if (new_fill_factor >= inflate_threshold)
529 *
530 * ...and so on, though it would mess up the while() loop.
531 *
532 * anyway,
533 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
534 * inflate_threshold
535 *
536 * avoid a division:
537 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
538 * inflate_threshold * new_child_length
539 *
540 * expand not_to_be_doubled and to_be_doubled, and shorten:
541 * 100 * (tnode_child_length(tn) - tn->empty_children +
542 * tn->full_children ) >= inflate_threshold * new_child_length
543 *
544 * expand new_child_length:
545 * 100 * (tnode_child_length(tn) - tn->empty_children +
546 * tn->full_children ) >=
547 * inflate_threshold * tnode_child_length(tn) * 2
548 *
549 * shorten again:
550 * 50 * (tn->full_children + tnode_child_length(tn) -
551 * tn->empty_children ) >= inflate_threshold *
552 * tnode_child_length(tn)
553 *
554 */
555
556 check_tnode(tn);
557
558 while ((tn->full_children > 0 &&
559 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
560 inflate_threshold * tnode_child_length(tn))) {
561
562 tn = inflate(t, tn);
563 }
564
565 check_tnode(tn);
566
567 /*
568 * Halve as long as the number of empty children in this
569 * node is above threshold.
570 */
571 while (tn->bits > 1 &&
572 100 * (tnode_child_length(tn) - tn->empty_children) <
573 halve_threshold * tnode_child_length(tn))
574
575 tn = halve(t, tn);
576
577 /* Only one child remains */
578
579 if (tn->empty_children == tnode_child_length(tn) - 1)
580 for (i = 0; i < tnode_child_length(tn); i++) {
581
582 write_lock_bh(&fib_lock);
583 if (tn->child[i] != NULL) {
584 /* compress one level */
585 struct node *n = tn->child[i];
586
587 if(n)
588 NODE_INIT_PARENT(n, NODE_TYPE(n));
589
590 write_unlock_bh(&fib_lock);
591 tnode_free(tn);
592 return n;
593 }
594 write_unlock_bh(&fib_lock);
595 }
596
597 return (struct node *) tn;
598}
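
The division-free inequality that resize() loops on can be sanity-checked against the fill-factor form it was derived from; a quick standalone computation with made-up child counts:

#include <stdio.h>

int main(void)
{
    int child_length = 8, full = 3, empty = 2, threshold = 50;

    /* division-free test used by resize() */
    int inflate = 50 * (full + child_length - empty)
                  >= threshold * child_length;

    /* fill factor of the doubled node, as in the derivation above */
    int not_doubled = child_length - empty - full;
    int fill = 100 * (not_doubled + 2 * full) / (2 * child_length);

    printf("fill=%d%% -> inflate=%d\n", fill, inflate);
    return 0;
}
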
599
600static struct tnode *inflate(struct trie *t, struct tnode *tn)
601{
602 struct tnode *inode;
603 struct tnode *oldtnode = tn;
604 int olen = tnode_child_length(tn);
605 int i;
606
607 if(trie_debug)
608 printk("In inflate\n");
609
610 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
611
612 if (!tn)
613 trie_bug("tnode_new failed");
614
615 for(i = 0; i < olen; i++) {
616 struct node *node = tnode_get_child(oldtnode, i);
617
618 /* An empty child */
619 if (node == NULL)
620 continue;
621
622 /* A leaf or an internal node with skipped bits */
623
624 if(IS_LEAF(node) || ((struct tnode *) node)->pos >
625 tn->pos + tn->bits - 1) {
626 if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1,
627 1) == 0)
628 put_child(t, tn, 2*i, node);
629 else
630 put_child(t, tn, 2*i+1, node);
631 continue;
632 }
633
634 /* An internal node with two children */
635 inode = (struct tnode *) node;
636
637 if (inode->bits == 1) {
638 put_child(t, tn, 2*i, inode->child[0]);
639 put_child(t, tn, 2*i+1, inode->child[1]);
640
641 tnode_free(inode);
642 }
643
644 /* An internal node with more than two children */
645 else {
646 struct tnode *left, *right;
647 int size, j;
648
649 /* We will replace this node 'inode' with two new
650 * ones, 'left' and 'right', each with half of the
651 * original children. The two new nodes will have
652 * a position one bit further down the key and this
653 * means that the "significant" part of their keys
654 * (see the discussion near the top of this file)
655 * will differ by one bit, which will be "0" in
656 * left's key and "1" in right's key. Since we are
657 * moving the key position by one step, the bit that
658 * we are moving away from - the bit at position
659 * (inode->pos) - is the one that will differ between
660 * left and right. So... we synthesize that bit in the
661 * two new keys.
662 * The mask 'm' below will be a single "one" bit at
663 * the position (inode->pos)
664 */
665
666 t_key m = TKEY_GET_MASK(inode->pos, 1);
667
668 /* Use the old key, but set the new significant
669 * bit to zero.
670 */
671 left = tnode_new(inode->key&(~m), inode->pos + 1,
672 inode->bits - 1);
673
674 if(!left)
675 trie_bug("tnode_new failed");
676
677
678 /* Use the old key, but set the new significant
679 * bit to one.
680 */
681 right = tnode_new(inode->key|m, inode->pos + 1,
682 inode->bits - 1);
683
684 if(!right)
685 trie_bug("tnode_new failed");
686
687 size = tnode_child_length(left);
688 for(j = 0; j < size; j++) {
689 put_child(t, left, j, inode->child[j]);
690 put_child(t, right, j, inode->child[j + size]);
691 }
692 put_child(t, tn, 2*i, resize(t, left));
693 put_child(t, tn, 2*i+1, resize(t, right));
694
695 tnode_free(inode);
696 }
697 }
698 tnode_free(oldtnode);
699 return tn;
700}
701
702static struct tnode *halve(struct trie *t, struct tnode *tn)
703{
704 struct tnode *oldtnode = tn;
705 struct node *left, *right;
706 int i;
707 int olen = tnode_child_length(tn);
708
709 if(trie_debug) printk("In halve\n");
710
711 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
712
713 if(!tn)
714 trie_bug("tnode_new failed");
715
716 for(i = 0; i < olen; i += 2) {
717 left = tnode_get_child(oldtnode, i);
718 right = tnode_get_child(oldtnode, i+1);
719
720 /* At least one of the children is empty */
721 if (left == NULL) {
722 if (right == NULL) /* Both are empty */
723 continue;
724 put_child(t, tn, i/2, right);
725 } else if (right == NULL)
726 put_child(t, tn, i/2, left);
727
728 /* Two nonempty children */
729 else {
730 struct tnode *newBinNode =
731 tnode_new(left->key, tn->pos + tn->bits, 1);
732
733 if(!newBinNode)
734 trie_bug("tnode_new failed");
735
736 put_child(t, newBinNode, 0, left);
737 put_child(t, newBinNode, 1, right);
738 put_child(t, tn, i/2, resize(t, newBinNode));
739 }
740 }
741 tnode_free(oldtnode);
742 return tn;
743}
744
745static void *trie_init(struct trie *t)
746{
747 if(t) {
748 t->size = 0;
749 t->trie = NULL;
750 t->revision = 0;
751#ifdef CONFIG_IP_FIB_TRIE_STATS
752 memset(&t->stats, 0, sizeof(struct trie_use_stats));
753#endif
754 }
755 return t;
756}
757
758static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
759{
760 struct hlist_node *node;
761 struct leaf_info *li;
762
763 hlist_for_each_entry(li, node, head, hlist) {
764
765 if ( li->plen == plen )
766 return li;
767 }
768 return NULL;
769}
770
771static inline struct list_head * get_fa_head(struct leaf *l, int plen)
772{
773 struct list_head *fa_head=NULL;
774 struct leaf_info *li = find_leaf_info(&l->list, plen);
775
776 if(li)
777 fa_head = &li->falh;
778
779 return fa_head;
780}
781
782static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
783{
784 struct leaf_info *li=NULL, *last=NULL;
785 struct hlist_node *node, *tmp;
786
787 write_lock_bh(&fib_lock);
788
789 if(hlist_empty(head))
790 hlist_add_head(&new->hlist, head);
791 else {
792 hlist_for_each_entry_safe(li, node, tmp, head, hlist) {
793
794 if (new->plen > li->plen)
795 break;
796
797 last = li;
798 }
799 if(last)
800 hlist_add_after(&last->hlist, &new->hlist);
801 else
802 hlist_add_before(&new->hlist, &li->hlist);
803 }
804 write_unlock_bh(&fib_lock);
805}
806
807static struct leaf *
808fib_find_node(struct trie *t, u32 key)
809{
810 int pos;
811 struct tnode *tn;
812 struct node *n;
813
814 pos = 0;
815 n=t->trie;
816
817 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
818 tn = (struct tnode *) n;
819
820 check_tnode(tn);
821
822 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
823 pos=tn->pos + tn->bits;
824 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
825 }
826 else
827 break;
828 }
829 /* Case we have found a leaf. Compare prefixes */
830
831 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
832 struct leaf *l = (struct leaf *) n;
833 return l;
834 }
835 return NULL;
836}
837
838static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
839{
840 int i = 0;
841 int wasfull;
842 t_key cindex, key;
843 struct tnode *tp = NULL;
844
845 if(!tn)
846 BUG();
847
848 key = tn->key;
849 i = 0;
850
851 while (tn != NULL && NODE_PARENT(tn) != NULL) {
852
853 if( i > 10 ) {
854 printk("Rebalance tn=%p \n", tn);
855 if(tn) printk("tn->parent=%p \n", NODE_PARENT(tn));
856
857 printk("Rebalance tp=%p \n", tp);
858 if(tp) printk("tp->parent=%p \n", NODE_PARENT(tp));
859 }
860
861 if( i > 12 ) BUG();
862 i++;
863
864 tp = NODE_PARENT(tn);
865 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
866 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
867 tn = (struct tnode *) resize (t, (struct tnode *)tn);
868 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
869
870 if(!NODE_PARENT(tn))
871 break;
872
873 tn = NODE_PARENT(tn);
874 }
875 /* Handle last (top) tnode */
876 if (IS_TNODE(tn))
877 tn = (struct tnode*) resize(t, (struct tnode *)tn);
878
879 return (struct node*) tn;
880}
881
882static struct list_head *
883fib_insert_node(struct trie *t, u32 key, int plen)
884{
885 int pos, newpos;
886 struct tnode *tp = NULL, *tn = NULL;
887 struct node *n;
888 struct leaf *l;
889 int missbit;
890 struct list_head *fa_head=NULL;
891 struct leaf_info *li;
892 t_key cindex;
893
894 pos = 0;
895 n=t->trie;
896
897 /* If we point to NULL, stop. Either the tree is empty and we should
898 * just put a new leaf in it, or we have reached an empty child slot,
899 * and we should just put our new leaf in that.
900 * If we point to a T_TNODE, check if it matches our key. Note that
901 * a T_TNODE might be skipping any number of bits - its 'pos' need
902 * not be the parent's 'pos'+'bits'!
903 *
904 * If it does match the current key, get pos/bits from it, extract
905 * the index from our key, push the T_TNODE and walk the tree.
906 *
907 * If it doesn't, we have to replace it with a new T_TNODE.
908 *
909 * If we point to a T_LEAF, it might or might not have the same key
910 * as we do. If it does, just change the value, update the T_LEAF's
911 * value, and return it.
912 * If it doesn't, we need to replace it with a T_TNODE.
913 */
914
915 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
916 tn = (struct tnode *) n;
917
918 check_tnode(tn);
919
920 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
921 tp = tn;
922 pos=tn->pos + tn->bits;
923 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
924
925 if(n && NODE_PARENT(n) != tn) {
926 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
927 BUG();
928 }
929 }
930 else
931 break;
932 }
933
934 /*
935 * n ----> NULL, LEAF or TNODE
936 *
937 * tp is n's (parent) ----> NULL or TNODE
938 */
939
940 if(tp && IS_LEAF(tp))
941 BUG();
942
943 t->revision++;
944
945 /* Case 1: n is a leaf. Compare prefixes */
946
947 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
948 struct leaf *l = ( struct leaf *) n;
949
950 li = leaf_info_new(plen);
951
952 if(! li)
953 BUG();
954
955 fa_head = &li->falh;
956 insert_leaf_info(&l->list, li);
957 goto done;
958 }
959 t->size++;
960 l = leaf_new();
961
962 if(! l)
963 BUG();
964
965 l->key = key;
966 li = leaf_info_new(plen);
967
968 if(! li)
969 BUG();
970
971 fa_head = &li->falh;
972 insert_leaf_info(&l->list, li);
973
974 /* Case 2: n is NULL, and will just insert a new leaf */
975 if (t->trie && n == NULL) {
976
977 NODE_SET_PARENT(l, tp);
978
979 if (!tp)
980 BUG();
981
982 else {
983 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
984 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
985 }
986 }
987 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
988 else {
989 /*
990 * Add a new tnode here
991 * first tnode need some special handling
992 */
993
994 if (tp)
995 pos=tp->pos+tp->bits;
996 else
997 pos=0;
998 if(n) {
999 newpos = tkey_mismatch(key, pos, n->key);
1000 tn = tnode_new(n->key, newpos, 1);
1001 }
1002 else {
1003 newpos = 0;
1004 tn = tnode_new(key, newpos, 1); /* First tnode */
1005 }
1006 if(!tn)
1007 trie_bug("tnode_new failed");
1008
1009 NODE_SET_PARENT(tn, tp);
1010
1011 missbit=tkey_extract_bits(key, newpos, 1);
1012 put_child(t, tn, missbit, (struct node *)l);
1013 put_child(t, tn, 1-missbit, n);
1014
1015 if(tp) {
1016 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1017 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
1018 }
1019 else {
1020 t->trie = (struct node*) tn; /* First tnode */
1021 tp = tn;
1022 }
1023 }
1024 if(tp && tp->pos+tp->bits > 32) {
1025 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1026 tp, tp->pos, tp->bits, key, plen);
1027 }
1028 /* Rebalance the trie */
1029 t->trie = trie_rebalance(t, tp);
1030done:;
1031 return fa_head;
1032}
1033
1034static int
1035fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1036 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1037{
1038 struct trie *t = (struct trie *) tb->tb_data;
1039 struct fib_alias *fa, *new_fa;
1040 struct list_head *fa_head=NULL;
1041 struct fib_info *fi;
1042 int plen = r->rtm_dst_len;
1043 int type = r->rtm_type;
1044 u8 tos = r->rtm_tos;
1045 u32 key, mask;
1046 int err;
1047 struct leaf *l;
1048
1049 if (plen > 32)
1050 return -EINVAL;
1051
1052 key = 0;
1053 if (rta->rta_dst)
1054 memcpy(&key, rta->rta_dst, 4);
1055
1056 key = ntohl(key);
1057
1058 if(trie_debug)
1059 printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
1060
1061 mask = ntohl( inet_make_mask(plen) );
1062
1063 if(key & ~mask)
1064 return -EINVAL;
1065
1066 key = key & mask;
1067
1068 if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL)
1069 goto err;
1070
1071 l = fib_find_node(t, key);
1072 fa = NULL;
1073
1074 if(l) {
1075 fa_head = get_fa_head(l, plen);
1076 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1077 }
1078
1079 /* Now fa, if non-NULL, points to the first fib alias
1080 * with the same keys [prefix,tos,priority], if such key already
1081 * exists or to the node before which we will insert new one.
1082 *
1083 * If fa is NULL, we will need to allocate a new one and
1084	 * insert it at the head of fa_head.
1085	 *
1086	 * If fa_head is NULL, no fib node matched the destination key
1087 * and we need to allocate a new one of those as well.
1088 */
1089
1090 if (fa &&
1091 fa->fa_info->fib_priority == fi->fib_priority) {
1092 struct fib_alias *fa_orig;
1093
1094 err = -EEXIST;
1095 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1096 goto out;
1097
1098 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1099 struct fib_info *fi_drop;
1100 u8 state;
1101
1102 write_lock_bh(&fib_lock);
1103
1104 fi_drop = fa->fa_info;
1105 fa->fa_info = fi;
1106 fa->fa_type = type;
1107 fa->fa_scope = r->rtm_scope;
1108 state = fa->fa_state;
1109 fa->fa_state &= ~FA_S_ACCESSED;
1110
1111 write_unlock_bh(&fib_lock);
1112
1113 fib_release_info(fi_drop);
1114 if (state & FA_S_ACCESSED)
1115 rt_cache_flush(-1);
1116
1117 goto succeeded;
1118 }
1119 /* Error if we find a perfect match which
1120 * uses the same scope, type, and nexthop
1121 * information.
1122 */
1123 fa_orig = fa;
1124 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1125 if (fa->fa_tos != tos)
1126 break;
1127 if (fa->fa_info->fib_priority != fi->fib_priority)
1128 break;
1129 if (fa->fa_type == type &&
1130 fa->fa_scope == r->rtm_scope &&
1131 fa->fa_info == fi) {
1132 goto out;
1133 }
1134 }
1135 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1136 fa = fa_orig;
1137 }
1138 err = -ENOENT;
1139 if (!(nlhdr->nlmsg_flags&NLM_F_CREATE))
1140 goto out;
1141
1142 err = -ENOBUFS;
1143 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1144 if (new_fa == NULL)
1145 goto out;
1146
1147 new_fa->fa_info = fi;
1148 new_fa->fa_tos = tos;
1149 new_fa->fa_type = type;
1150 new_fa->fa_scope = r->rtm_scope;
1151 new_fa->fa_state = 0;
1152#if 0
1153 new_fa->dst = NULL;
1154#endif
1155 /*
1156 * Insert new entry to the list.
1157 */
1158
1159 if(!fa_head)
1160 fa_head = fib_insert_node(t, key, plen);
1161
1162 write_lock_bh(&fib_lock);
1163
1164 list_add_tail(&new_fa->fa_list,
1165 (fa ? &fa->fa_list : fa_head));
1166
1167 write_unlock_bh(&fib_lock);
1168
1169 rt_cache_flush(-1);
1170 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1171succeeded:
1172 return 0;
1173out:
1174 fib_release_info(fi);
1175err:;
1176 return err;
1177}
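
The netlink flag handling above is easiest to see as a small decision table. The following is a simplified stand-alone mirror of it (flag values as defined in linux/netlink.h; 'exists' models fib_find_alias() returning an entry with the same prefix, tos and priority, and the exact-duplicate scan is omitted):

#include <stdio.h>

#define NLM_F_REPLACE	0x100	/* values as in linux/netlink.h */
#define NLM_F_EXCL	0x200
#define NLM_F_CREATE	0x400
#define NLM_F_APPEND	0x800

static const char *decide(int exists, int flags)
{
	if (exists) {
		if (flags & NLM_F_EXCL)
			return "-EEXIST";
		if (flags & NLM_F_REPLACE)
			return "replace the alias in place";
	}
	if (!(flags & NLM_F_CREATE))
		return "-ENOENT";
	return (flags & NLM_F_APPEND) ? "append after the matches"
				      : "insert before the matches";
}

int main(void)
{
	printf("%s\n", decide(1, NLM_F_CREATE | NLM_F_EXCL));	/* -EEXIST */
	printf("%s\n", decide(0, NLM_F_REPLACE));		/* -ENOENT */
	printf("%s\n", decide(0, NLM_F_CREATE));		/* insert ... */
	return 0;
}
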
1178
1179static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp,
1180 struct fib_result *res, int *err)
1181{
1182 int i;
1183 t_key mask;
1184 struct leaf_info *li;
1185 struct hlist_head *hhead = &l->list;
1186 struct hlist_node *node;
1187
1188 hlist_for_each_entry(li, node, hhead, hlist) {
1189
1190 i = li->plen;
1191 mask = ntohl(inet_make_mask(i));
1192 if (l->key != (key & mask))
1193 continue;
1194
1195 if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) {
1196 *plen = i;
1197#ifdef CONFIG_IP_FIB_TRIE_STATS
1198 t->stats.semantic_match_passed++;
1199#endif
1200 return 1;
1201 }
1202#ifdef CONFIG_IP_FIB_TRIE_STATS
1203 t->stats.semantic_match_miss++;
1204#endif
1205 }
1206 return 0;
1207}
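
check_leaf() tries every prefix length stored at the leaf, rebuilding the netmask from the stored plen and testing the lookup key under it. A small user-space sketch of that masking step, with make_mask() as a stand-in for the kernel's inet_make_mask():

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* Stand-in for inet_make_mask(): the /plen netmask in network byte
 * order (all-zero for plen == 0). */
static uint32_t make_mask(int plen)
{
	return plen ? htonl(~0u << (32 - plen)) : 0;
}

int main(void)
{
	uint32_t l_key = 0x0a000000;	/* leaf key 10.0.0.0 (host order) */
	uint32_t key   = 0x0a00002a;	/* lookup key 10.0.0.42 */
	int plen = 24;
	uint32_t mask = ntohl(make_mask(plen));

	/* the leaf matches iff its key equals the lookup key under the mask */
	printf("match: %d\n", l_key == (key & mask));	/* prints 1 */
	return 0;
}
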
1208
1209static int
1210fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1211{
1212 struct trie *t = (struct trie *) tb->tb_data;
1213 int plen, ret = 0;
1214 struct node *n;
1215 struct tnode *pn;
1216 int pos, bits;
1217 t_key key=ntohl(flp->fl4_dst);
1218 int chopped_off;
1219 t_key cindex = 0;
1220 int current_prefix_length = KEYLENGTH;
1221 n = t->trie;
1222
1223 read_lock(&fib_lock);
1224 if(!n)
1225 goto failed;
1226
1227#ifdef CONFIG_IP_FIB_TRIE_STATS
1228 t->stats.gets++;
1229#endif
1230
1231 /* Just a leaf? */
1232 if (IS_LEAF(n)) {
1233 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret) )
1234 goto found;
1235 goto failed;
1236 }
1237 pn = (struct tnode *) n;
1238 chopped_off = 0;
1239
1240 while (pn) {
1241
1242 pos = pn->pos;
1243 bits = pn->bits;
1244
1245 if(!chopped_off)
1246 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1247
1248 n = tnode_get_child(pn, cindex);
1249
1250 if (n == NULL) {
1251#ifdef CONFIG_IP_FIB_TRIE_STATS
1252 t->stats.null_node_hit++;
1253#endif
1254 goto backtrace;
1255 }
1256
1257 if (IS_TNODE(n)) {
1258#define HL_OPTIMIZE
1259#ifdef HL_OPTIMIZE
1260 struct tnode *cn = (struct tnode *)n;
1261 t_key node_prefix, key_prefix, pref_mismatch;
1262 int mp;
1263
1264 /*
1265 * It's a tnode, and we can do some extra checks here if we
1266 * like, to avoid descending into a dead-end branch.
1267 * This tnode is in the parent's child array at index
1268 * key[p_pos..p_pos+p_bits] but potentially with some bits
1269 * chopped off, so in reality the index may be just a
1270 * subprefix, padded with zero at the end.
1271 * We can also take a look at any skipped bits in this
1272 * tnode - everything up to p_pos is supposed to be ok,
1273	 * and the non-chopped bits of the index (see previous
1274 * paragraph) are also guaranteed ok, but the rest is
1275 * considered unknown.
1276 *
1277 * The skipped bits are key[pos+bits..cn->pos].
1278 */
1279
1280 /* If current_prefix_length < pos+bits, we are already doing
1281 * actual prefix matching, which means everything from
1282 * pos+(bits-chopped_off) onward must be zero along some
1283 * branch of this subtree - otherwise there is *no* valid
1284 * prefix present. Here we can only check the skipped
1285 * bits. Remember, since we have already indexed into the
1286	 * parent's child array, we know that the bits we chopped off
1287 * *are* zero.
1288 */
1289
1290 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1291
1292 if (current_prefix_length < pos+bits) {
1293 if (tkey_extract_bits(cn->key, current_prefix_length,
1294 cn->pos - current_prefix_length) != 0 ||
1295 !(cn->child[0]))
1296 goto backtrace;
1297 }
1298
1299 /*
1300 * If chopped_off=0, the index is fully validated and we
1301 * only need to look at the skipped bits for this, the new,
1302 * tnode. What we actually want to do is to find out if
1303 * these skipped bits match our key perfectly, or if we will
1304 * have to count on finding a matching prefix further down,
1305 * because if we do, we would like to have some way of
1306 * verifying the existence of such a prefix at this point.
1307 */
1308
1309 /* The only thing we can do at this point is to verify that
1310 * any such matching prefix can indeed be a prefix to our
1311 * key, and if the bits in the node we are inspecting that
1312 * do not match our key are not ZERO, this cannot be true.
1313 * Thus, find out where there is a mismatch (before cn->pos)
1314 * and verify that all the mismatching bits are zero in the
1315 * new tnode's key.
1316 */
1317
1318	/* Note: We aren't very concerned about the piece of the key
1319	 * that precedes pn->pos+pn->bits, since it has already been
1320 * checked. The bits after cn->pos aren't checked since these are
1321 * by definition "unknown" at this point. Thus, what we want to
1322 * see is if we are about to enter the "prefix matching" state,
1323 * and in that case verify that the skipped bits that will prevail
1324 * throughout this subtree are zero, as they have to be if we are
1325 * to find a matching prefix.
1326 */
1327
1328 node_prefix = MASK_PFX(cn->key, cn->pos);
1329 key_prefix = MASK_PFX(key, cn->pos);
1330 pref_mismatch = key_prefix^node_prefix;
1331 mp = 0;
1332
1333 /* In short: If skipped bits in this node do not match the search
1334	 * key, enter the "prefix matching" state directly.
1335 */
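			/*
			 * Worked example (illustrative values): with
			 * key_prefix = 0x8a000000 and node_prefix =
			 * 0x8b000000, pref_mismatch = 0x01000000. The loop
			 * below shifts it left until bit 0 (the MSB) is set,
			 * which takes 7 steps, so mp = 7: the keys first
			 * disagree at bit 7. A matching prefix can then only
			 * exist if bits mp..cn->pos of cn->key are all zero,
			 * which is exactly what the tkey_extract_bits()
			 * check verifies.
			 */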
1336 if (pref_mismatch) {
1337 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1338 mp++;
1339 pref_mismatch = pref_mismatch <<1;
1340 }
1341 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1342
1343 if (key_prefix != 0)
1344 goto backtrace;
1345
1346 if (current_prefix_length >= cn->pos)
1347 current_prefix_length=mp;
1348 }
1349#endif
1350 pn = (struct tnode *)n; /* Descend */
1351 chopped_off = 0;
1352 continue;
1353 }
1354 if (IS_LEAF(n)) {
1355 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
1356 goto found;
1357 }
1358backtrace:
1359 chopped_off++;
1360
1361		/* Zero bits don't change the child key (cindex), so skip past them */
1362 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) {
1363 chopped_off++;
1364 }
1365
1366		/* Decrease current_prefix_length by the bits chopped off */
1367 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1368 current_prefix_length = pn->pos + pn->bits - chopped_off;
1369
1370 /*
1371		 * Either we do the actual chop off here, or, if we have
1372		 * chopped off all the bits in this tnode, we walk up to our parent.
1373 */
1374
1375 if(chopped_off <= pn->bits)
1376 cindex &= ~(1 << (chopped_off-1));
1377 else {
1378 if( NODE_PARENT(pn) == NULL)
1379 goto failed;
1380
1381 /* Get Child's index */
1382 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1383 pn = NODE_PARENT(pn);
1384 chopped_off = 0;
1385
1386#ifdef CONFIG_IP_FIB_TRIE_STATS
1387 t->stats.backtrack++;
1388#endif
1389 goto backtrace;
1390 }
1391 }
1392failed:
1393 ret = 1;
1394found:
1395 read_unlock(&fib_lock);
1396 return ret;
1397}
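
The backtrace step above is pure bit arithmetic on the child index: trailing zero bits are skipped, because clearing them would reproduce the same index, and the first one-bit is then cleared so a shorter prefix can be retried. A stand-alone sketch of one such step (the values are illustrative):

#include <stdio.h>

int main(void)
{
	unsigned int cindex = 0x6;	/* child index 0110 in a 4-bit tnode */
	int bits = 4, chopped_off = 0;

	/* skip trailing zero bits, then clear the first one-bit */
	chopped_off++;
	while (chopped_off <= bits && !(cindex & (1u << (chopped_off - 1))))
		chopped_off++;

	if (chopped_off <= bits)
		cindex &= ~(1u << (chopped_off - 1));

	/* prints: cindex = 4, chopped_off = 2 */
	printf("cindex = %u, chopped_off = %d\n", cindex, chopped_off);
	return 0;
}
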
1398
1399static int trie_leaf_remove(struct trie *t, t_key key)
1400{
1401 t_key cindex;
1402 struct tnode *tp = NULL;
1403 struct node *n = t->trie;
1404 struct leaf *l;
1405
1406 if(trie_debug)
1407 printk("entering trie_leaf_remove(%p)\n", n);
1408
1409	/* Note that in the case of skipped bits, those bits are *not* checked!
1410 * When we finish this, we will have NULL or a T_LEAF, and the
1411 * T_LEAF may or may not match our key.
1412 */
1413
1414 while (n != NULL && IS_TNODE(n)) {
1415 struct tnode *tn = (struct tnode *) n;
1416 check_tnode(tn);
1417		n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1418
1419 if(n && NODE_PARENT(n) != tn) {
1420 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1421 BUG();
1422 }
1423 }
1424 l = (struct leaf *) n;
1425
1426 if(!n || !tkey_equals(l->key, key))
1427 return 0;
1428
1429 /*
1430 * Key found.
1431 * Remove the leaf and rebalance the tree
1432 */
1433
1434 t->revision++;
1435 t->size--;
1436
1437 tp = NODE_PARENT(n);
1438 tnode_free((struct tnode *) n);
1439
1440 if(tp) {
1441 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1442 put_child(t, (struct tnode *)tp, cindex, NULL);
1443 t->trie = trie_rebalance(t, tp);
1444 }
1445 else
1446 t->trie = NULL;
1447
1448 return 1;
1449}
1450
1451static int
1452fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1453 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1454{
1455 struct trie *t = (struct trie *) tb->tb_data;
1456 u32 key, mask;
1457 int plen = r->rtm_dst_len;
1458 u8 tos = r->rtm_tos;
1459 struct fib_alias *fa, *fa_to_delete;
1460 struct list_head *fa_head;
1461 struct leaf *l;
1462
1463 if (plen > 32)
1464 return -EINVAL;
1465
1466 key = 0;
1467 if (rta->rta_dst)
1468 memcpy(&key, rta->rta_dst, 4);
1469
1470 key = ntohl(key);
1471 mask = ntohl( inet_make_mask(plen) );
1472
1473 if(key & ~mask)
1474 return -EINVAL;
1475
1476 key = key & mask;
1477 l = fib_find_node(t, key);
1478
1479 if(!l)
1480 return -ESRCH;
1481
1482 fa_head = get_fa_head(l, plen);
1483 fa = fib_find_alias(fa_head, tos, 0);
1484
1485 if (!fa)
1486 return -ESRCH;
1487
1488 if (trie_debug)
1489 printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1490
1491 fa_to_delete = NULL;
1492 fa_head = fa->fa_list.prev;
1493 list_for_each_entry(fa, fa_head, fa_list) {
1494 struct fib_info *fi = fa->fa_info;
1495
1496 if (fa->fa_tos != tos)
1497 break;
1498
1499 if ((!r->rtm_type ||
1500 fa->fa_type == r->rtm_type) &&
1501 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1502 fa->fa_scope == r->rtm_scope) &&
1503 (!r->rtm_protocol ||
1504 fi->fib_protocol == r->rtm_protocol) &&
1505 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1506 fa_to_delete = fa;
1507 break;
1508 }
1509 }
1510
1511 if (fa_to_delete) {
1512 int kill_li = 0;
1513 struct leaf_info *li;
1514
1515 fa = fa_to_delete;
1516 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1517
1518 l = fib_find_node(t, key);
1519 li = find_leaf_info(&l->list, plen);
1520
1521 write_lock_bh(&fib_lock);
1522
1523 list_del(&fa->fa_list);
1524
1525 if(list_empty(fa_head)) {
1526 hlist_del(&li->hlist);
1527 kill_li = 1;
1528 }
1529 write_unlock_bh(&fib_lock);
1530
1531 if(kill_li)
1532 free_leaf_info(li);
1533
1534 if(hlist_empty(&l->list))
1535 trie_leaf_remove(t, key);
1536
1537 if (fa->fa_state & FA_S_ACCESSED)
1538 rt_cache_flush(-1);
1539
1540 fn_free_alias(fa);
1541 return 0;
1542 }
1543 return -ESRCH;
1544}
1545
1546static int trie_flush_list(struct trie *t, struct list_head *head)
1547{
1548 struct fib_alias *fa, *fa_node;
1549 int found = 0;
1550
1551 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1552 struct fib_info *fi = fa->fa_info;
1553
1554 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
1555
1556 write_lock_bh(&fib_lock);
1557 list_del(&fa->fa_list);
1558 write_unlock_bh(&fib_lock);
1559
1560 fn_free_alias(fa);
1561 found++;
1562 }
1563 }
1564 return found;
1565}
1566
1567static int trie_flush_leaf(struct trie *t, struct leaf *l)
1568{
1569 int found = 0;
1570 struct hlist_head *lih = &l->list;
1571 struct hlist_node *node, *tmp;
1572 struct leaf_info *li = NULL;
1573
1574 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1575
1576 found += trie_flush_list(t, &li->falh);
1577
1578 if (list_empty(&li->falh)) {
1579
1580 write_lock_bh(&fib_lock);
1581 hlist_del(&li->hlist);
1582 write_unlock_bh(&fib_lock);
1583
1584 free_leaf_info(li);
1585 }
1586 }
1587 return found;
1588}
1589
1590static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1591{
1592 struct node *c = (struct node *) thisleaf;
1593 struct tnode *p;
1594 int idx;
1595
1596 if(c == NULL) {
1597 if(t->trie == NULL)
1598 return NULL;
1599
1600 if (IS_LEAF(t->trie)) /* trie w. just a leaf */
1601 return (struct leaf *) t->trie;
1602
1603 p = (struct tnode*) t->trie; /* Start */
1604 }
1605 else
1606 p = (struct tnode *) NODE_PARENT(c);
1607 while (p) {
1608 int pos, last;
1609
1610 /* Find the next child of the parent */
1611 if(c)
1612 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1613 else
1614 pos = 0;
1615
1616 last = 1 << p->bits;
1617 for(idx = pos; idx < last ; idx++) {
1618 if( p->child[idx]) {
1619
1620				/* Descend if tnode */
1621
1622 while (IS_TNODE(p->child[idx])) {
1623 p = (struct tnode*) p->child[idx];
1624 idx = 0;
1625
1626					/* Leftmost non-NULL branch */
1627					if( p && IS_TNODE(p) )
1628						while ( idx < (1 << p->bits) && p->child[idx] == NULL ) idx++;
1629
1630 /* Done with this tnode? */
1631 if( idx >= (1 << p->bits) || p->child[idx] == NULL )
1632 goto up;
1633 }
1634 return (struct leaf*) p->child[idx];
1635 }
1636 }
1637up:
1638		/* No more children, go up one step */
1639 c = (struct node*) p;
1640 p = (struct tnode *) NODE_PARENT(p);
1641 }
1642 return NULL; /* Ready. Root of trie */
1643}
1644
1645static int fn_trie_flush(struct fib_table *tb)
1646{
1647 struct trie *t = (struct trie *) tb->tb_data;
1648 struct leaf *ll = NULL, *l = NULL;
1649 int found = 0, h;
1650
1651 t->revision++;
1652
1653 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1654 found += trie_flush_leaf(t, l);
1655
1656 if (ll && hlist_empty(&ll->list))
1657 trie_leaf_remove(t, ll->key);
1658 ll = l;
1659 }
1660
1661 if (ll && hlist_empty(&ll->list))
1662 trie_leaf_remove(t, ll->key);
1663
1664 if(trie_debug)
1665 printk("trie_flush found=%d\n", found);
1666 return found;
1667}
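
Note the lagging pointer: ll always trails one leaf behind l, so a leaf that trie_flush_leaf() has emptied is only removed from the trie after nextleaf() has already moved past it. The same pattern in miniature, on a plain singly linked list (toy types, nothing from the kernel):

#include <stdio.h>

struct leaf { int key, empty; struct leaf *next; };

int main(void)
{
	/* three leaves; flushing has emptied the middle one */
	struct leaf c = { 3, 0, NULL };
	struct leaf b = { 2, 1, &c };
	struct leaf a = { 1, 0, &b };
	struct leaf *l, *ll = NULL;

	for (l = &a; l != NULL; l = l->next) {
		if (ll && ll->empty)	/* safe: iterator is already past it */
			printf("remove leaf %d\n", ll->key);
		ll = l;
	}
	if (ll && ll->empty)		/* the last leaf still needs checking */
		printf("remove leaf %d\n", ll->key);
	return 0;
}
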
1668
1669static int trie_last_dflt=-1;
1670
1671static void
1672fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1673{
1674 struct trie *t = (struct trie *) tb->tb_data;
1675 int order, last_idx;
1676 struct fib_info *fi = NULL;
1677 struct fib_info *last_resort;
1678 struct fib_alias *fa = NULL;
1679 struct list_head *fa_head;
1680 struct leaf *l;
1681
1682 last_idx = -1;
1683 last_resort = NULL;
1684 order = -1;
1685
1686 read_lock(&fib_lock);
1687
1688 l = fib_find_node(t, 0);
1689 if(!l)
1690 goto out;
1691
1692 fa_head = get_fa_head(l, 0);
1693 if(!fa_head)
1694 goto out;
1695
1696 if (list_empty(fa_head))
1697 goto out;
1698
1699 list_for_each_entry(fa, fa_head, fa_list) {
1700 struct fib_info *next_fi = fa->fa_info;
1701
1702 if (fa->fa_scope != res->scope ||
1703 fa->fa_type != RTN_UNICAST)
1704 continue;
1705
1706 if (next_fi->fib_priority > res->fi->fib_priority)
1707 break;
1708 if (!next_fi->fib_nh[0].nh_gw ||
1709 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1710 continue;
1711 fa->fa_state |= FA_S_ACCESSED;
1712
1713 if (fi == NULL) {
1714 if (next_fi != res->fi)
1715 break;
1716 } else if (!fib_detect_death(fi, order, &last_resort,
1717 &last_idx, &trie_last_dflt)) {
1718 if (res->fi)
1719 fib_info_put(res->fi);
1720 res->fi = fi;
1721 atomic_inc(&fi->fib_clntref);
1722 trie_last_dflt = order;
1723 goto out;
1724 }
1725 fi = next_fi;
1726 order++;
1727 }
1728 if (order <= 0 || fi == NULL) {
1729 trie_last_dflt = -1;
1730 goto out;
1731 }
1732
1733 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1734 if (res->fi)
1735 fib_info_put(res->fi);
1736 res->fi = fi;
1737 atomic_inc(&fi->fib_clntref);
1738 trie_last_dflt = order;
1739 goto out;
1740 }
1741 if (last_idx >= 0) {
1742 if (res->fi)
1743 fib_info_put(res->fi);
1744 res->fi = last_resort;
1745 if (last_resort)
1746 atomic_inc(&last_resort->fib_clntref);
1747 }
1748 trie_last_dflt = last_idx;
1749 out:;
1750 read_unlock(&fib_lock);
1751}
1752
1753static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
1754 struct sk_buff *skb, struct netlink_callback *cb)
1755{
1756 int i, s_i;
1757 struct fib_alias *fa;
1758
1759 u32 xkey=htonl(key);
1760
1761 s_i=cb->args[3];
1762 i = 0;
1763
1764 list_for_each_entry(fa, fah, fa_list) {
1765 if (i < s_i) {
1766 i++;
1767 continue;
1768 }
1769		if (fa->fa_info == NULL) {
1770			printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1771			i++;
1772			continue;
1773		}
1774		if (fa->fa_info->fib_nh == NULL) {
1775			printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1776			i++;
1777			continue;
1778		}
1779
1780 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1781 cb->nlh->nlmsg_seq,
1782 RTM_NEWROUTE,
1783 tb->tb_id,
1784 fa->fa_type,
1785 fa->fa_scope,
1786 &xkey,
1787 plen,
1788 fa->fa_tos,
1789 fa->fa_info, 0) < 0) {
1790 cb->args[3] = i;
1791 return -1;
1792 }
1793 i++;
1794 }
1795 cb->args[3]=i;
1796 return skb->len;
1797}
1798
1799static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
1800 struct netlink_callback *cb)
1801{
1802 int h, s_h;
1803 struct list_head *fa_head;
1804 struct leaf *l = NULL;
1805 s_h=cb->args[2];
1806
1807 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1808
1809 if (h < s_h)
1810 continue;
1811 if (h > s_h)
1812 memset(&cb->args[3], 0,
1813 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1814
1815 fa_head = get_fa_head(l, plen);
1816
1817 if(!fa_head)
1818 continue;
1819
1820 if(list_empty(fa_head))
1821 continue;
1822
1823 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1824 cb->args[2]=h;
1825 return -1;
1826 }
1827 }
1828 cb->args[2]=h;
1829 return skb->len;
1830}
1831
1832static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1833{
1834 int m, s_m;
1835 struct trie *t = (struct trie *) tb->tb_data;
1836
1837 s_m = cb->args[1];
1838
1839 read_lock(&fib_lock);
1840 for (m=0; m<=32; m++) {
1841
1842 if (m < s_m)
1843 continue;
1844 if (m > s_m)
1845 memset(&cb->args[2], 0,
1846 sizeof(cb->args) - 2*sizeof(cb->args[0]));
1847
1848 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1849 cb->args[1] = m;
1850 goto out;
1851 }
1852 }
1853 read_unlock(&fib_lock);
1854 cb->args[1] = m;
1855 return skb->len;
1856 out:
1857 read_unlock(&fib_lock);
1858 return -1;
1859}
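
The dump is restartable: cb->args[1] holds the prefix-length index m, cb->args[2] the leaf counter h, and cb->args[3] the alias index i, and each level zeroes the deeper cursors once it advances past its saved position. The following is a toy user-space model of that resumption scheme, not the netlink API itself (the two-level nesting and the fixed budget are our simplifications):

#include <stdio.h>

static long args[3];	/* models cb->args[]: [1] outer m, [2] inner h */
static int budget;	/* models space left in the skb */

static int dump_inner(long m)
{
	long h, s_h = args[2];

	for (h = 0; h < 3; h++) {
		if (h < s_h)
			continue;		/* sent on a previous call */
		if (budget == 0) {
			args[2] = h;		/* resume here next time */
			return -1;
		}
		budget--;
		printf("entry m=%ld h=%ld\n", m, h);
	}
	args[2] = h;
	return 0;
}

static int dump(void)
{
	long m, s_m = args[1];

	for (m = 0; m < 2; m++) {
		if (m < s_m)
			continue;
		if (m > s_m)
			args[2] = 0;	/* new outer position: restart inner */
		if (dump_inner(m) < 0) {
			args[1] = m;
			return -1;
		}
	}
	args[1] = m;
	return 0;
}

int main(void)
{
	budget = 2;
	while (dump() < 0)
		budget = 2;	/* userspace issues the next recv() */
	return 0;
}

Run to completion, every entry is printed exactly once even though each "skb" only holds two of them.
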
1860
1861/* Fix more generic FIB names for init later */
1862
1863#ifdef CONFIG_IP_MULTIPLE_TABLES
1864struct fib_table * fib_hash_init(int id)
1865#else
1866struct fib_table * __init fib_hash_init(int id)
1867#endif
1868{
1869 struct fib_table *tb;
1870 struct trie *t;
1871
1872 if (fn_alias_kmem == NULL)
1873 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1874 sizeof(struct fib_alias),
1875 0, SLAB_HWCACHE_ALIGN,
1876 NULL, NULL);
1877
1878 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1879 GFP_KERNEL);
1880 if (tb == NULL)
1881 return NULL;
1882
1883 tb->tb_id = id;
1884 tb->tb_lookup = fn_trie_lookup;
1885 tb->tb_insert = fn_trie_insert;
1886 tb->tb_delete = fn_trie_delete;
1887 tb->tb_flush = fn_trie_flush;
1888 tb->tb_select_default = fn_trie_select_default;
1889 tb->tb_dump = fn_trie_dump;
1890 memset(tb->tb_data, 0, sizeof(struct trie));
1891
1892 t = (struct trie *) tb->tb_data;
1893
1894 trie_init(t);
1895
1896 if (id == RT_TABLE_LOCAL)
1897 trie_local=t;
1898 else if (id == RT_TABLE_MAIN)
1899 trie_main=t;
1900
1901 if (id == RT_TABLE_LOCAL)
1902 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
1903
1904 return tb;
1905}
1906
1907/* Trie dump functions */
1908
1909static void putspace_seq(struct seq_file *seq, int n)
1910{
1911 while (n--) seq_printf(seq, " ");
1912}
1913
1914static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
1915{
1916 while (bits--)
1917 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
1918}
1919
1920static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
1921 int pend, int cindex, int bits)
1922{
1923 putspace_seq(seq, indent);
1924 if (IS_LEAF(n))
1925 seq_printf(seq, "|");
1926 else
1927 seq_printf(seq, "+");
1928 if (bits) {
1929 seq_printf(seq, "%d/", cindex);
1930 printbin_seq(seq, cindex, bits);
1931 seq_printf(seq, ": ");
1932 }
1933 else
1934 seq_printf(seq, "<root>: ");
1935 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
1936
1937 if (IS_LEAF(n))
1938 seq_printf(seq, "key=%d.%d.%d.%d\n",
1939 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
1940 else {
1941 int plen=((struct tnode *)n)->pos;
1942 t_key prf=MASK_PFX(n->key, plen);
1943 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
1944 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
1945 }
1946 if (IS_LEAF(n)) {
1947 struct leaf *l=(struct leaf *)n;
1948 struct fib_alias *fa;
1949 int i;
1950 for (i=32; i>=0; i--)
1951 if(find_leaf_info(&l->list, i)) {
1952
1953 struct list_head *fa_head = get_fa_head(l, i);
1954
1955 if(!fa_head)
1956 continue;
1957
1958 if(list_empty(fa_head))
1959 continue;
1960
1961 putspace_seq(seq, indent+2);
1962 seq_printf(seq, "{/%d...dumping}\n", i);
1963
1964
1965 list_for_each_entry(fa, fa_head, fa_list) {
1966 putspace_seq(seq, indent+2);
1967				if (fa->fa_info == NULL) {
1968					seq_printf(seq, "Error fa_info=NULL\n");
1969					continue;
1970				}
1971				if (fa->fa_info->fib_nh == NULL) {
1972					seq_printf(seq, "Error _fib_nh=NULL\n");
1973					continue;
1974				}
1975
1976 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
1977 fa->fa_type,
1978 fa->fa_scope,
1979 fa->fa_tos);
1980 }
1981 }
1982 }
1983 else if (IS_TNODE(n)) {
1984 struct tnode *tn=(struct tnode *)n;
1985 putspace_seq(seq, indent); seq_printf(seq, "| ");
1986 seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos));
1987 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
1988 seq_printf(seq, "}\n");
1989 putspace_seq(seq, indent); seq_printf(seq, "| ");
1990 seq_printf(seq, "{pos=%d", tn->pos);
1991 seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
1992 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
1993 putspace_seq(seq, indent); seq_printf(seq, "| ");
1994 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
1995 }
1996}
1997
1998static void trie_dump_seq(struct seq_file *seq, struct trie *t)
1999{
2000 struct node *n=t->trie;
2001 int cindex=0;
2002 int indent=1;
2003 int pend=0;
2004 int depth = 0;
2005
2006 read_lock(&fib_lock);
2007
2008 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
2009 if (n) {
2010 printnode_seq(seq, indent, n, pend, cindex, 0);
2011 if (IS_TNODE(n)) {
2012 struct tnode *tn=(struct tnode *)n;
2013 pend = tn->pos+tn->bits;
2014 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2015 indent += 3;
2016 depth++;
2017
2018 while (tn && cindex < (1 << tn->bits)) {
2019 if (tn->child[cindex]) {
2020
2021 /* Got a child */
2022
2023 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
2024 if (IS_LEAF(tn->child[cindex])) {
2025 cindex++;
2026
2027 }
2028 else {
2029 /*
2030			 * New tnode. Descend one level
2031 */
2032
2033 depth++;
2034 n=tn->child[cindex];
2035 tn=(struct tnode *)n;
2036 pend=tn->pos+tn->bits;
2037 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2038 indent+=3;
2039 cindex=0;
2040 }
2041 }
2042 else
2043 cindex++;
2044
2045 /*
2046 * Test if we are done
2047 */
2048
2049 while (cindex >= (1 << tn->bits)) {
2050
2051 /*
2052 * Move upwards and test for root
2053 * pop off all traversed nodes
2054 */
2055
2056 if (NODE_PARENT(tn) == NULL) {
2057 tn = NULL;
2058 n = NULL;
2059 break;
2060 }
2061 else {
2062 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2063 tn = NODE_PARENT(tn);
2064 cindex++;
2065 n=(struct node *)tn;
2066 pend=tn->pos+tn->bits;
2067 indent-=3;
2068 depth--;
2069 }
2070 }
2071 }
2072 }
2073 else n = NULL;
2074 }
2075 else seq_printf(seq, "------ trie is empty\n");
2076
2077 read_unlock(&fib_lock);
2078}
2079
2080static struct trie_stat *trie_stat_new(void)
2081{
2082 struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
2083 int i;
2084
2085 if(s) {
2086 s->totdepth = 0;
2087 s->maxdepth = 0;
2088 s->tnodes = 0;
2089 s->leaves = 0;
2090 s->nullpointers = 0;
2091
2092 for(i=0; i< MAX_CHILDS; i++)
2093 s->nodesizes[i] = 0;
2094 }
2095 return s;
2096}
2097
2098static struct trie_stat *trie_collect_stats(struct trie *t)
2099{
2100 struct node *n=t->trie;
2101 struct trie_stat *s = trie_stat_new();
2102 int cindex = 0;
2103 int indent = 1;
2104 int pend = 0;
2105 int depth = 0;
2106
2107 read_lock(&fib_lock);
2108
2109 if (s) {
2110 if (n) {
2111 if (IS_TNODE(n)) {
2112 struct tnode *tn = (struct tnode *)n;
2113 pend=tn->pos+tn->bits;
2114 indent += 3;
2115 s->nodesizes[tn->bits]++;
2116 depth++;
2117
2118 while (tn && cindex < (1 << tn->bits)) {
2119 if (tn->child[cindex]) {
2120 /* Got a child */
2121
2122 if (IS_LEAF(tn->child[cindex])) {
2123 cindex++;
2124
2125 /* stats */
2126 if (depth > s->maxdepth)
2127 s->maxdepth = depth;
2128 s->totdepth += depth;
2129 s->leaves++;
2130 }
2131
2132 else {
2133 /*
2134					 * New tnode. Descend one level
2135 */
2136
2137 s->tnodes++;
2138 s->nodesizes[tn->bits]++;
2139 depth++;
2140
2141 n = tn->child[cindex];
2142 tn = (struct tnode *)n;
2143 pend = tn->pos+tn->bits;
2144
2145 indent += 3;
2146 cindex = 0;
2147 }
2148 }
2149 else {
2150 cindex++;
2151 s->nullpointers++;
2152 }
2153
2154 /*
2155 * Test if we are done
2156 */
2157
2158 while (cindex >= (1 << tn->bits)) {
2159
2160 /*
2161 * Move upwards and test for root
2162 * pop off all traversed nodes
2163 */
2164
2165
2166 if (NODE_PARENT(tn) == NULL) {
2167 tn = NULL;
2168 n = NULL;
2169 break;
2170 }
2171 else {
2172 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2173 tn = NODE_PARENT(tn);
2174 cindex++;
2175 n = (struct node *)tn;
2176 pend=tn->pos+tn->bits;
2177 indent -= 3;
2178 depth--;
2179 }
2180 }
2181 }
2182 }
2183 else n = NULL;
2184 }
2185 }
2186
2187 read_unlock(&fib_lock);
2188 return s;
2189}
2190
2191#ifdef CONFIG_PROC_FS
2192
2193static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
2194{
2195 return NULL;
2196}
2197
2198static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
2199{
2200 return NULL;
2201}
2202
2203static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
2204{
2205 void *v = NULL;
2206
2207 if (ip_fib_main_table)
2208 v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN;
2209 return v;
2210}
2211
2212static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2213{
2214 ++*pos;
2215 return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq);
2216}
2217
2218static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
2219{
2220
2221}
2222
2223/*
2224 * This outputs /proc/net/fib_triestat
2225 *
2226 * It always works in backward compatibility mode.
2227 * The format of the file is not supposed to be changed.
2228 */
2229
2230static void collect_and_show(struct trie *t, struct seq_file *seq)
2231{
2232 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
2233 int i, max, pointers;
2234 struct trie_stat *stat;
2235 int avdepth;
2236
2237 stat = trie_collect_stats(t);
2238
2239 bytes=0;
2240 seq_printf(seq, "trie=%p\n", t);
2241
2242 if (stat) {
2243 if (stat->leaves)
2244 avdepth=stat->totdepth*100 / stat->leaves;
2245 else
2246 avdepth=0;
2247 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2248 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
2249
2250 seq_printf(seq, "Leaves: %d\n", stat->leaves);
2251 bytes += sizeof(struct leaf) * stat->leaves;
2252 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
2253 bytes += sizeof(struct tnode) * stat->tnodes;
2254
2255 max = MAX_CHILDS-1;
2256
2257 while (max >= 0 && stat->nodesizes[max] == 0)
2258 max--;
2259 pointers = 0;
2260
2261 for (i = 1; i <= max; i++)
2262 if (stat->nodesizes[i] != 0) {
2263 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2264 pointers += (1<<i) * stat->nodesizes[i];
2265 }
2266 seq_printf(seq, "\n");
2267 seq_printf(seq, "Pointers: %d\n", pointers);
2268 bytes += sizeof(struct node *) * pointers;
2269 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2270 seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
2271
2272 kfree(stat);
2273 }
2274
2275#ifdef CONFIG_IP_FIB_TRIE_STATS
2276 seq_printf(seq, "Counters:\n---------\n");
2277 seq_printf(seq,"gets = %d\n", t->stats.gets);
2278 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2279 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2280 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2281 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2282#ifdef CLEAR_STATS
2283 memset(&(t->stats), 0, sizeof(t->stats));
2284#endif
2285#endif /* CONFIG_IP_FIB_TRIE_STATS */
2286}
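
The average depth is computed in fixed point so the kernel avoids floating point: totdepth is scaled by 100 before the division and the result is printed as two decimals. A one-line demonstration with sample counters:

#include <stdio.h>

int main(void)
{
	int totdepth = 523, leaves = 100;	/* sample counters */
	int avdepth = totdepth * 100 / leaves;	/* scaled by 100 */

	/* 52300 / 100 = 523, printed as "5.23" */
	printf("Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100);
	return 0;
}
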
2287
2288static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2289{
2290 char bf[128];
2291
2292 if (v == SEQ_START_TOKEN) {
2293 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2294 sizeof(struct leaf), sizeof(struct tnode));
2295 if (trie_local)
2296 collect_and_show(trie_local, seq);
2297
2298 if (trie_main)
2299 collect_and_show(trie_main, seq);
2300 }
2301 else {
2302 snprintf(bf, sizeof(bf),
2303 "*\t%08X\t%08X", 200, 400);
2304
2305 seq_printf(seq, "%-127s\n", bf);
2306 }
2307 return 0;
2308}
2309
2310static struct seq_operations fib_triestat_seq_ops = {
2311 .start = fib_triestat_seq_start,
2312 .next = fib_triestat_seq_next,
2313 .stop = fib_triestat_seq_stop,
2314 .show = fib_triestat_seq_show,
2315};
2316
2317static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2318{
2319 struct seq_file *seq;
2320 int rc = -ENOMEM;
2321
2322 rc = seq_open(file, &fib_triestat_seq_ops);
2323 if (rc)
2324 goto out_kfree;
2325
2326 seq = file->private_data;
2327out:
2328 return rc;
2329out_kfree:
2330 goto out;
2331}
2332
2333static struct file_operations fib_triestat_seq_fops = {
2334 .owner = THIS_MODULE,
2335 .open = fib_triestat_seq_open,
2336 .read = seq_read,
2337 .llseek = seq_lseek,
2338 .release = seq_release_private,
2339};
2340
2341int __init fib_stat_proc_init(void)
2342{
2343 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
2344 return -ENOMEM;
2345 return 0;
2346}
2347
2348void __init fib_stat_proc_exit(void)
2349{
2350 proc_net_remove("fib_triestat");
2351}
2352
2353static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
2354{
2355 return NULL;
2356}
2357
2358static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
2359{
2360 return NULL;
2361}
2362
2363static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2364{
2365 void *v = NULL;
2366
2367 if (ip_fib_main_table)
2368 v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN;
2369 return v;
2370}
2371
2372static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2373{
2374 ++*pos;
2375 return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq);
2376}
2377
2378static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2379{
2380
2381}
2382
2383/*
2384 * This outputs /proc/net/fib_trie.
2385 *
2386 * It always works in backward compatibility mode.
2387 * The format of the file is not supposed to be changed.
2388 */
2389
2390static int fib_trie_seq_show(struct seq_file *seq, void *v)
2391{
2392 char bf[128];
2393
2394 if (v == SEQ_START_TOKEN) {
2395 if (trie_local)
2396 trie_dump_seq(seq, trie_local);
2397
2398 if (trie_main)
2399 trie_dump_seq(seq, trie_main);
2400 }
2401
2402 else {
2403 snprintf(bf, sizeof(bf),
2404 "*\t%08X\t%08X", 200, 400);
2405 seq_printf(seq, "%-127s\n", bf);
2406 }
2407
2408 return 0;
2409}
2410
2411static struct seq_operations fib_trie_seq_ops = {
2412 .start = fib_trie_seq_start,
2413 .next = fib_trie_seq_next,
2414 .stop = fib_trie_seq_stop,
2415 .show = fib_trie_seq_show,
2416};
2417
2418static int fib_trie_seq_open(struct inode *inode, struct file *file)
2419{
2420 struct seq_file *seq;
2421 int rc = -ENOMEM;
2422
2423 rc = seq_open(file, &fib_trie_seq_ops);
2424 if (rc)
2425 goto out_kfree;
2426
2427 seq = file->private_data;
2428out:
2429 return rc;
2430out_kfree:
2431 goto out;
2432}
2433
2434static struct file_operations fib_trie_seq_fops = {
2435 .owner = THIS_MODULE,
2436 .open = fib_trie_seq_open,
2437 .read = seq_read,
2438 .llseek = seq_lseek,
2439 .release = seq_release_private,
2440};
2441
2442int __init fib_proc_init(void)
2443{
2444 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
2445 return -ENOMEM;
2446 return 0;
2447}
2448
2449void __init fib_proc_exit(void)
2450{
2451 proc_net_remove("fib_trie");
2452}
2453
2454#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 85bf0d3e29..cb75948497 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -207,6 +207,7 @@ int sysctl_icmp_ignore_bogus_error_responses;
 
 int sysctl_icmp_ratelimit = 1 * HZ;
 int sysctl_icmp_ratemask = 0x1818;
+int sysctl_icmp_errors_use_inbound_ifaddr;
 
 /*
  * ICMP control array. This specifies what to do with each ICMP.
@@ -511,8 +512,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
 	 */
 
 	saddr = iph->daddr;
-	if (!(rt->rt_flags & RTCF_LOCAL))
-		saddr = 0;
+	if (!(rt->rt_flags & RTCF_LOCAL)) {
+		if (sysctl_icmp_errors_use_inbound_ifaddr)
+			saddr = inet_select_addr(skb_in->dev, 0, RT_SCOPE_LINK);
+		else
+			saddr = 0;
+	}
 
 	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
 					   IPTOS_PREC_INTERNETCONTROL) :
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 4e47a2658c..af2ec88bbb 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -184,6 +184,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 				raw_rcv(last, skb2);
 			}
 			last = sk;
+			nf_reset(skb);
 		}
 	}
 
@@ -200,10 +201,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
 {
 	int ihl = skb->nh.iph->ihl*4;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	nf_debug_ip_local_deliver(skb);
-#endif /*CONFIG_NETFILTER_DEBUG*/
-
 	__skb_pull(skb, ihl);
 
 	/* Free reference early: we don't need it any more, and it may
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 760dc8238d..ee07aec215 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -107,10 +107,6 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
 	BUG_TRAP(newskb->dst);
-
-#ifdef CONFIG_NETFILTER_DEBUG
-	nf_debug_ip_loopback_xmit(newskb);
-#endif
 	nf_reset(newskb);
 	netif_rx(newskb);
 	return 0;
@@ -192,10 +188,6 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 		skb = skb2;
 	}
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	nf_debug_ip_finish_output2(skb);
-#endif /*CONFIG_NETFILTER_DEBUG*/
-
 	nf_reset(skb);
 
 	if (hh) {
@@ -415,9 +407,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->nf_bridge = from->nf_bridge;
 	nf_bridge_get(to->nf_bridge);
 #endif
-#ifdef CONFIG_NETFILTER_DEBUG
-	to->nf_debug = from->nf_debug;
-#endif
 #endif
 }
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 47012b93ca..f8b172f898 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -360,14 +360,14 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
 	err = copied;
 
 	/* Reset and regenerate socket error */
-	spin_lock_irq(&sk->sk_error_queue.lock);
+	spin_lock_bh(&sk->sk_error_queue.lock);
 	sk->sk_err = 0;
 	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
 		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
-		spin_unlock_irq(&sk->sk_error_queue.lock);
+		spin_unlock_bh(&sk->sk_error_queue.lock);
 		sk->sk_error_report(sk);
 	} else
-		spin_unlock_irq(&sk->sk_error_queue.lock);
+		spin_unlock_bh(&sk->sk_error_queue.lock);
 
 out_free_skb:
 	kfree_skb(skb);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 1a23c5263b..2065944fd9 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -236,15 +236,10 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 	t->props.mode = 1;
 	t->props.saddr.a4 = x->props.saddr.a4;
 	t->props.flags = x->props.flags;
 
-	t->type = xfrm_get_type(IPPROTO_IPIP, t->props.family);
-	if (t->type == NULL)
-		goto error;
-
-	if (t->type->init_state(t, NULL))
+	if (xfrm_init_state(t))
 		goto error;
 
-	t->km.state = XFRM_STATE_VALID;
 	atomic_set(&t->tunnel_users, 1);
 out:
 	return t;
@@ -422,7 +417,7 @@ static void ipcomp_destroy(struct xfrm_state *x)
 	kfree(ipcd);
 }
 
-static int ipcomp_init_state(struct xfrm_state *x, void *args)
+static int ipcomp_init_state(struct xfrm_state *x)
 {
 	int err;
 	struct ipcomp_data *ipcd;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index e21c049ec6..e4f809a93f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1350,6 +1350,7 @@ int ip_mr_input(struct sk_buff *skb)
 		 */
 		read_lock(&mrt_lock);
 		if (mroute_socket) {
+			nf_reset(skb);
 			raw_rcv(mroute_socket, skb);
 			read_unlock(&mrt_lock);
 			return 0;
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index de21da0005..a8512a3fd0 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -127,7 +127,6 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
 
 #define IP_VS_XMIT(skb, rt)				\
 do {							\
-	nf_reset_debug(skb);				\
 	(skb)->nfcache |= NFC_IPVS_PROPERTY;		\
 	(skb)->ip_summed = CHECKSUM_NONE;		\
 	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL,	\
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index cf2e6bcf79..c9cf872605 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -31,6 +31,7 @@
 #include <linux/igmp.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/module.h>
 #include <linux/mroute.h>
 #include <linux/init.h>
 #include <net/ip.h>
@@ -247,3 +248,4 @@ static void __exit drr_exit(void)
 
 module_init(drr_init);
 module_exit(drr_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c
index 805a16e47d..5249dbe7c5 100644
--- a/net/ipv4/multipath_random.c
+++ b/net/ipv4/multipath_random.c
@@ -31,6 +31,7 @@
 #include <linux/igmp.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/module.h>
 #include <linux/mroute.h>
 #include <linux/init.h>
 #include <net/ip.h>
@@ -126,3 +127,4 @@ static void __exit random_exit(void)
 
 module_init(random_init);
 module_exit(random_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c
index 061b6b2539..b6cd287047 100644
--- a/net/ipv4/multipath_rr.c
+++ b/net/ipv4/multipath_rr.c
@@ -31,6 +31,7 @@
 #include <linux/igmp.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/module.h>
 #include <linux/mroute.h>
 #include <linux/init.h>
 #include <net/ip.h>
@@ -93,3 +94,4 @@ static void __exit rr_exit(void)
 
 module_init(rr_init);
 module_exit(rr_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c
index c3d2ca1a67..bd7d75b6ab 100644
--- a/net/ipv4/multipath_wrandom.c
+++ b/net/ipv4/multipath_wrandom.c
@@ -31,6 +31,7 @@
 #include <linux/igmp.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/module.h>
 #include <linux/mroute.h>
 #include <linux/init.h>
 #include <net/ip.h>
@@ -342,3 +343,4 @@ static void __exit wrandom_exit(void)
 
 module_init(wrandom_init);
 module_exit(wrandom_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index df79f5ed6a..fa16342566 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -60,7 +60,6 @@ static DECLARE_MUTEX(arpt_mutex);
 
 #define ASSERT_READ_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
 #define ASSERT_WRITE_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
-#include <linux/netfilter_ipv4/lockhelp.h>
 #include <linux/netfilter_ipv4/listhelp.h>
 
 struct arpt_table_info {
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index 3dbddd0626..a78a320eee 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -26,7 +26,6 @@
 #include <net/checksum.h>
 #include <net/udp.h>
 
-#include <linux/netfilter_ipv4/lockhelp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
 
@@ -42,7 +41,7 @@ static char *conns[] = { "DATA ", "MESG ", "INDEX " };
 
 /* This is slow, but it's simple. --RR */
 static char amanda_buffer[65536];
-static DECLARE_LOCK(amanda_buffer_lock);
+static DEFINE_SPINLOCK(amanda_buffer_lock);
 
 unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
 				   enum ip_conntrack_info ctinfo,
@@ -76,7 +75,7 @@ static int help(struct sk_buff **pskb,
 		return NF_ACCEPT;
 	}
 
-	LOCK_BH(&amanda_buffer_lock);
+	spin_lock_bh(&amanda_buffer_lock);
 	skb_copy_bits(*pskb, dataoff, amanda_buffer, (*pskb)->len - dataoff);
 	data = amanda_buffer;
 	data_limit = amanda_buffer + (*pskb)->len - dataoff;
@@ -134,7 +133,7 @@ static int help(struct sk_buff **pskb,
 	}
 
 out:
-	UNLOCK_BH(&amanda_buffer_lock);
+	spin_unlock_bh(&amanda_buffer_lock);
 	return ret;
 }
 
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 09e8246229..4b78ebeb66 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -38,10 +38,10 @@
38#include <linux/percpu.h> 38#include <linux/percpu.h>
39#include <linux/moduleparam.h> 39#include <linux/moduleparam.h>
40 40
41/* This rwlock protects the main hash table, protocol/helper/expected 41/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
42 registrations, conntrack timers*/ 42 registrations, conntrack timers*/
43#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) 43#define ASSERT_READ_LOCK(x)
44#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) 44#define ASSERT_WRITE_LOCK(x)
45 45
46#include <linux/netfilter_ipv4/ip_conntrack.h> 46#include <linux/netfilter_ipv4/ip_conntrack.h>
47#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 47#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -57,7 +57,7 @@
57#define DEBUGP(format, args...) 57#define DEBUGP(format, args...)
58#endif 58#endif
59 59
60DECLARE_RWLOCK(ip_conntrack_lock); 60DEFINE_RWLOCK(ip_conntrack_lock);
61 61
62/* ip_conntrack_standalone needs this */ 62/* ip_conntrack_standalone needs this */
63atomic_t ip_conntrack_count = ATOMIC_INIT(0); 63atomic_t ip_conntrack_count = ATOMIC_INIT(0);
@@ -147,7 +147,7 @@ static void destroy_expect(struct ip_conntrack_expect *exp)
147 147
148static void unlink_expect(struct ip_conntrack_expect *exp) 148static void unlink_expect(struct ip_conntrack_expect *exp)
149{ 149{
150 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); 150 ASSERT_WRITE_LOCK(&ip_conntrack_lock);
151 list_del(&exp->list); 151 list_del(&exp->list);
152 /* Logically in destroy_expect, but we hold the lock here. */ 152 /* Logically in destroy_expect, but we hold the lock here. */
153 exp->master->expecting--; 153 exp->master->expecting--;
@@ -157,9 +157,9 @@ static void expectation_timed_out(unsigned long ul_expect)
157{ 157{
158 struct ip_conntrack_expect *exp = (void *)ul_expect; 158 struct ip_conntrack_expect *exp = (void *)ul_expect;
159 159
160 WRITE_LOCK(&ip_conntrack_lock); 160 write_lock_bh(&ip_conntrack_lock);
161 unlink_expect(exp); 161 unlink_expect(exp);
162 WRITE_UNLOCK(&ip_conntrack_lock); 162 write_unlock_bh(&ip_conntrack_lock);
163 destroy_expect(exp); 163 destroy_expect(exp);
164} 164}
165 165
@@ -209,7 +209,7 @@ clean_from_lists(struct ip_conntrack *ct)
209 unsigned int ho, hr; 209 unsigned int ho, hr;
210 210
211 DEBUGP("clean_from_lists(%p)\n", ct); 211 DEBUGP("clean_from_lists(%p)\n", ct);
212 MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); 212 ASSERT_WRITE_LOCK(&ip_conntrack_lock);
213 213
214 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 214 ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
215 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); 215 hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
@@ -240,7 +240,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
240 if (ip_conntrack_destroyed) 240 if (ip_conntrack_destroyed)
241 ip_conntrack_destroyed(ct); 241 ip_conntrack_destroyed(ct);
242 242
243 WRITE_LOCK(&ip_conntrack_lock); 243 write_lock_bh(&ip_conntrack_lock);
244 /* Expectations will have been removed in clean_from_lists, 244 /* Expectations will have been removed in clean_from_lists,
245 * except TFTP can create an expectation on the first packet, 245 * except TFTP can create an expectation on the first packet,
246 * before connection is in the list, so we need to clean here, 246 * before connection is in the list, so we need to clean here,
@@ -254,7 +254,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
254 } 254 }
255 255
256 CONNTRACK_STAT_INC(delete); 256 CONNTRACK_STAT_INC(delete);
257 WRITE_UNLOCK(&ip_conntrack_lock); 257 write_unlock_bh(&ip_conntrack_lock);
258 258
259 if (ct->master) 259 if (ct->master)
260 ip_conntrack_put(ct->master); 260 ip_conntrack_put(ct->master);
@@ -268,12 +268,12 @@ static void death_by_timeout(unsigned long ul_conntrack)
268{ 268{
269 struct ip_conntrack *ct = (void *)ul_conntrack; 269 struct ip_conntrack *ct = (void *)ul_conntrack;
270 270
271 WRITE_LOCK(&ip_conntrack_lock); 271 write_lock_bh(&ip_conntrack_lock);
272 /* Inside lock so preempt is disabled on module removal path. 272 /* Inside lock so preempt is disabled on module removal path.
273 * Otherwise we can get spurious warnings. */ 273 * Otherwise we can get spurious warnings. */
274 CONNTRACK_STAT_INC(delete_list); 274 CONNTRACK_STAT_INC(delete_list);
275 clean_from_lists(ct); 275 clean_from_lists(ct);
276 WRITE_UNLOCK(&ip_conntrack_lock); 276 write_unlock_bh(&ip_conntrack_lock);
277 ip_conntrack_put(ct); 277 ip_conntrack_put(ct);
278} 278}
279 279
@@ -282,7 +282,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
282 const struct ip_conntrack_tuple *tuple, 282 const struct ip_conntrack_tuple *tuple,
283 const struct ip_conntrack *ignored_conntrack) 283 const struct ip_conntrack *ignored_conntrack)
284{ 284{
285 MUST_BE_READ_LOCKED(&ip_conntrack_lock); 285 ASSERT_READ_LOCK(&ip_conntrack_lock);
286 return tuplehash_to_ctrack(i) != ignored_conntrack 286 return tuplehash_to_ctrack(i) != ignored_conntrack
287 && ip_ct_tuple_equal(tuple, &i->tuple); 287 && ip_ct_tuple_equal(tuple, &i->tuple);
288} 288}
@@ -294,7 +294,7 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
294 struct ip_conntrack_tuple_hash *h; 294 struct ip_conntrack_tuple_hash *h;
295 unsigned int hash = hash_conntrack(tuple); 295 unsigned int hash = hash_conntrack(tuple);
296 296
297 MUST_BE_READ_LOCKED(&ip_conntrack_lock); 297 ASSERT_READ_LOCK(&ip_conntrack_lock);
298 list_for_each_entry(h, &ip_conntrack_hash[hash], list) { 298 list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
299 if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { 299 if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
300 CONNTRACK_STAT_INC(found); 300 CONNTRACK_STAT_INC(found);
@@ -313,11 +313,11 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
313{ 313{
314 struct ip_conntrack_tuple_hash *h; 314 struct ip_conntrack_tuple_hash *h;
315 315
316 READ_LOCK(&ip_conntrack_lock); 316 read_lock_bh(&ip_conntrack_lock);
317 h = __ip_conntrack_find(tuple, ignored_conntrack); 317 h = __ip_conntrack_find(tuple, ignored_conntrack);
318 if (h) 318 if (h)
319 atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); 319 atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
320 READ_UNLOCK(&ip_conntrack_lock); 320 read_unlock_bh(&ip_conntrack_lock);
321 321
322 return h; 322 return h;
323} 323}
@@ -352,7 +352,7 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
352 IP_NF_ASSERT(!is_confirmed(ct)); 352 IP_NF_ASSERT(!is_confirmed(ct));
353 DEBUGP("Confirming conntrack %p\n", ct); 353 DEBUGP("Confirming conntrack %p\n", ct);
354 354
355 WRITE_LOCK(&ip_conntrack_lock); 355 write_lock_bh(&ip_conntrack_lock);
356 356
357 /* See if there's one in the list already, including reverse: 357 /* See if there's one in the list already, including reverse:
358 NAT could have grabbed it without realizing, since we're 358 NAT could have grabbed it without realizing, since we're
@@ -380,12 +380,12 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
380 atomic_inc(&ct->ct_general.use); 380 atomic_inc(&ct->ct_general.use);
381 set_bit(IPS_CONFIRMED_BIT, &ct->status); 381 set_bit(IPS_CONFIRMED_BIT, &ct->status);
382 CONNTRACK_STAT_INC(insert); 382 CONNTRACK_STAT_INC(insert);
383 WRITE_UNLOCK(&ip_conntrack_lock); 383 write_unlock_bh(&ip_conntrack_lock);
384 return NF_ACCEPT; 384 return NF_ACCEPT;
385 } 385 }
386 386
387 CONNTRACK_STAT_INC(insert_failed); 387 CONNTRACK_STAT_INC(insert_failed);
388 WRITE_UNLOCK(&ip_conntrack_lock); 388 write_unlock_bh(&ip_conntrack_lock);
389 389
390 return NF_DROP; 390 return NF_DROP;
391} 391}
@@ -398,9 +398,9 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
398{ 398{
399 struct ip_conntrack_tuple_hash *h; 399 struct ip_conntrack_tuple_hash *h;
400 400
401 READ_LOCK(&ip_conntrack_lock); 401 read_lock_bh(&ip_conntrack_lock);
402 h = __ip_conntrack_find(tuple, ignored_conntrack); 402 h = __ip_conntrack_find(tuple, ignored_conntrack);
403 READ_UNLOCK(&ip_conntrack_lock); 403 read_unlock_bh(&ip_conntrack_lock);
404 404
405 return h != NULL; 405 return h != NULL;
406} 406}
@@ -419,13 +419,13 @@ static int early_drop(struct list_head *chain)
419 struct ip_conntrack *ct = NULL; 419 struct ip_conntrack *ct = NULL;
420 int dropped = 0; 420 int dropped = 0;
421 421
422 READ_LOCK(&ip_conntrack_lock); 422 read_lock_bh(&ip_conntrack_lock);
423 h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); 423 h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
424 if (h) { 424 if (h) {
425 ct = tuplehash_to_ctrack(h); 425 ct = tuplehash_to_ctrack(h);
426 atomic_inc(&ct->ct_general.use); 426 atomic_inc(&ct->ct_general.use);
427 } 427 }
428 READ_UNLOCK(&ip_conntrack_lock); 428 read_unlock_bh(&ip_conntrack_lock);
429 429
430 if (!ct) 430 if (!ct)
431 return dropped; 431 return dropped;
@@ -508,7 +508,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
508 conntrack->timeout.data = (unsigned long)conntrack; 508 conntrack->timeout.data = (unsigned long)conntrack;
509 conntrack->timeout.function = death_by_timeout; 509 conntrack->timeout.function = death_by_timeout;
510 510
511 WRITE_LOCK(&ip_conntrack_lock); 511 write_lock_bh(&ip_conntrack_lock);
512 exp = find_expectation(tuple); 512 exp = find_expectation(tuple);
513 513
514 if (exp) { 514 if (exp) {
@@ -532,7 +532,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
532 list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); 532 list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
533 533
534 atomic_inc(&ip_conntrack_count); 534 atomic_inc(&ip_conntrack_count);
535 WRITE_UNLOCK(&ip_conntrack_lock); 535 write_unlock_bh(&ip_conntrack_lock);
536 536
537 if (exp) { 537 if (exp) {
538 if (exp->expectfn) 538 if (exp->expectfn)
@@ -723,17 +723,17 @@ void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
723{ 723{
724 struct ip_conntrack_expect *i; 724 struct ip_conntrack_expect *i;
725 725
726 WRITE_LOCK(&ip_conntrack_lock); 726 write_lock_bh(&ip_conntrack_lock);
727 /* choose the the oldest expectation to evict */ 727 /* choose the the oldest expectation to evict */
728 list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { 728 list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
729 if (expect_matches(i, exp) && del_timer(&i->timeout)) { 729 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
730 unlink_expect(i); 730 unlink_expect(i);
731 WRITE_UNLOCK(&ip_conntrack_lock); 731 write_unlock_bh(&ip_conntrack_lock);
732 destroy_expect(i); 732 destroy_expect(i);
733 return; 733 return;
734 } 734 }
735 } 735 }
736 WRITE_UNLOCK(&ip_conntrack_lock); 736 write_unlock_bh(&ip_conntrack_lock);
737} 737}
738 738
739struct ip_conntrack_expect *ip_conntrack_expect_alloc(void) 739struct ip_conntrack_expect *ip_conntrack_expect_alloc(void)
@@ -760,15 +760,11 @@ static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
760 exp->master->expecting++; 760 exp->master->expecting++;
761 list_add(&exp->list, &ip_conntrack_expect_list); 761 list_add(&exp->list, &ip_conntrack_expect_list);
762 762
763 if (exp->master->helper->timeout) { 763 init_timer(&exp->timeout);
764 init_timer(&exp->timeout); 764 exp->timeout.data = (unsigned long)exp;
765 exp->timeout.data = (unsigned long)exp; 765 exp->timeout.function = expectation_timed_out;
766 exp->timeout.function = expectation_timed_out; 766 exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
767 exp->timeout.expires 767 add_timer(&exp->timeout);
768 = jiffies + exp->master->helper->timeout * HZ;
769 add_timer(&exp->timeout);
770 } else
771 exp->timeout.function = NULL;
772 768
773 CONNTRACK_STAT_INC(expect_create); 769 CONNTRACK_STAT_INC(expect_create);
774} 770}
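
The hunk above removes the conditional timer path: since every helper must now register a nonzero timeout (the BUG_ON in ip_conntrack_helper_register below enforces this), the expectation timer can be armed unconditionally. A minimal sketch of the resulting idiom, using the 2.6-era timer API; exp and expectation_timed_out() are the names from the patch:

#include <linux/timer.h>
#include <linux/jiffies.h>

/* Sketch only: arm an expectation timer exactly as the new code does. */
static void arm_expect_timer(struct ip_conntrack_expect *exp)
{
	init_timer(&exp->timeout);
	exp->timeout.data     = (unsigned long)exp;	/* handler argument */
	exp->timeout.function = expectation_timed_out;	/* runs on expiry */
	exp->timeout.expires  = jiffies + exp->master->helper->timeout * HZ;
	add_timer(&exp->timeout);
}
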
@@ -808,7 +804,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
808 DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple); 804 DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
809 DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); 805 DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
810 806
811 WRITE_LOCK(&ip_conntrack_lock); 807 write_lock_bh(&ip_conntrack_lock);
812 list_for_each_entry(i, &ip_conntrack_expect_list, list) { 808 list_for_each_entry(i, &ip_conntrack_expect_list, list) {
813 if (expect_matches(i, expect)) { 809 if (expect_matches(i, expect)) {
814 /* Refresh timer: if it's dying, ignore.. */ 810 /* Refresh timer: if it's dying, ignore.. */
@@ -832,7 +828,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
832 ip_conntrack_expect_insert(expect); 828 ip_conntrack_expect_insert(expect);
833 ret = 0; 829 ret = 0;
834out: 830out:
835 WRITE_UNLOCK(&ip_conntrack_lock); 831 write_unlock_bh(&ip_conntrack_lock);
836 return ret; 832 return ret;
837} 833}
838 834
@@ -841,7 +837,7 @@ out:
841void ip_conntrack_alter_reply(struct ip_conntrack *conntrack, 837void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
842 const struct ip_conntrack_tuple *newreply) 838 const struct ip_conntrack_tuple *newreply)
843{ 839{
844 WRITE_LOCK(&ip_conntrack_lock); 840 write_lock_bh(&ip_conntrack_lock);
845 /* Should be unconfirmed, so not in hash table yet */ 841 /* Should be unconfirmed, so not in hash table yet */
846 IP_NF_ASSERT(!is_confirmed(conntrack)); 842 IP_NF_ASSERT(!is_confirmed(conntrack));
847 843
@@ -851,15 +847,15 @@ void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
851 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; 847 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
852 if (!conntrack->master && conntrack->expecting == 0) 848 if (!conntrack->master && conntrack->expecting == 0)
853 conntrack->helper = ip_ct_find_helper(newreply); 849 conntrack->helper = ip_ct_find_helper(newreply);
854 WRITE_UNLOCK(&ip_conntrack_lock); 850 write_unlock_bh(&ip_conntrack_lock);
855} 851}
856 852
857int ip_conntrack_helper_register(struct ip_conntrack_helper *me) 853int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
858{ 854{
859 BUG_ON(me->timeout == 0); 855 BUG_ON(me->timeout == 0);
860 WRITE_LOCK(&ip_conntrack_lock); 856 write_lock_bh(&ip_conntrack_lock);
861 list_prepend(&helpers, me); 857 list_prepend(&helpers, me);
862 WRITE_UNLOCK(&ip_conntrack_lock); 858 write_unlock_bh(&ip_conntrack_lock);
863 859
864 return 0; 860 return 0;
865} 861}
@@ -878,7 +874,7 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
878 struct ip_conntrack_expect *exp, *tmp; 874 struct ip_conntrack_expect *exp, *tmp;
879 875
880 /* Need write lock here, to delete helper. */ 876 /* Need write lock here, to delete helper. */
881 WRITE_LOCK(&ip_conntrack_lock); 877 write_lock_bh(&ip_conntrack_lock);
882 LIST_DELETE(&helpers, me); 878 LIST_DELETE(&helpers, me);
883 879
884 /* Get rid of expectations */ 880 /* Get rid of expectations */
@@ -893,7 +889,7 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
893 for (i = 0; i < ip_conntrack_htable_size; i++) 889 for (i = 0; i < ip_conntrack_htable_size; i++)
894 LIST_FIND_W(&ip_conntrack_hash[i], unhelp, 890 LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
895 struct ip_conntrack_tuple_hash *, me); 891 struct ip_conntrack_tuple_hash *, me);
896 WRITE_UNLOCK(&ip_conntrack_lock); 892 write_unlock_bh(&ip_conntrack_lock);
897 893
898 /* Someone could be still looking at the helper in a bh. */ 894 /* Someone could be still looking at the helper in a bh. */
899 synchronize_net(); 895 synchronize_net();
@@ -925,14 +921,14 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
925 ct->timeout.expires = extra_jiffies; 921 ct->timeout.expires = extra_jiffies;
926 ct_add_counters(ct, ctinfo, skb); 922 ct_add_counters(ct, ctinfo, skb);
927 } else { 923 } else {
928 WRITE_LOCK(&ip_conntrack_lock); 924 write_lock_bh(&ip_conntrack_lock);
929 /* Need del_timer for race avoidance (may already be dying). */ 925 /* Need del_timer for race avoidance (may already be dying). */
930 if (del_timer(&ct->timeout)) { 926 if (del_timer(&ct->timeout)) {
931 ct->timeout.expires = jiffies + extra_jiffies; 927 ct->timeout.expires = jiffies + extra_jiffies;
932 add_timer(&ct->timeout); 928 add_timer(&ct->timeout);
933 } 929 }
934 ct_add_counters(ct, ctinfo, skb); 930 ct_add_counters(ct, ctinfo, skb);
935 WRITE_UNLOCK(&ip_conntrack_lock); 931 write_unlock_bh(&ip_conntrack_lock);
936 } 932 }
937} 933}
938 934
@@ -940,10 +936,6 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
940struct sk_buff * 936struct sk_buff *
941ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) 937ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
942{ 938{
943#ifdef CONFIG_NETFILTER_DEBUG
944 unsigned int olddebug = skb->nf_debug;
945#endif
946
947 skb_orphan(skb); 939 skb_orphan(skb);
948 940
949 local_bh_disable(); 941 local_bh_disable();
@@ -953,12 +945,7 @@ ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
953 if (skb) { 945 if (skb) {
954 ip_send_check(skb->nh.iph); 946 ip_send_check(skb->nh.iph);
955 skb->nfcache |= NFC_ALTERED; 947 skb->nfcache |= NFC_ALTERED;
956#ifdef CONFIG_NETFILTER_DEBUG
957 /* Packet path as if nothing had happened. */
958 skb->nf_debug = olddebug;
959#endif
960 } 948 }
961
962 return skb; 949 return skb;
963} 950}
964 951
@@ -997,7 +984,7 @@ get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
997{ 984{
998 struct ip_conntrack_tuple_hash *h = NULL; 985 struct ip_conntrack_tuple_hash *h = NULL;
999 986
1000 WRITE_LOCK(&ip_conntrack_lock); 987 write_lock_bh(&ip_conntrack_lock);
1001 for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { 988 for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
1002 h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, 989 h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
1003 struct ip_conntrack_tuple_hash *, iter, data); 990 struct ip_conntrack_tuple_hash *, iter, data);
@@ -1009,7 +996,7 @@ get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
1009 struct ip_conntrack_tuple_hash *, iter, data); 996 struct ip_conntrack_tuple_hash *, iter, data);
1010 if (h) 997 if (h)
1011 atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); 998 atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
1012 WRITE_UNLOCK(&ip_conntrack_lock); 999 write_unlock_bh(&ip_conntrack_lock);
1013 1000
1014 return h; 1001 return h;
1015} 1002}
@@ -1201,14 +1188,14 @@ int __init ip_conntrack_init(void)
1201 } 1188 }
1202 1189
1203 /* Don't NEED lock here, but good form anyway. */ 1190 /* Don't NEED lock here, but good form anyway. */
1204 WRITE_LOCK(&ip_conntrack_lock); 1191 write_lock_bh(&ip_conntrack_lock);
1205 for (i = 0; i < MAX_IP_CT_PROTO; i++) 1192 for (i = 0; i < MAX_IP_CT_PROTO; i++)
1206 ip_ct_protos[i] = &ip_conntrack_generic_protocol; 1193 ip_ct_protos[i] = &ip_conntrack_generic_protocol;
1207 /* Sew in builtin protocols. */ 1194 /* Sew in builtin protocols. */
1208 ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp; 1195 ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
1209 ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp; 1196 ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
1210 ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; 1197 ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
1211 WRITE_UNLOCK(&ip_conntrack_lock); 1198 write_unlock_bh(&ip_conntrack_lock);
1212 1199
1213 for (i = 0; i < ip_conntrack_htable_size; i++) 1200 for (i = 0; i < ip_conntrack_htable_size; i++)
1214 INIT_LIST_HEAD(&ip_conntrack_hash[i]); 1201 INIT_LIST_HEAD(&ip_conntrack_hash[i]);
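
The dominant change in this file is mechanical: netfilter's private lockhelp wrappers (DECLARE_RWLOCK, READ_LOCK, WRITE_LOCK and friends) become the kernel's standard rwlock primitives, always in their _bh form because conntrack also runs from softirq context. A minimal sketch of the converted idiom; my_lock and my_count are hypothetical stand-ins, not names from the patch:

#include <linux/spinlock.h>

static DEFINE_RWLOCK(my_lock);	/* replaces DECLARE_RWLOCK() */
static unsigned int my_count;	/* data guarded by my_lock */

static unsigned int my_read(void)
{
	unsigned int n;

	read_lock_bh(&my_lock);	/* shared access, bottom halves disabled */
	n = my_count;
	read_unlock_bh(&my_lock);
	return n;
}

static void my_update(void)
{
	write_lock_bh(&my_lock);	/* exclusive against readers and BHs */
	my_count++;
	write_unlock_bh(&my_lock);
}
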
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index dd86503aa7..fea6dd2a00 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -16,7 +16,6 @@
16#include <net/checksum.h> 16#include <net/checksum.h>
17#include <net/tcp.h> 17#include <net/tcp.h>
18 18
19#include <linux/netfilter_ipv4/lockhelp.h>
20#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 19#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
21#include <linux/netfilter_ipv4/ip_conntrack_ftp.h> 20#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
22#include <linux/moduleparam.h> 21#include <linux/moduleparam.h>
@@ -28,7 +27,7 @@ MODULE_DESCRIPTION("ftp connection tracking helper");
28/* This is slow, but it's simple. --RR */ 27/* This is slow, but it's simple. --RR */
29static char ftp_buffer[65536]; 28static char ftp_buffer[65536];
30 29
31static DECLARE_LOCK(ip_ftp_lock); 30static DEFINE_SPINLOCK(ip_ftp_lock);
32 31
33#define MAX_PORTS 8 32#define MAX_PORTS 8
34static int ports[MAX_PORTS]; 33static int ports[MAX_PORTS];
@@ -319,7 +318,7 @@ static int help(struct sk_buff **pskb,
319 } 318 }
320 datalen = (*pskb)->len - dataoff; 319 datalen = (*pskb)->len - dataoff;
321 320
322 LOCK_BH(&ip_ftp_lock); 321 spin_lock_bh(&ip_ftp_lock);
323 fb_ptr = skb_header_pointer(*pskb, dataoff, 322 fb_ptr = skb_header_pointer(*pskb, dataoff,
324 (*pskb)->len - dataoff, ftp_buffer); 323 (*pskb)->len - dataoff, ftp_buffer);
325 BUG_ON(fb_ptr == NULL); 324 BUG_ON(fb_ptr == NULL);
@@ -442,7 +441,7 @@ out_update_nl:
442 if (ends_in_nl) 441 if (ends_in_nl)
443 update_nl_seq(seq, ct_ftp_info,dir); 442 update_nl_seq(seq, ct_ftp_info,dir);
444 out: 443 out:
445 UNLOCK_BH(&ip_ftp_lock); 444 spin_unlock_bh(&ip_ftp_lock);
446 return ret; 445 return ret;
447} 446}
448 447
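
The FTP helper keeps its single static parse buffer; only the serialization is respelled, DECLARE_LOCK/LOCK_BH giving way to the stock spinlock API. A sketch of the pattern with a hypothetical buffer and entry point:

#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>

static char my_buffer[65536];		/* shared scratch space */
static DEFINE_SPINLOCK(my_buffer_lock);	/* replaces DECLARE_LOCK() */

static void my_parse(const char *data, size_t len)
{
	spin_lock_bh(&my_buffer_lock);	/* one user of the buffer at a time */
	if (len > sizeof(my_buffer))
		len = sizeof(my_buffer);
	memcpy(my_buffer, data, len);
	/* ... scan my_buffer for protocol commands here ... */
	spin_unlock_bh(&my_buffer_lock);
}
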
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index 33cc7348b6..cd98772cc3 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -29,7 +29,6 @@
29#include <net/checksum.h> 29#include <net/checksum.h>
30#include <net/tcp.h> 30#include <net/tcp.h>
31 31
32#include <linux/netfilter_ipv4/lockhelp.h>
33#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 32#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
34#include <linux/netfilter_ipv4/ip_conntrack_irc.h> 33#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
35#include <linux/moduleparam.h> 34#include <linux/moduleparam.h>
@@ -41,7 +40,7 @@ static int max_dcc_channels = 8;
41static unsigned int dcc_timeout = 300; 40static unsigned int dcc_timeout = 300;
42/* This is slow, but it's simple. --RR */ 41/* This is slow, but it's simple. --RR */
43static char irc_buffer[65536]; 42static char irc_buffer[65536];
44static DECLARE_LOCK(irc_buffer_lock); 43static DEFINE_SPINLOCK(irc_buffer_lock);
45 44
46unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb, 45unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
47 enum ip_conntrack_info ctinfo, 46 enum ip_conntrack_info ctinfo,
@@ -141,7 +140,7 @@ static int help(struct sk_buff **pskb,
141 if (dataoff >= (*pskb)->len) 140 if (dataoff >= (*pskb)->len)
142 return NF_ACCEPT; 141 return NF_ACCEPT;
143 142
144 LOCK_BH(&irc_buffer_lock); 143 spin_lock_bh(&irc_buffer_lock);
145 ib_ptr = skb_header_pointer(*pskb, dataoff, 144 ib_ptr = skb_header_pointer(*pskb, dataoff,
146 (*pskb)->len - dataoff, irc_buffer); 145 (*pskb)->len - dataoff, irc_buffer);
147 BUG_ON(ib_ptr == NULL); 146 BUG_ON(ib_ptr == NULL);
@@ -237,7 +236,7 @@ static int help(struct sk_buff **pskb,
237 } /* while data < ... */ 236 } /* while data < ... */
238 237
239 out: 238 out:
240 UNLOCK_BH(&irc_buffer_lock); 239 spin_unlock_bh(&irc_buffer_lock);
241 return ret; 240 return ret;
242} 241}
243 242
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index ff8c34a860..31d75390bf 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -26,7 +26,6 @@
26 26
27#include <linux/netfilter_ipv4/ip_conntrack.h> 27#include <linux/netfilter_ipv4/ip_conntrack.h>
28#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 28#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
29#include <linux/netfilter_ipv4/lockhelp.h>
30 29
31#if 0 30#if 0
32#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__) 31#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
@@ -35,7 +34,7 @@
35#endif 34#endif
36 35
37/* Protects conntrack->proto.sctp */ 36/* Protects conntrack->proto.sctp */
38static DECLARE_RWLOCK(sctp_lock); 37static DEFINE_RWLOCK(sctp_lock);
39 38
40/* FIXME: Examine ipfilter's timeouts and conntrack transitions more 39/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
41 closely. They're more complex. --RR 40 closely. They're more complex. --RR
@@ -199,9 +198,9 @@ static int sctp_print_conntrack(struct seq_file *s,
199 DEBUGP(__FUNCTION__); 198 DEBUGP(__FUNCTION__);
200 DEBUGP("\n"); 199 DEBUGP("\n");
201 200
202 READ_LOCK(&sctp_lock); 201 read_lock_bh(&sctp_lock);
203 state = conntrack->proto.sctp.state; 202 state = conntrack->proto.sctp.state;
204 READ_UNLOCK(&sctp_lock); 203 read_unlock_bh(&sctp_lock);
205 204
206 return seq_printf(s, "%s ", sctp_conntrack_names[state]); 205 return seq_printf(s, "%s ", sctp_conntrack_names[state]);
207} 206}
@@ -343,13 +342,13 @@ static int sctp_packet(struct ip_conntrack *conntrack,
343 342
344 oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX; 343 oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
345 for_each_sctp_chunk (skb, sch, _sch, offset, count) { 344 for_each_sctp_chunk (skb, sch, _sch, offset, count) {
346 WRITE_LOCK(&sctp_lock); 345 write_lock_bh(&sctp_lock);
347 346
348 /* Special cases of Verification tag check (Sec 8.5.1) */ 347 /* Special cases of Verification tag check (Sec 8.5.1) */
349 if (sch->type == SCTP_CID_INIT) { 348 if (sch->type == SCTP_CID_INIT) {
350 /* Sec 8.5.1 (A) */ 349 /* Sec 8.5.1 (A) */
351 if (sh->vtag != 0) { 350 if (sh->vtag != 0) {
352 WRITE_UNLOCK(&sctp_lock); 351 write_unlock_bh(&sctp_lock);
353 return -1; 352 return -1;
354 } 353 }
355 } else if (sch->type == SCTP_CID_ABORT) { 354 } else if (sch->type == SCTP_CID_ABORT) {
@@ -357,7 +356,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
357 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) 356 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
358 && !(sh->vtag == conntrack->proto.sctp.vtag 357 && !(sh->vtag == conntrack->proto.sctp.vtag
359 [1 - CTINFO2DIR(ctinfo)])) { 358 [1 - CTINFO2DIR(ctinfo)])) {
360 WRITE_UNLOCK(&sctp_lock); 359 write_unlock_bh(&sctp_lock);
361 return -1; 360 return -1;
362 } 361 }
363 } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { 362 } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
@@ -366,13 +365,13 @@ static int sctp_packet(struct ip_conntrack *conntrack,
366 && !(sh->vtag == conntrack->proto.sctp.vtag 365 && !(sh->vtag == conntrack->proto.sctp.vtag
367 [1 - CTINFO2DIR(ctinfo)] 366 [1 - CTINFO2DIR(ctinfo)]
368 && (sch->flags & 1))) { 367 && (sch->flags & 1))) {
369 WRITE_UNLOCK(&sctp_lock); 368 write_unlock_bh(&sctp_lock);
370 return -1; 369 return -1;
371 } 370 }
372 } else if (sch->type == SCTP_CID_COOKIE_ECHO) { 371 } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
373 /* Sec 8.5.1 (D) */ 372 /* Sec 8.5.1 (D) */
374 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { 373 if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
375 WRITE_UNLOCK(&sctp_lock); 374 write_unlock_bh(&sctp_lock);
376 return -1; 375 return -1;
377 } 376 }
378 } 377 }
@@ -384,7 +383,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
384 if (newconntrack == SCTP_CONNTRACK_MAX) { 383 if (newconntrack == SCTP_CONNTRACK_MAX) {
385 DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n", 384 DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
386 CTINFO2DIR(ctinfo), sch->type, oldsctpstate); 385 CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
387 WRITE_UNLOCK(&sctp_lock); 386 write_unlock_bh(&sctp_lock);
388 return -1; 387 return -1;
389 } 388 }
390 389
@@ -396,7 +395,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
396 ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), 395 ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
397 sizeof(_inithdr), &_inithdr); 396 sizeof(_inithdr), &_inithdr);
398 if (ih == NULL) { 397 if (ih == NULL) {
399 WRITE_UNLOCK(&sctp_lock); 398 write_unlock_bh(&sctp_lock);
400 return -1; 399 return -1;
401 } 400 }
402 DEBUGP("Setting vtag %x for dir %d\n", 401 DEBUGP("Setting vtag %x for dir %d\n",
@@ -405,7 +404,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
405 } 404 }
406 405
407 conntrack->proto.sctp.state = newconntrack; 406 conntrack->proto.sctp.state = newconntrack;
408 WRITE_UNLOCK(&sctp_lock); 407 write_unlock_bh(&sctp_lock);
409 } 408 }
410 409
411 ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]); 410 ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 721ddbf522..809dfed766 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -36,7 +36,6 @@
36#include <linux/netfilter_ipv4.h> 36#include <linux/netfilter_ipv4.h>
37#include <linux/netfilter_ipv4/ip_conntrack.h> 37#include <linux/netfilter_ipv4/ip_conntrack.h>
38#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 38#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
39#include <linux/netfilter_ipv4/lockhelp.h>
40 39
41#if 0 40#if 0
42#define DEBUGP printk 41#define DEBUGP printk
@@ -46,7 +45,7 @@
46#endif 45#endif
47 46
48/* Protects conntrack->proto.tcp */ 47/* Protects conntrack->proto.tcp */
49static DECLARE_RWLOCK(tcp_lock); 48static DEFINE_RWLOCK(tcp_lock);
50 49
51/* "Be conservative in what you do, 50/* "Be conservative in what you do,
52 be liberal in what you accept from others." 51 be liberal in what you accept from others."
@@ -330,9 +329,9 @@ static int tcp_print_conntrack(struct seq_file *s,
330{ 329{
331 enum tcp_conntrack state; 330 enum tcp_conntrack state;
332 331
333 READ_LOCK(&tcp_lock); 332 read_lock_bh(&tcp_lock);
334 state = conntrack->proto.tcp.state; 333 state = conntrack->proto.tcp.state;
335 READ_UNLOCK(&tcp_lock); 334 read_unlock_bh(&tcp_lock);
336 335
337 return seq_printf(s, "%s ", tcp_conntrack_names[state]); 336 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
338} 337}
@@ -738,14 +737,14 @@ void ip_conntrack_tcp_update(struct sk_buff *skb,
738 737
739 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph); 738 end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
740 739
741 WRITE_LOCK(&tcp_lock); 740 write_lock_bh(&tcp_lock);
742 /* 741 /*
743 * We have to worry for the ack in the reply packet only... 742 * We have to worry for the ack in the reply packet only...
744 */ 743 */
745 if (after(end, conntrack->proto.tcp.seen[dir].td_end)) 744 if (after(end, conntrack->proto.tcp.seen[dir].td_end))
746 conntrack->proto.tcp.seen[dir].td_end = end; 745 conntrack->proto.tcp.seen[dir].td_end = end;
747 conntrack->proto.tcp.last_end = end; 746 conntrack->proto.tcp.last_end = end;
748 WRITE_UNLOCK(&tcp_lock); 747 write_unlock_bh(&tcp_lock);
749 DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " 748 DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
750 "receiver end=%u maxend=%u maxwin=%u scale=%i\n", 749 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
751 sender->td_end, sender->td_maxend, sender->td_maxwin, 750 sender->td_end, sender->td_maxend, sender->td_maxwin,
@@ -857,7 +856,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
857 sizeof(_tcph), &_tcph); 856 sizeof(_tcph), &_tcph);
858 BUG_ON(th == NULL); 857 BUG_ON(th == NULL);
859 858
860 WRITE_LOCK(&tcp_lock); 859 write_lock_bh(&tcp_lock);
861 old_state = conntrack->proto.tcp.state; 860 old_state = conntrack->proto.tcp.state;
862 dir = CTINFO2DIR(ctinfo); 861 dir = CTINFO2DIR(ctinfo);
863 index = get_conntrack_index(th); 862 index = get_conntrack_index(th);
@@ -879,7 +878,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
879 * that the client cannot but retransmit its SYN and 878 * that the client cannot but retransmit its SYN and
880 * thus initiate a clean new session. 879 * thus initiate a clean new session.
881 */ 880 */
882 WRITE_UNLOCK(&tcp_lock); 881 write_unlock_bh(&tcp_lock);
883 if (LOG_INVALID(IPPROTO_TCP)) 882 if (LOG_INVALID(IPPROTO_TCP))
884 nf_log_packet(PF_INET, 0, skb, NULL, NULL, 883 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
885 "ip_ct_tcp: killing out of sync session "); 884 "ip_ct_tcp: killing out of sync session ");
@@ -894,7 +893,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
894 conntrack->proto.tcp.last_end = 893 conntrack->proto.tcp.last_end =
895 segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th); 894 segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
896 895
897 WRITE_UNLOCK(&tcp_lock); 896 write_unlock_bh(&tcp_lock);
898 if (LOG_INVALID(IPPROTO_TCP)) 897 if (LOG_INVALID(IPPROTO_TCP))
899 nf_log_packet(PF_INET, 0, skb, NULL, NULL, 898 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
900 "ip_ct_tcp: invalid packet ignored "); 899 "ip_ct_tcp: invalid packet ignored ");
@@ -904,7 +903,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
904 DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", 903 DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
905 dir, get_conntrack_index(th), 904 dir, get_conntrack_index(th),
906 old_state); 905 old_state);
907 WRITE_UNLOCK(&tcp_lock); 906 write_unlock_bh(&tcp_lock);
908 if (LOG_INVALID(IPPROTO_TCP)) 907 if (LOG_INVALID(IPPROTO_TCP))
909 nf_log_packet(PF_INET, 0, skb, NULL, NULL, 908 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
910 "ip_ct_tcp: invalid state "); 909 "ip_ct_tcp: invalid state ");
@@ -918,13 +917,13 @@ static int tcp_packet(struct ip_conntrack *conntrack,
918 conntrack->proto.tcp.seen[dir].td_end)) { 917 conntrack->proto.tcp.seen[dir].td_end)) {
919 /* Attempt to reopen a closed connection. 918 /* Attempt to reopen a closed connection.
920 * Delete this connection and look up again. */ 919 * Delete this connection and look up again. */
921 WRITE_UNLOCK(&tcp_lock); 920 write_unlock_bh(&tcp_lock);
922 if (del_timer(&conntrack->timeout)) 921 if (del_timer(&conntrack->timeout))
923 conntrack->timeout.function((unsigned long) 922 conntrack->timeout.function((unsigned long)
924 conntrack); 923 conntrack);
925 return -NF_REPEAT; 924 return -NF_REPEAT;
926 } else { 925 } else {
927 WRITE_UNLOCK(&tcp_lock); 926 write_unlock_bh(&tcp_lock);
928 if (LOG_INVALID(IPPROTO_TCP)) 927 if (LOG_INVALID(IPPROTO_TCP))
929 nf_log_packet(PF_INET, 0, skb, NULL, NULL, 928 nf_log_packet(PF_INET, 0, skb, NULL, NULL,
930 "ip_ct_tcp: invalid SYN"); 929 "ip_ct_tcp: invalid SYN");
@@ -949,7 +948,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
949 948
950 if (!tcp_in_window(&conntrack->proto.tcp, dir, index, 949 if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
951 skb, iph, th)) { 950 skb, iph, th)) {
952 WRITE_UNLOCK(&tcp_lock); 951 write_unlock_bh(&tcp_lock);
953 return -NF_ACCEPT; 952 return -NF_ACCEPT;
954 } 953 }
955 in_window: 954 in_window:
@@ -972,7 +971,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
972 timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans 971 timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
973 && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans 972 && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
974 ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; 973 ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
975 WRITE_UNLOCK(&tcp_lock); 974 write_unlock_bh(&tcp_lock);
976 975
977 if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { 976 if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
978 /* If only reply is a RST, we can consider ourselves not to 977 /* If only reply is a RST, we can consider ourselves not to
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 5bc28a2246..8c1eaba098 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -120,6 +120,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
120 * and moreover root might send raw packets. 120 * and moreover root might send raw packets.
121 * FIXME: Source route IP option packets --RR */ 121 * FIXME: Source route IP option packets --RR */
122 if (hooknum == NF_IP_PRE_ROUTING 122 if (hooknum == NF_IP_PRE_ROUTING
123 && skb->ip_summed != CHECKSUM_UNNECESSARY
123 && csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP, 124 && csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP,
124 skb->ip_summed == CHECKSUM_HW ? skb->csum 125 skb->ip_summed == CHECKSUM_HW ? skb->csum
125 : skb_checksum(skb, iph->ihl*4, udplen, 0))) { 126 : skb_checksum(skb, iph->ihl*4, udplen, 0))) {
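
The udp_error() hunk adds a single guard: if hardware already verified the datagram (skb->ip_summed == CHECKSUM_UNNECESSARY), the software checksum pass is skipped. A hedged restatement of the test as a fragment, with field names as in the patch and the failure action as in udp_error():

/* Verify the UDP checksum only when nobody has done it for us. */
if (hooknum == NF_IP_PRE_ROUTING &&
    skb->ip_summed != CHECKSUM_UNNECESSARY &&	/* hw didn't validate */
    csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP,
		      skb->ip_summed == CHECKSUM_HW
			? skb->csum	/* reuse the hardware sum */
			: skb_checksum(skb, iph->ihl * 4, udplen, 0)))
	return -NF_ACCEPT;	/* bad checksum: don't track the packet */
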
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 46ca45f74d..42dc951028 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -28,8 +28,8 @@
28#include <net/checksum.h> 28#include <net/checksum.h>
29#include <net/ip.h> 29#include <net/ip.h>
30 30
31#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) 31#define ASSERT_READ_LOCK(x)
32#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) 32#define ASSERT_WRITE_LOCK(x)
33 33
34#include <linux/netfilter_ipv4/ip_conntrack.h> 34#include <linux/netfilter_ipv4/ip_conntrack.h>
35#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 35#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -119,7 +119,7 @@ static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
119 119
120static void *ct_seq_start(struct seq_file *seq, loff_t *pos) 120static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
121{ 121{
122 READ_LOCK(&ip_conntrack_lock); 122 read_lock_bh(&ip_conntrack_lock);
123 return ct_get_idx(seq, *pos); 123 return ct_get_idx(seq, *pos);
124} 124}
125 125
@@ -131,7 +131,7 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
131 131
132static void ct_seq_stop(struct seq_file *s, void *v) 132static void ct_seq_stop(struct seq_file *s, void *v)
133{ 133{
134 READ_UNLOCK(&ip_conntrack_lock); 134 read_unlock_bh(&ip_conntrack_lock);
135} 135}
136 136
137static int ct_seq_show(struct seq_file *s, void *v) 137static int ct_seq_show(struct seq_file *s, void *v)
@@ -140,7 +140,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
140 const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash); 140 const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
141 struct ip_conntrack_protocol *proto; 141 struct ip_conntrack_protocol *proto;
142 142
143 MUST_BE_READ_LOCKED(&ip_conntrack_lock); 143 ASSERT_READ_LOCK(&ip_conntrack_lock);
144 IP_NF_ASSERT(conntrack); 144 IP_NF_ASSERT(conntrack);
145 145
146 /* we only want to print DIR_ORIGINAL */ 146 /* we only want to print DIR_ORIGINAL */
@@ -239,7 +239,7 @@ static void *exp_seq_start(struct seq_file *s, loff_t *pos)
239 239
240 /* strange seq_file api calls stop even if we fail, 240 /* strange seq_file api calls stop even if we fail,
241 * thus we need to grab lock since stop unlocks */ 241 * thus we need to grab lock since stop unlocks */
242 READ_LOCK(&ip_conntrack_lock); 242 read_lock_bh(&ip_conntrack_lock);
243 243
244 if (list_empty(e)) 244 if (list_empty(e))
245 return NULL; 245 return NULL;
@@ -256,6 +256,7 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
256{ 256{
257 struct list_head *e = v; 257 struct list_head *e = v;
258 258
259 ++*pos;
259 e = e->next; 260 e = e->next;
260 261
261 if (e == &ip_conntrack_expect_list) 262 if (e == &ip_conntrack_expect_list)
@@ -266,7 +267,7 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
266 267
267static void exp_seq_stop(struct seq_file *s, void *v) 268static void exp_seq_stop(struct seq_file *s, void *v)
268{ 269{
269 READ_UNLOCK(&ip_conntrack_lock); 270 read_unlock_bh(&ip_conntrack_lock);
270} 271}
271 272
272static int exp_seq_show(struct seq_file *s, void *v) 273static int exp_seq_show(struct seq_file *s, void *v)
@@ -920,22 +921,22 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
920{ 921{
921 int ret = 0; 922 int ret = 0;
922 923
923 WRITE_LOCK(&ip_conntrack_lock); 924 write_lock_bh(&ip_conntrack_lock);
924 if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) { 925 if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
925 ret = -EBUSY; 926 ret = -EBUSY;
926 goto out; 927 goto out;
927 } 928 }
928 ip_ct_protos[proto->proto] = proto; 929 ip_ct_protos[proto->proto] = proto;
929 out: 930 out:
930 WRITE_UNLOCK(&ip_conntrack_lock); 931 write_unlock_bh(&ip_conntrack_lock);
931 return ret; 932 return ret;
932} 933}
933 934
934void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto) 935void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
935{ 936{
936 WRITE_LOCK(&ip_conntrack_lock); 937 write_lock_bh(&ip_conntrack_lock);
937 ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol; 938 ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
938 WRITE_UNLOCK(&ip_conntrack_lock); 939 write_unlock_bh(&ip_conntrack_lock);
939 940
940 /* Somebody could be still looking at the proto in bh. */ 941 /* Somebody could be still looking at the proto in bh. */
941 synchronize_net(); 942 synchronize_net();
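
Apart from the lock conversion, this file picks up a one-line correctness fix: exp_seq_next() now bumps *pos. The seq_file contract requires ->next to advance the position on every call, otherwise the core can hand out the same element twice across buffer refills. A minimal compliant iterator, with my_list as a hypothetical stand-in for the expectation list:

#include <linux/seq_file.h>
#include <linux/list.h>

static LIST_HEAD(my_list);	/* hypothetical list being walked */

static void *my_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct list_head *e = v;

	++*pos;			/* always advance, even when we hit the end */
	e = e->next;
	return e == &my_list ? NULL : e;
}
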
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 9fc6f93af0..739b6dde1c 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -22,8 +22,8 @@
22#include <linux/udp.h> 22#include <linux/udp.h>
23#include <linux/jhash.h> 23#include <linux/jhash.h>
24 24
25#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) 25#define ASSERT_READ_LOCK(x)
26#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) 26#define ASSERT_WRITE_LOCK(x)
27 27
28#include <linux/netfilter_ipv4/ip_conntrack.h> 28#include <linux/netfilter_ipv4/ip_conntrack.h>
29#include <linux/netfilter_ipv4/ip_conntrack_core.h> 29#include <linux/netfilter_ipv4/ip_conntrack_core.h>
@@ -41,7 +41,7 @@
41#define DEBUGP(format, args...) 41#define DEBUGP(format, args...)
42#endif 42#endif
43 43
44DECLARE_RWLOCK(ip_nat_lock); 44DEFINE_RWLOCK(ip_nat_lock);
45 45
46/* Calculated at init based on memory size */ 46/* Calculated at init based on memory size */
47static unsigned int ip_nat_htable_size; 47static unsigned int ip_nat_htable_size;
@@ -65,9 +65,9 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
65 if (!(conn->status & IPS_NAT_DONE_MASK)) 65 if (!(conn->status & IPS_NAT_DONE_MASK))
66 return; 66 return;
67 67
68 WRITE_LOCK(&ip_nat_lock); 68 write_lock_bh(&ip_nat_lock);
69 list_del(&conn->nat.info.bysource); 69 list_del(&conn->nat.info.bysource);
70 WRITE_UNLOCK(&ip_nat_lock); 70 write_unlock_bh(&ip_nat_lock);
71} 71}
72 72
73/* We do checksum mangling, so if they were wrong before they're still 73/* We do checksum mangling, so if they were wrong before they're still
@@ -142,7 +142,7 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple,
142 unsigned int h = hash_by_src(tuple); 142 unsigned int h = hash_by_src(tuple);
143 struct ip_conntrack *ct; 143 struct ip_conntrack *ct;
144 144
145 READ_LOCK(&ip_nat_lock); 145 read_lock_bh(&ip_nat_lock);
146 list_for_each_entry(ct, &bysource[h], nat.info.bysource) { 146 list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
147 if (same_src(ct, tuple)) { 147 if (same_src(ct, tuple)) {
148 /* Copy source part from reply tuple. */ 148 /* Copy source part from reply tuple. */
@@ -151,12 +151,12 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple,
151 result->dst = tuple->dst; 151 result->dst = tuple->dst;
152 152
153 if (in_range(result, range)) { 153 if (in_range(result, range)) {
154 READ_UNLOCK(&ip_nat_lock); 154 read_unlock_bh(&ip_nat_lock);
155 return 1; 155 return 1;
156 } 156 }
157 } 157 }
158 } 158 }
159 READ_UNLOCK(&ip_nat_lock); 159 read_unlock_bh(&ip_nat_lock);
160 return 0; 160 return 0;
161} 161}
162 162
@@ -297,9 +297,9 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
297 unsigned int srchash 297 unsigned int srchash
298 = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] 298 = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
299 .tuple); 299 .tuple);
300 WRITE_LOCK(&ip_nat_lock); 300 write_lock_bh(&ip_nat_lock);
301 list_add(&info->bysource, &bysource[srchash]); 301 list_add(&info->bysource, &bysource[srchash]);
302 WRITE_UNLOCK(&ip_nat_lock); 302 write_unlock_bh(&ip_nat_lock);
303 } 303 }
304 304
305 /* It's done. */ 305 /* It's done. */
@@ -474,23 +474,23 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto)
474{ 474{
475 int ret = 0; 475 int ret = 0;
476 476
477 WRITE_LOCK(&ip_nat_lock); 477 write_lock_bh(&ip_nat_lock);
478 if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) { 478 if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
479 ret = -EBUSY; 479 ret = -EBUSY;
480 goto out; 480 goto out;
481 } 481 }
482 ip_nat_protos[proto->protonum] = proto; 482 ip_nat_protos[proto->protonum] = proto;
483 out: 483 out:
484 WRITE_UNLOCK(&ip_nat_lock); 484 write_unlock_bh(&ip_nat_lock);
485 return ret; 485 return ret;
486} 486}
487 487
488/* No one stores the protocol anywhere; simply delete it. */ 488/* No one stores the protocol anywhere; simply delete it. */
489void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) 489void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
490{ 490{
491 WRITE_LOCK(&ip_nat_lock); 491 write_lock_bh(&ip_nat_lock);
492 ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol; 492 ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
493 WRITE_UNLOCK(&ip_nat_lock); 493 write_unlock_bh(&ip_nat_lock);
494 494
495 /* Someone could be still looking at the proto in a bh. */ 495 /* Someone could be still looking at the proto in a bh. */
496 synchronize_net(); 496 synchronize_net();
@@ -509,13 +509,13 @@ int __init ip_nat_init(void)
509 return -ENOMEM; 509 return -ENOMEM;
510 510
511 /* Sew in builtin protocols. */ 511 /* Sew in builtin protocols. */
512 WRITE_LOCK(&ip_nat_lock); 512 write_lock_bh(&ip_nat_lock);
513 for (i = 0; i < MAX_IP_NAT_PROTO; i++) 513 for (i = 0; i < MAX_IP_NAT_PROTO; i++)
514 ip_nat_protos[i] = &ip_nat_unknown_protocol; 514 ip_nat_protos[i] = &ip_nat_unknown_protocol;
515 ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp; 515 ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
516 ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp; 516 ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
517 ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp; 517 ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
518 WRITE_UNLOCK(&ip_nat_lock); 518 write_unlock_bh(&ip_nat_lock);
519 519
520 for (i = 0; i < ip_nat_htable_size; i++) { 520 for (i = 0; i < ip_nat_htable_size; i++) {
521 INIT_LIST_HEAD(&bysource[i]); 521 INIT_LIST_HEAD(&bysource[i]);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index 1637b96d8c..158f34f32c 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -28,8 +28,8 @@
28#include <net/tcp.h> 28#include <net/tcp.h>
29#include <net/udp.h> 29#include <net/udp.h>
30 30
31#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) 31#define ASSERT_READ_LOCK(x)
32#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) 32#define ASSERT_WRITE_LOCK(x)
33 33
34#include <linux/netfilter_ipv4/ip_conntrack.h> 34#include <linux/netfilter_ipv4/ip_conntrack.h>
35#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 35#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
@@ -47,7 +47,7 @@
47#define DUMP_OFFSET(x) 47#define DUMP_OFFSET(x)
48#endif 48#endif
49 49
50static DECLARE_LOCK(ip_nat_seqofs_lock); 50static DEFINE_SPINLOCK(ip_nat_seqofs_lock);
51 51
52/* Setup TCP sequence correction given this change at this sequence */ 52/* Setup TCP sequence correction given this change at this sequence */
53static inline void 53static inline void
@@ -70,7 +70,7 @@ adjust_tcp_sequence(u32 seq,
70 DEBUGP("ip_nat_resize_packet: Seq_offset before: "); 70 DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
71 DUMP_OFFSET(this_way); 71 DUMP_OFFSET(this_way);
72 72
73 LOCK_BH(&ip_nat_seqofs_lock); 73 spin_lock_bh(&ip_nat_seqofs_lock);
74 74
75 /* SYN adjust. If it's uninitialized, or this is after last 75 /* SYN adjust. If it's uninitialized, or this is after last
76 * correction, record it: we don't handle more than one 76 * correction, record it: we don't handle more than one
@@ -82,7 +82,7 @@ adjust_tcp_sequence(u32 seq,
82 this_way->offset_before = this_way->offset_after; 82 this_way->offset_before = this_way->offset_after;
83 this_way->offset_after += sizediff; 83 this_way->offset_after += sizediff;
84 } 84 }
85 UNLOCK_BH(&ip_nat_seqofs_lock); 85 spin_unlock_bh(&ip_nat_seqofs_lock);
86 86
87 DEBUGP("ip_nat_resize_packet: Seq_offset after: "); 87 DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
88 DUMP_OFFSET(this_way); 88 DUMP_OFFSET(this_way);
@@ -142,9 +142,6 @@ static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
142 /* Transfer socket to new skb. */ 142 /* Transfer socket to new skb. */
143 if ((*pskb)->sk) 143 if ((*pskb)->sk)
144 skb_set_owner_w(nskb, (*pskb)->sk); 144 skb_set_owner_w(nskb, (*pskb)->sk);
145#ifdef CONFIG_NETFILTER_DEBUG
146 nskb->nf_debug = (*pskb)->nf_debug;
147#endif
148 kfree_skb(*pskb); 145 kfree_skb(*pskb);
149 *pskb = nskb; 146 *pskb = nskb;
150 return 1; 147 return 1;
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 581f097f5a..60d70fa41a 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -19,8 +19,8 @@
19#include <net/route.h> 19#include <net/route.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21 21
22#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) 22#define ASSERT_READ_LOCK(x)
23#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) 23#define ASSERT_WRITE_LOCK(x)
24 24
25#include <linux/netfilter_ipv4/ip_tables.h> 25#include <linux/netfilter_ipv4/ip_tables.h>
26#include <linux/netfilter_ipv4/ip_nat.h> 26#include <linux/netfilter_ipv4/ip_nat.h>
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 79f56f662b..bc59d0d6e8 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -31,8 +31,8 @@
31#include <net/checksum.h> 31#include <net/checksum.h>
32#include <linux/spinlock.h> 32#include <linux/spinlock.h>
33 33
34#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) 34#define ASSERT_READ_LOCK(x)
35#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) 35#define ASSERT_WRITE_LOCK(x)
36 36
37#include <linux/netfilter_ipv4/ip_nat.h> 37#include <linux/netfilter_ipv4/ip_nat.h>
38#include <linux/netfilter_ipv4/ip_nat_rule.h> 38#include <linux/netfilter_ipv4/ip_nat_rule.h>
@@ -373,7 +373,6 @@ static int init_or_cleanup(int init)
373 cleanup_rule_init: 373 cleanup_rule_init:
374 ip_nat_rule_cleanup(); 374 ip_nat_rule_cleanup();
375 cleanup_nothing: 375 cleanup_nothing:
376 MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock);
377 return ret; 376 return ret;
378} 377}
379 378
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 8a54f92b84..c88dfcd38c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -67,7 +67,6 @@ static DECLARE_MUTEX(ipt_mutex);
67/* Must have mutex */ 67/* Must have mutex */
68#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0) 68#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
69#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0) 69#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
70#include <linux/netfilter_ipv4/lockhelp.h>
71#include <linux/netfilter_ipv4/listhelp.h> 70#include <linux/netfilter_ipv4/listhelp.h>
72 71
73#if 0 72#if 0
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 0f12e3a3dc..9cde8c61f5 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -29,7 +29,6 @@
29#include <linux/netfilter_ipv4/ip_tables.h> 29#include <linux/netfilter_ipv4/ip_tables.h>
30#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> 30#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
31#include <linux/netfilter_ipv4/ip_conntrack.h> 31#include <linux/netfilter_ipv4/ip_conntrack.h>
32#include <linux/netfilter_ipv4/lockhelp.h>
33 32
34#define CLUSTERIP_VERSION "0.6" 33#define CLUSTERIP_VERSION "0.6"
35 34
@@ -41,6 +40,8 @@
41#define DEBUGP 40#define DEBUGP
42#endif 41#endif
43 42
43#define ASSERT_READ_LOCK(x)
44
44MODULE_LICENSE("GPL"); 45MODULE_LICENSE("GPL");
45MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 46MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
46MODULE_DESCRIPTION("iptables target for CLUSTERIP"); 47MODULE_DESCRIPTION("iptables target for CLUSTERIP");
@@ -67,7 +68,7 @@ static LIST_HEAD(clusterip_configs);
67 68
68/* clusterip_lock protects the clusterip_configs list _AND_ the configurable 69/* clusterip_lock protects the clusterip_configs list _AND_ the configurable
69 * data within all structures (num_local_nodes, local_nodes[]) */ 70 * data within all structures (num_local_nodes, local_nodes[]) */
70static DECLARE_RWLOCK(clusterip_lock); 71static DEFINE_RWLOCK(clusterip_lock);
71 72
72#ifdef CONFIG_PROC_FS 73#ifdef CONFIG_PROC_FS
73static struct file_operations clusterip_proc_fops; 74static struct file_operations clusterip_proc_fops;
@@ -82,9 +83,9 @@ clusterip_config_get(struct clusterip_config *c) {
82static inline void 83static inline void
83clusterip_config_put(struct clusterip_config *c) { 84clusterip_config_put(struct clusterip_config *c) {
84 if (atomic_dec_and_test(&c->refcount)) { 85 if (atomic_dec_and_test(&c->refcount)) {
85 WRITE_LOCK(&clusterip_lock); 86 write_lock_bh(&clusterip_lock);
86 list_del(&c->list); 87 list_del(&c->list);
87 WRITE_UNLOCK(&clusterip_lock); 88 write_unlock_bh(&clusterip_lock);
88 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); 89 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
89 dev_put(c->dev); 90 dev_put(c->dev);
90 kfree(c); 91 kfree(c);
@@ -97,7 +98,7 @@ __clusterip_config_find(u_int32_t clusterip)
97{ 98{
98 struct list_head *pos; 99 struct list_head *pos;
99 100
100 MUST_BE_READ_LOCKED(&clusterip_lock); 101 ASSERT_READ_LOCK(&clusterip_lock);
101 list_for_each(pos, &clusterip_configs) { 102 list_for_each(pos, &clusterip_configs) {
102 struct clusterip_config *c = list_entry(pos, 103 struct clusterip_config *c = list_entry(pos,
103 struct clusterip_config, list); 104 struct clusterip_config, list);
@@ -114,14 +115,14 @@ clusterip_config_find_get(u_int32_t clusterip)
114{ 115{
115 struct clusterip_config *c; 116 struct clusterip_config *c;
116 117
117 READ_LOCK(&clusterip_lock); 118 read_lock_bh(&clusterip_lock);
118 c = __clusterip_config_find(clusterip); 119 c = __clusterip_config_find(clusterip);
119 if (!c) { 120 if (!c) {
120 READ_UNLOCK(&clusterip_lock); 121 read_unlock_bh(&clusterip_lock);
121 return NULL; 122 return NULL;
122 } 123 }
123 atomic_inc(&c->refcount); 124 atomic_inc(&c->refcount);
124 READ_UNLOCK(&clusterip_lock); 125 read_unlock_bh(&clusterip_lock);
125 126
126 return c; 127 return c;
127} 128}
@@ -160,9 +161,9 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
160 c->pde->data = c; 161 c->pde->data = c;
161#endif 162#endif
162 163
163 WRITE_LOCK(&clusterip_lock); 164 write_lock_bh(&clusterip_lock);
164 list_add(&c->list, &clusterip_configs); 165 list_add(&c->list, &clusterip_configs);
165 WRITE_UNLOCK(&clusterip_lock); 166 write_unlock_bh(&clusterip_lock);
166 167
167 return c; 168 return c;
168} 169}
@@ -172,25 +173,25 @@ clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
172{ 173{
173 int i; 174 int i;
174 175
175 WRITE_LOCK(&clusterip_lock); 176 write_lock_bh(&clusterip_lock);
176 177
177 if (c->num_local_nodes >= CLUSTERIP_MAX_NODES 178 if (c->num_local_nodes >= CLUSTERIP_MAX_NODES
178 || nodenum > CLUSTERIP_MAX_NODES) { 179 || nodenum > CLUSTERIP_MAX_NODES) {
179 WRITE_UNLOCK(&clusterip_lock); 180 write_unlock_bh(&clusterip_lock);
180 return 1; 181 return 1;
181 } 182 }
182 183
183 /* check if we already have this number in our array */ 184 /* check if we already have this number in our array */
184 for (i = 0; i < c->num_local_nodes; i++) { 185 for (i = 0; i < c->num_local_nodes; i++) {
185 if (c->local_nodes[i] == nodenum) { 186 if (c->local_nodes[i] == nodenum) {
186 WRITE_UNLOCK(&clusterip_lock); 187 write_unlock_bh(&clusterip_lock);
187 return 1; 188 return 1;
188 } 189 }
189 } 190 }
190 191
191 c->local_nodes[c->num_local_nodes++] = nodenum; 192 c->local_nodes[c->num_local_nodes++] = nodenum;
192 193
193 WRITE_UNLOCK(&clusterip_lock); 194 write_unlock_bh(&clusterip_lock);
194 return 0; 195 return 0;
195} 196}
196 197
@@ -199,10 +200,10 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
199{ 200{
200 int i; 201 int i;
201 202
202 WRITE_LOCK(&clusterip_lock); 203 write_lock_bh(&clusterip_lock);
203 204
204 if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { 205 if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
205 WRITE_UNLOCK(&clusterip_lock); 206 write_unlock_bh(&clusterip_lock);
206 return 1; 207 return 1;
207 } 208 }
208 209
@@ -211,12 +212,12 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
211 int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); 212 int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
212 memmove(&c->local_nodes[i], &c->local_nodes[i+1], size); 213 memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
213 c->num_local_nodes--; 214 c->num_local_nodes--;
214 WRITE_UNLOCK(&clusterip_lock); 215 write_unlock_bh(&clusterip_lock);
215 return 0; 216 return 0;
216 } 217 }
217 } 218 }
218 219
219 WRITE_UNLOCK(&clusterip_lock); 220 write_unlock_bh(&clusterip_lock);
220 return 1; 221 return 1;
221} 222}
222 223
@@ -286,21 +287,21 @@ clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
286{ 287{
287 int i; 288 int i;
288 289
289 READ_LOCK(&clusterip_lock); 290 read_lock_bh(&clusterip_lock);
290 291
291 if (config->num_local_nodes == 0) { 292 if (config->num_local_nodes == 0) {
292 READ_UNLOCK(&clusterip_lock); 293 read_unlock_bh(&clusterip_lock);
293 return 0; 294 return 0;
294 } 295 }
295 296
296 for (i = 0; i < config->num_local_nodes; i++) { 297 for (i = 0; i < config->num_local_nodes; i++) {
297 if (config->local_nodes[i] == hash) { 298 if (config->local_nodes[i] == hash) {
298 READ_UNLOCK(&clusterip_lock); 299 read_unlock_bh(&clusterip_lock);
299 return 1; 300 return 1;
300 } 301 }
301 } 302 }
302 303
303 READ_UNLOCK(&clusterip_lock); 304 read_unlock_bh(&clusterip_lock);
304 305
305 return 0; 306 return 0;
306} 307}
@@ -338,7 +339,7 @@ target(struct sk_buff **pskb,
338 * error messages (RELATED) and information requests (see below) */ 339 * error messages (RELATED) and information requests (see below) */
339 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP 340 if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
340 && (ctinfo == IP_CT_RELATED 341 && (ctinfo == IP_CT_RELATED
341 || ctinfo == IP_CT_IS_REPLY+IP_CT_IS_REPLY)) 342 || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
342 return IPT_CONTINUE; 343 return IPT_CONTINUE;
343 344
344 /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, 345 /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
@@ -578,7 +579,7 @@ static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
578 struct clusterip_config *c = pde->data; 579 struct clusterip_config *c = pde->data;
579 unsigned int *nodeidx; 580 unsigned int *nodeidx;
580 581
581 READ_LOCK(&clusterip_lock); 582 read_lock_bh(&clusterip_lock);
582 if (*pos >= c->num_local_nodes) 583 if (*pos >= c->num_local_nodes)
583 return NULL; 584 return NULL;
584 585
@@ -608,7 +609,7 @@ static void clusterip_seq_stop(struct seq_file *s, void *v)
608{ 609{
609 kfree(v); 610 kfree(v);
610 611
611 READ_UNLOCK(&clusterip_lock); 612 read_unlock_bh(&clusterip_lock);
612} 613}
613 614
614static int clusterip_seq_show(struct seq_file *s, void *v) 615static int clusterip_seq_show(struct seq_file *s, void *v)
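
Besides the lock conversion, the target() hunk fixes a typo in the ctinfo test: a reply to a RELATED ICMP error is IP_CT_RELATED + IP_CT_IS_REPLY, not IP_CT_IS_REPLY added to itself. As a fragment, the corrected comparison reads:

/* Reply-direction ctinfo values are the base state offset by
 * IP_CT_IS_REPLY, so related ICMP is matched in both directions. */
if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
	return IPT_CONTINUE;	/* leave related ICMP traffic alone */
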
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 57e9f6cf1c..91e74502c3 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -33,7 +33,7 @@ MODULE_DESCRIPTION("iptables MASQUERADE target module");
33#endif 33#endif
34 34
35/* Lock protects masq region inside conntrack */ 35/* Lock protects masq region inside conntrack */
36static DECLARE_RWLOCK(masq_lock); 36static DEFINE_RWLOCK(masq_lock);
37 37
38/* FIXME: Multiple targets. --RR */ 38/* FIXME: Multiple targets. --RR */
39static int 39static int
@@ -103,9 +103,9 @@ masquerade_target(struct sk_buff **pskb,
103 return NF_DROP; 103 return NF_DROP;
104 } 104 }
105 105
106 WRITE_LOCK(&masq_lock); 106 write_lock_bh(&masq_lock);
107 ct->nat.masq_index = out->ifindex; 107 ct->nat.masq_index = out->ifindex;
108 WRITE_UNLOCK(&masq_lock); 108 write_unlock_bh(&masq_lock);
109 109
110 /* Transfer from original range. */ 110 /* Transfer from original range. */
111 newrange = ((struct ip_nat_range) 111 newrange = ((struct ip_nat_range)
@@ -122,9 +122,9 @@ device_cmp(struct ip_conntrack *i, void *ifindex)
122{ 122{
123 int ret; 123 int ret;
124 124
125 READ_LOCK(&masq_lock); 125 read_lock_bh(&masq_lock);
126 ret = (i->nat.masq_index == (int)(long)ifindex); 126 ret = (i->nat.masq_index == (int)(long)ifindex);
127 READ_UNLOCK(&masq_lock); 127 read_unlock_bh(&masq_lock);
128 128
129 return ret; 129 return ret;
130} 130}
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 266d649792..9156964460 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -104,10 +104,12 @@ static inline struct rtable *route_reverse(struct sk_buff *skb,
104static void send_reset(struct sk_buff *oldskb, int hook) 104static void send_reset(struct sk_buff *oldskb, int hook)
105{ 105{
106 struct sk_buff *nskb; 106 struct sk_buff *nskb;
107 struct iphdr *iph = oldskb->nh.iph;
107 struct tcphdr _otcph, *oth, *tcph; 108 struct tcphdr _otcph, *oth, *tcph;
108 struct rtable *rt; 109 struct rtable *rt;
109 u_int16_t tmp_port; 110 u_int16_t tmp_port;
110 u_int32_t tmp_addr; 111 u_int32_t tmp_addr;
112 unsigned int tcplen;
111 int needs_ack; 113 int needs_ack;
112 int hh_len; 114 int hh_len;
113 115
@@ -124,7 +126,16 @@ static void send_reset(struct sk_buff *oldskb, int hook)
124 if (oth->rst) 126 if (oth->rst)
125 return; 127 return;
126 128
127 /* FIXME: Check checksum --RR */ 129 /* Check checksum */
130 tcplen = oldskb->len - iph->ihl * 4;
131 if (((hook != NF_IP_LOCAL_IN && oldskb->ip_summed != CHECKSUM_HW) ||
132 (hook == NF_IP_LOCAL_IN &&
133 oldskb->ip_summed != CHECKSUM_UNNECESSARY)) &&
134 csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP,
135 oldskb->ip_summed == CHECKSUM_HW ? oldskb->csum :
136 skb_checksum(oldskb, iph->ihl * 4, tcplen, 0)))
137 return;
138
128 if ((rt = route_reverse(oldskb, oth, hook)) == NULL) 139 if ((rt = route_reverse(oldskb, oth, hook)) == NULL)
129 return; 140 return;
130 141
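
send_reset() finally resolves its old "FIXME: Check checksum" note: the offending packet's TCP checksum is verified before a RST is generated, except where it is already trusted (CHECKSUM_HW on the forward path, CHECKSUM_UNNECESSARY for local delivery). The added logic, restated as a commented fragment with the names from the patch:

/* No RST for a corrupt packet: validate unless the stack already has. */
tcplen = oldskb->len - iph->ihl * 4;	/* TCP header plus payload */
if (((hook != NF_IP_LOCAL_IN && oldskb->ip_summed != CHECKSUM_HW) ||
     (hook == NF_IP_LOCAL_IN &&
      oldskb->ip_summed != CHECKSUM_UNNECESSARY)) &&
    csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP,
		      oldskb->ip_summed == CHECKSUM_HW
			? oldskb->csum	/* reuse the hardware sum */
			: skb_checksum(oldskb, iph->ihl * 4, tcplen, 0)))
	return;		/* checksum failed: silently skip the reply */
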
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 6f2cefbe16..52a0076302 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -56,7 +56,6 @@
56#include <linux/netfilter.h> 56#include <linux/netfilter.h>
57#include <linux/netfilter_ipv4/ip_tables.h> 57#include <linux/netfilter_ipv4/ip_tables.h>
58#include <linux/netfilter_ipv4/ipt_ULOG.h> 58#include <linux/netfilter_ipv4/ipt_ULOG.h>
59#include <linux/netfilter_ipv4/lockhelp.h>
60#include <net/sock.h> 59#include <net/sock.h>
61#include <linux/bitops.h> 60#include <linux/bitops.h>
62 61
@@ -99,8 +98,8 @@ typedef struct {
99 98
100static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */ 99static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
101 100
102static struct sock *nflognl; /* our socket */ 101static struct sock *nflognl; /* our socket */
103static DECLARE_LOCK(ulog_lock); /* spinlock */ 102static DEFINE_SPINLOCK(ulog_lock); /* spinlock */
104 103
105/* send one ulog_buff_t to userspace */ 104/* send one ulog_buff_t to userspace */
106static void ulog_send(unsigned int nlgroupnum) 105static void ulog_send(unsigned int nlgroupnum)
@@ -135,9 +134,9 @@ static void ulog_timer(unsigned long data)
135 134
136 /* lock to protect against somebody modifying our structure 135 /* lock to protect against somebody modifying our structure
137 * from ipt_ulog_target at the same time */ 136 * from ipt_ulog_target at the same time */
138 LOCK_BH(&ulog_lock); 137 spin_lock_bh(&ulog_lock);
139 ulog_send(data); 138 ulog_send(data);
140 UNLOCK_BH(&ulog_lock); 139 spin_unlock_bh(&ulog_lock);
141} 140}
142 141
143static struct sk_buff *ulog_alloc_skb(unsigned int size) 142static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -193,7 +192,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
193 192
194 ub = &ulog_buffers[groupnum]; 193 ub = &ulog_buffers[groupnum];
195 194
196 LOCK_BH(&ulog_lock); 195 spin_lock_bh(&ulog_lock);
197 196
198 if (!ub->skb) { 197 if (!ub->skb) {
199 if (!(ub->skb = ulog_alloc_skb(size))) 198 if (!(ub->skb = ulog_alloc_skb(size)))
@@ -278,7 +277,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
278 ulog_send(groupnum); 277 ulog_send(groupnum);
279 } 278 }
280 279
281 UNLOCK_BH(&ulog_lock); 280 spin_unlock_bh(&ulog_lock);
282 281
283 return; 282 return;
284 283
@@ -288,7 +287,7 @@ nlmsg_failure:
288alloc_failure: 287alloc_failure:
289 PRINTR("ipt_ULOG: Error building netlink message\n"); 288 PRINTR("ipt_ULOG: Error building netlink message\n");
290 289
291 UNLOCK_BH(&ulog_lock); 290 spin_unlock_bh(&ulog_lock);
292} 291}
293 292
294static unsigned int ipt_ulog_target(struct sk_buff **pskb, 293static unsigned int ipt_ulog_target(struct sk_buff **pskb,
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index f1937190cd..564b49bfeb 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -37,7 +37,6 @@
37 37
38#include <linux/netfilter_ipv4/ip_tables.h> 38#include <linux/netfilter_ipv4/ip_tables.h>
39#include <linux/netfilter_ipv4/ipt_hashlimit.h> 39#include <linux/netfilter_ipv4/ipt_hashlimit.h>
40#include <linux/netfilter_ipv4/lockhelp.h>
41 40
42/* FIXME: this is just for IP_NF_ASSERT */ 41/* FIXME: this is just for IP_NF_ASSERT */
43#include <linux/netfilter_ipv4/ip_conntrack.h> 42#include <linux/netfilter_ipv4/ip_conntrack.h>
@@ -92,7 +91,7 @@ struct ipt_hashlimit_htable {
92 struct hlist_head hash[0]; /* hashtable itself */ 91 struct hlist_head hash[0]; /* hashtable itself */
93}; 92};
94 93
95static DECLARE_LOCK(hashlimit_lock); /* protects htables list */ 94static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */
96static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */ 95static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */
97static HLIST_HEAD(hashlimit_htables); 96static HLIST_HEAD(hashlimit_htables);
98static kmem_cache_t *hashlimit_cachep; 97static kmem_cache_t *hashlimit_cachep;
@@ -233,9 +232,9 @@ static int htable_create(struct ipt_hashlimit_info *minfo)
233 hinfo->timer.function = htable_gc; 232 hinfo->timer.function = htable_gc;
234 add_timer(&hinfo->timer); 233 add_timer(&hinfo->timer);
235 234
236 LOCK_BH(&hashlimit_lock); 235 spin_lock_bh(&hashlimit_lock);
237 hlist_add_head(&hinfo->node, &hashlimit_htables); 236 hlist_add_head(&hinfo->node, &hashlimit_htables);
238 UNLOCK_BH(&hashlimit_lock); 237 spin_unlock_bh(&hashlimit_lock);
239 238
240 return 0; 239 return 0;
241} 240}
@@ -301,15 +300,15 @@ static struct ipt_hashlimit_htable *htable_find_get(char *name)
301 struct ipt_hashlimit_htable *hinfo; 300 struct ipt_hashlimit_htable *hinfo;
302 struct hlist_node *pos; 301 struct hlist_node *pos;
303 302
304 LOCK_BH(&hashlimit_lock); 303 spin_lock_bh(&hashlimit_lock);
305 hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) { 304 hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) {
306 if (!strcmp(name, hinfo->pde->name)) { 305 if (!strcmp(name, hinfo->pde->name)) {
307 atomic_inc(&hinfo->use); 306 atomic_inc(&hinfo->use);
308 UNLOCK_BH(&hashlimit_lock); 307 spin_unlock_bh(&hashlimit_lock);
309 return hinfo; 308 return hinfo;
310 } 309 }
311 } 310 }
312 UNLOCK_BH(&hashlimit_lock); 311 spin_unlock_bh(&hashlimit_lock);
313 312
314 return NULL; 313 return NULL;
315} 314}
@@ -317,9 +316,9 @@ static struct ipt_hashlimit_htable *htable_find_get(char *name)
317static void htable_put(struct ipt_hashlimit_htable *hinfo) 316static void htable_put(struct ipt_hashlimit_htable *hinfo)
318{ 317{
319 if (atomic_dec_and_test(&hinfo->use)) { 318 if (atomic_dec_and_test(&hinfo->use)) {
320 LOCK_BH(&hashlimit_lock); 319 spin_lock_bh(&hashlimit_lock);
321 hlist_del(&hinfo->node); 320 hlist_del(&hinfo->node);
322 UNLOCK_BH(&hashlimit_lock); 321 spin_unlock_bh(&hashlimit_lock);
323 htable_destroy(hinfo); 322 htable_destroy(hinfo);
324 } 323 }
325} 324}
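ipt_hashlimit gets the same lockhelp conversion; worth noting is the refcount pattern the lock guards: a table is looked up and pinned under the spinlock, but unpinning only takes the lock when the last reference drops. A condensed sketch of that pattern with invented names (table_put, table_list_lock), not the file's own code:

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <asm/atomic.h>

static DEFINE_SPINLOCK(table_list_lock);	/* invented name */
static HLIST_HEAD(table_list);

struct table {
	struct hlist_node node;
	atomic_t use;
};

static void table_put(struct table *t)
{
	/* Only the thread that drops the final reference unlinks and
	 * frees; the spinlock covers just the list manipulation. */
	if (atomic_dec_and_test(&t->use)) {
		spin_lock_bh(&table_list_lock);
		hlist_del(&t->node);
		spin_unlock_bh(&table_list_lock);
		kfree(t);	/* stands in for htable_destroy() */
	}
}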
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c
index 33fdf364d3..3e7dd014de 100644
--- a/net/ipv4/netfilter/ipt_helper.c
+++ b/net/ipv4/netfilter/ipt_helper.c
@@ -53,7 +53,7 @@ match(const struct sk_buff *skb,
 		return ret;
 	}
 
-	READ_LOCK(&ip_conntrack_lock);
+	read_lock_bh(&ip_conntrack_lock);
 	if (!ct->master->helper) {
 		DEBUGP("ipt_helper: master ct %p has no helper\n",
 		       exp->expectant);
@@ -69,7 +69,7 @@ match(const struct sk_buff *skb,
 	ret ^= !strncmp(ct->master->helper->name, info->name,
 	                strlen(ct->master->helper->name));
 out_unlock:
-	READ_UNLOCK(&ip_conntrack_lock);
+	read_unlock_bh(&ip_conntrack_lock);
 	return ret;
 }
 
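Here READ_LOCK/READ_UNLOCK on ip_conntrack_lock become read_lock_bh/read_unlock_bh, matching the conntrack core's move to plain rwlock primitives with bottom halves disabled. A generic reader-side sketch, with a hypothetical lock and function name, not this module's code:

#include <linux/spinlock.h>

static rwlock_t example_lock = RW_LOCK_UNLOCKED;	/* hypothetical */

static int example_reader(void)
{
	int ret = 0;

	/* _bh reader: the same structures are written from softirq
	 * context, so bottom halves must be off while we look. */
	read_lock_bh(&example_lock);
	/* ... inspect shared state, set ret ... */
	read_unlock_bh(&example_lock);
	return ret;
}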
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 25ab9fabdc..2d44b07688 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -223,7 +223,7 @@ static int ip_recent_ctrl(struct file *file, const char __user *input, unsigned
 			curr_table->table[count].last_seen = 0;
 			curr_table->table[count].addr = 0;
 			curr_table->table[count].ttl = 0;
-			memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
+			memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
 			curr_table->table[count].oldest_pkt = 0;
 			curr_table->table[count].time_pos = 0;
 			curr_table->time_info[count].position = count;
@@ -502,7 +502,7 @@ match(const struct sk_buff *skb,
 			location = time_info[curr_table->time_pos].position;
 			hash_table[r_list[location].hash_entry] = -1;
 			hash_table[hash_result] = location;
-			memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
+			memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
 			r_list[location].time_pos = curr_table->time_pos;
 			r_list[location].addr = addr;
 			r_list[location].ttl = ttl;
@@ -631,7 +631,7 @@ match(const struct sk_buff *skb,
 				r_list[location].last_seen = 0;
 				r_list[location].addr = 0;
 				r_list[location].ttl = 0;
-				memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
+				memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
 				r_list[location].oldest_pkt = 0;
 				ans = !info->invert;
 			}
@@ -734,10 +734,10 @@ checkentry(const char *tablename,
 	memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot);
 #ifdef DEBUG
 	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n",
-				sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot);
+				sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot);
 #endif
 
-	hold = vmalloc(sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot);
+	hold = vmalloc(sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot);
 #ifdef DEBUG
 	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n");
 #endif
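The ipt_recent hunks fix a sizing bug: last_pkts holds unsigned long entries, but the allocations and memset() calls were sized with sizeof(u_int32_t), which undercounts by half on 64-bit. A stand-alone illustration of why sizing by the pointee is safer; this is ordinary user-space C, not kernel code, and nitems is arbitrary:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	const size_t nitems = 16;
	unsigned long *pkts;

	/* sizeof(*pkts) tracks the element type automatically; writing
	 * a hard-coded 32-bit type here would allocate 64 bytes instead
	 * of 128 on an LP64 machine, and later memsets would fall short. */
	pkts = malloc(sizeof(*pkts) * nitems);
	if (pkts == NULL)
		return 1;
	memset(pkts, 0, sizeof(*pkts) * nitems);
	printf("allocated and cleared %zu bytes\n", sizeof(*pkts) * nitems);
	free(pkts);
	return 0;
}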
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5b1ec586ba..d1835b1bc8 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -259,7 +259,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-static int raw_send_hdrinc(struct sock *sk, void *from, int length,
+static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 			struct rtable *rt,
 			unsigned int flags)
 {
@@ -298,7 +298,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, int length,
 		goto error_fault;
 
 	/* We don't modify invalid header */
-	if (length >= sizeof(*iph) && iph->ihl * 4 <= length) {
+	if (length >= sizeof(*iph) && iph->ihl * 4U <= length) {
 		if (!iph->saddr)
 			iph->saddr = rt->rt_src;
 		iph->check = 0;
@@ -332,7 +332,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 	u8 __user *type = NULL;
 	u8 __user *code = NULL;
 	int probed = 0;
-	int i;
+	unsigned int i;
 
 	if (!msg->msg_iov)
 		return;
@@ -384,7 +384,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int err;
 
 	err = -EMSGSIZE;
-	if (len < 0 || len > 0xFFFF)
+	if (len > 0xFFFF)
 		goto out;
 
 	/*
@@ -514,7 +514,10 @@ done:
 		kfree(ipc.opt);
 	ip_rt_put(rt);
 
-out:	return err < 0 ? err : len;
+out:
+	if (err < 0)
+		return err;
+	return len;
 
 do_confirm:
 	dst_confirm(&rt->u.dst);
@@ -610,7 +613,10 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	copied = skb->len;
 done:
 	skb_free_datagram(sk, skb);
-out:	return err ? err : copied;
+out:
+	if (err)
+		return err;
+	return copied;
 }
 
 static int raw_init(struct sock *sk)
@@ -691,11 +697,11 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
 		struct sk_buff *skb;
 		int amount = 0;
 
-		spin_lock_irq(&sk->sk_receive_queue.lock);
+		spin_lock_bh(&sk->sk_receive_queue.lock);
 		skb = skb_peek(&sk->sk_receive_queue);
 		if (skb != NULL)
 			amount = skb->len;
-		spin_unlock_irq(&sk->sk_receive_queue.lock);
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		return put_user(amount, (int __user *)arg);
 	}
 
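Two themes in the raw.c hunks: length handling moves to size_t, which makes the old `len < 0` arm dead code (only the 16-bit IPv4 total-length bound survives), and the SIOCINQ path drops from spin_lock_irq to spin_lock_bh, since the receive-queue lock is only contended from bottom-half context. A minimal stand-alone sketch of the length guard, where check_len is an invented name mirroring the raw_sendmsg() check:

#include <stddef.h>

static int check_len(size_t len)
{
	/* With an unsigned size_t a negative check can never fire,
	 * so only the upper bound remains. */
	if (len > 0xFFFF)	/* IPv4 total length is a 16-bit field */
		return -1;	/* stands in for -EMSGSIZE */
	return 0;
}

int main(void)
{
	return check_len(70000) ? 0 : 1;	/* oversized -> rejected */
}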
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a682d28e24..80cf633d9f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1767,7 +1767,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb,
 				       struct in_device *in_dev,
 				       u32 daddr, u32 saddr, u32 tos)
 {
-	struct rtable* rth;
+	struct rtable* rth = NULL;
 	int err;
 	unsigned hash;
 
@@ -1794,7 +1794,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
 			       u32 daddr, u32 saddr, u32 tos)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-	struct rtable* rth;
+	struct rtable* rth = NULL;
 	unsigned char hop, hopcount, lasthop;
 	int err = -EINVAL;
 	unsigned int hash;
@@ -2239,7 +2239,7 @@ static inline int ip_mkroute_output_def(struct rtable **rp,
 				       struct net_device *dev_out,
 				       unsigned flags)
 {
-	struct rtable *rth;
+	struct rtable *rth = NULL;
 	int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
 	unsigned hash;
 	if (err == 0) {
@@ -2267,7 +2267,7 @@ static inline int ip_mkroute_output(struct rtable** rp,
 	unsigned char hop;
 	unsigned hash;
 	int err = -EINVAL;
-	struct rtable *rth;
+	struct rtable *rth = NULL;
 
 	if (res->fi && res->fi->fib_nhs > 1) {
 		unsigned char hopcount = res->fi->fib_nhs;
@@ -2581,7 +2581,7 @@ int ip_route_output_key(struct rtable **rp, struct flowi *flp)
 }
 
 static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-			int nowait)
+			int nowait, unsigned int flags)
 {
 	struct rtable *rt = (struct rtable*)skb->dst;
 	struct rtmsg *r;
@@ -2591,9 +2591,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 #ifdef CONFIG_IP_MROUTE
 	struct rtattr *eptr;
 #endif
-	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
+	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
 	r = NLMSG_DATA(nlh);
-	nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
 	r->rtm_family	 = AF_INET;
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
@@ -2744,7 +2743,7 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
 
 	err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0);
-			   RTM_NEWROUTE, 0);
+			   RTM_NEWROUTE, 0, 0);
 	if (!err)
 		goto out_free;
 	if (err < 0) {
@@ -2781,8 +2780,8 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 		skb->dst = dst_clone(&rt->u.dst);
 		if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
-				 cb->nlh->nlmsg_seq,
-				 RTM_NEWROUTE, 1) <= 0) {
+				 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+				 1, NLM_F_MULTI) <= 0) {
 			dst_release(xchg(&skb->dst, NULL));
 			rcu_read_unlock_bh();
 			goto done;
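The route.c netlink hunks plumb an explicit flags argument into rt_fill_info() so NLMSG_NEW() can stamp NLM_F_MULTI on dump replies at allocation time, rather than patching nlmsg_flags after NLMSG_PUT(). A sketch of that calling convention; example_fill is invented, and NLMSG_NEW of this era branches to a local nlmsg_failure label when the skb is full:

#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

static int example_fill(struct sk_buff *skb, u32 pid, u32 seq,
			int event, unsigned int flags)
{
	struct nlmsghdr *nlh;

	/* One-shot requests pass flags == 0; dump callbacks pass
	 * NLM_F_MULTI so every message in the batch is marked. */
	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(struct rtmsg), flags);
	/* ... fill NLMSG_DATA(nlh) ... */
	return skb->len;

nlmsg_failure:
	return -1;
}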
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e923d2f021..72d0144421 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -169,10 +169,10 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 	return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
 }
 
-extern struct or_calltable or_ipv4;
+extern struct request_sock_ops tcp_request_sock_ops;
 
 static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
-					   struct open_request *req,
+					   struct request_sock *req,
 					   struct dst_entry *dst)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -182,7 +182,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 	if (child)
 		tcp_acceptq_queue(sk, req, child);
 	else
-		tcp_openreq_free(req);
+		reqsk_free(req);
 
 	return child;
 }
@@ -190,10 +190,12 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 			     struct ip_options *opt)
 {
+	struct inet_request_sock *ireq;
+	struct tcp_request_sock *treq;
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 cookie = ntohl(skb->h.th->ack_seq) - 1;
 	struct sock *ret = sk;
-	struct open_request *req;
+	struct request_sock *req;
 	int mss;
 	struct rtable *rt;
 	__u8 rcv_wscale;
@@ -209,19 +211,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
 
-	req = tcp_openreq_alloc();
 	ret = NULL;
+	req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
 	if (!req)
 		goto out;
 
-	req->rcv_isn = htonl(skb->h.th->seq) - 1;
-	req->snt_isn = cookie;
+	ireq = inet_rsk(req);
+	treq = tcp_rsk(req);
+	treq->rcv_isn = htonl(skb->h.th->seq) - 1;
+	treq->snt_isn = cookie;
 	req->mss = mss;
-	req->rmt_port = skb->h.th->source;
-	req->af.v4_req.loc_addr = skb->nh.iph->daddr;
-	req->af.v4_req.rmt_addr = skb->nh.iph->saddr;
-	req->class = &or_ipv4; /* for savety */
-	req->af.v4_req.opt = NULL;
+	ireq->rmt_port = skb->h.th->source;
+	ireq->loc_addr = skb->nh.iph->daddr;
+	ireq->rmt_addr = skb->nh.iph->saddr;
+	ireq->opt = NULL;
 
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
@@ -229,17 +232,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	if (opt && opt->optlen) {
 		int opt_size = sizeof(struct ip_options) + opt->optlen;
 
-		req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC);
-		if (req->af.v4_req.opt) {
-			if (ip_options_echo(req->af.v4_req.opt, skb)) {
-				kfree(req->af.v4_req.opt);
-				req->af.v4_req.opt = NULL;
-			}
+		ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
+		if (ireq->opt != NULL && ip_options_echo(ireq->opt, skb)) {
+			kfree(ireq->opt);
+			ireq->opt = NULL;
 		}
 	}
 
-	req->snd_wscale = req->rcv_wscale = req->tstamp_ok = 0;
-	req->wscale_ok = req->sack_ok = 0;
+	ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0;
+	ireq->wscale_ok = ireq->sack_ok = 0;
 	req->expires = 0UL;
 	req->retrans = 0;
 
@@ -253,15 +254,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		struct flowi fl = { .nl_u = { .ip4_u =
 				    { .daddr = ((opt && opt->srr) ?
 						opt->faddr :
-						req->af.v4_req.rmt_addr),
-				      .saddr = req->af.v4_req.loc_addr,
+						ireq->rmt_addr),
+				      .saddr = ireq->loc_addr,
 				      .tos = RT_CONN_FLAGS(sk) } },
 		    .proto = IPPROTO_TCP,
 		    .uli_u = { .ports =
 			       { .sport = skb->h.th->dest,
 				 .dport = skb->h.th->source } } };
 		if (ip_route_output_key(&rt, &fl)) {
-			tcp_openreq_free(req);
+			reqsk_free(req);
 			goto out;
 		}
 	}
@@ -272,7 +273,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 				  &req->rcv_wnd, &req->window_clamp,
 				  0, &rcv_wscale);
 	/* BTW win scale with syncookies is 0 by definition */
-	req->rcv_wscale = rcv_wscale;
+	ireq->rcv_wscale = rcv_wscale;
 
 	ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
 out:	return ret;
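syncookies.c is converted from the TCP-private struct open_request to the generic struct request_sock: allocation goes through reqsk_alloc() with an ops table, freeing through reqsk_free(), and the IPv4- and TCP-specific fields move into layered structs reached via inet_rsk() and tcp_rsk(). A simplified picture of that layering; the real definitions live in the new request_sock headers, and the field lists here are abbreviated:

/* Each layer embeds its parent as the first member, so the accessors
 * are plain downcasts that cost nothing at runtime. */
struct request_sock {
	struct request_sock *dl_next;
	/* generic: expires, retrans, mss, rcv_wnd, rsk_ops, ... */
};

struct inet_request_sock {
	struct request_sock req;	/* must stay first */
	/* AF_INET: loc_addr, rmt_addr, rmt_port, opt, wscale bits, ... */
};

struct tcp_request_sock {
	struct inet_request_sock ireq;	/* must stay first */
	/* TCP: rcv_isn, snt_isn */
};

static inline struct inet_request_sock *inet_rsk(struct request_sock *sk)
{
	return (struct inet_request_sock *)sk;
}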
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3aafb298c1..23068bddbf 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -23,6 +23,7 @@ extern int sysctl_ip_nonlocal_bind;
 extern int sysctl_icmp_echo_ignore_all;
 extern int sysctl_icmp_echo_ignore_broadcasts;
 extern int sysctl_icmp_ignore_bogus_error_responses;
+extern int sysctl_icmp_errors_use_inbound_ifaddr;
 
 /* From ip_fragment.c */
 extern int sysctl_ipfrag_low_thresh;
@@ -396,6 +397,14 @@ ctl_table ipv4_table[] = {
 		.proc_handler	= &proc_dointvec
 	},
 	{
+		.ctl_name	= NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR,
+		.procname	= "icmp_errors_use_inbound_ifaddr",
+		.data		= &sysctl_icmp_errors_use_inbound_ifaddr,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
 		.ctl_name	= NET_IPV4_ROUTE,
 		.procname	= "route",
 		.maxlen		= 0,
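This hunk exposes the new icmp_errors_use_inbound_ifaddr knob; the backing integer is defined elsewhere in the patch set, presumably in net/ipv4/icmp.c. Once merged it can be flipped with `sysctl -w net.ipv4.icmp_errors_use_inbound_ifaddr=1` or by writing to the matching /proc/sys path. For reference, the general shape of an integer sysctl entry of that era, with invented names and a made-up binary number:

#include <linux/sysctl.h>

static int example_knob;		/* invented backing variable */

#define NET_EXAMPLE_KNOB 9999		/* made-up ctl_name; real numbers
					 * are allocated in linux/sysctl.h */

static ctl_table example_table[] = {
	{
		.ctl_name	= NET_EXAMPLE_KNOB,
		.procname	= "example_knob",
		.data		= &example_knob,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec
	},
	{ .ctl_name = 0 }		/* the array is zero-terminated */
};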
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a037bafcba..674bbd8cfd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,7 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
 
 DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 
-kmem_cache_t *tcp_openreq_cachep;
 kmem_cache_t *tcp_bucket_cachep;
 kmem_cache_t *tcp_timewait_cachep;
 
@@ -317,7 +316,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure);
 static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
 					       poll_table *wait)
 {
-	return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0;
+	return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0;
 }
 
 /*
@@ -463,28 +462,15 @@ int tcp_listen_start(struct sock *sk)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt;
+	int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE);
+
+	if (rc != 0)
+		return rc;
 
 	sk->sk_max_ack_backlog = 0;
 	sk->sk_ack_backlog = 0;
-	tp->accept_queue = tp->accept_queue_tail = NULL;
-	rwlock_init(&tp->syn_wait_lock);
 	tcp_delack_init(tp);
 
-	lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
-	if (!lopt)
-		return -ENOMEM;
-
-	memset(lopt, 0, sizeof(struct tcp_listen_opt));
-	for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
-		if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
-			break;
-	get_random_bytes(&lopt->hash_rnd, 4);
-
-	write_lock_bh(&tp->syn_wait_lock);
-	tp->listen_opt = lopt;
-	write_unlock_bh(&tp->syn_wait_lock);
-
 	/* There is race window here: we announce ourselves listening,
 	 * but this transition is still not validated by get_port().
 	 * It is OK, because this socket enters to hash table only
@@ -501,10 +487,7 @@ int tcp_listen_start(struct sock *sk)
 	}
 
 	sk->sk_state = TCP_CLOSE;
-	write_lock_bh(&tp->syn_wait_lock);
-	tp->listen_opt = NULL;
-	write_unlock_bh(&tp->syn_wait_lock);
-	kfree(lopt);
+	reqsk_queue_destroy(&tp->accept_queue);
 	return -EADDRINUSE;
 }
 
@@ -516,25 +499,23 @@ int tcp_listen_start(struct sock *sk)
 static void tcp_listen_stop (struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt = tp->listen_opt;
-	struct open_request *acc_req = tp->accept_queue;
-	struct open_request *req;
+	struct listen_sock *lopt;
+	struct request_sock *acc_req;
+	struct request_sock *req;
 	int i;
 
 	tcp_delete_keepalive_timer(sk);
 
 	/* make all the listen_opt local to us */
-	write_lock_bh(&tp->syn_wait_lock);
-	tp->listen_opt = NULL;
-	write_unlock_bh(&tp->syn_wait_lock);
-	tp->accept_queue = tp->accept_queue_tail = NULL;
+	lopt = reqsk_queue_yank_listen_sk(&tp->accept_queue);
+	acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue);
 
 	if (lopt->qlen) {
 		for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
 			while ((req = lopt->syn_table[i]) != NULL) {
 				lopt->syn_table[i] = req->dl_next;
 				lopt->qlen--;
-				tcp_openreq_free(req);
+				reqsk_free(req);
 
 		/* Following specs, it would be better either to send FIN
 		 * (and enter FIN-WAIT-1, it is normal close)
@@ -574,7 +555,7 @@ static void tcp_listen_stop (struct sock *sk)
 		sock_put(child);
 
 		sk_acceptq_removed(sk);
-		tcp_openreq_fastfree(req);
+		__reqsk_free(req);
 	}
 	BUG_TRAP(!sk->sk_ack_backlog);
 }
@@ -1345,7 +1326,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 		cleanup_rbuf(sk, copied);
 
-		if (tp->ucopy.task == user_recv) {
+		if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
 			/* Install new reader */
 			if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
 				user_recv = current;
@@ -1868,11 +1849,11 @@ static int wait_for_connect(struct sock *sk, long timeo)
 		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
 					  TASK_INTERRUPTIBLE);
 		release_sock(sk);
-		if (!tp->accept_queue)
+		if (reqsk_queue_empty(&tp->accept_queue))
 			timeo = schedule_timeout(timeo);
 		lock_sock(sk);
 		err = 0;
-		if (tp->accept_queue)
+		if (!reqsk_queue_empty(&tp->accept_queue))
 			break;
 		err = -EINVAL;
 		if (sk->sk_state != TCP_LISTEN)
@@ -1895,7 +1876,6 @@ static int wait_for_connect(struct sock *sk, long timeo)
 struct sock *tcp_accept(struct sock *sk, int flags, int *err)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct open_request *req;
 	struct sock *newsk;
 	int error;
 
@@ -1906,37 +1886,31 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err)
 	 */
 	error = -EINVAL;
 	if (sk->sk_state != TCP_LISTEN)
-		goto out;
+		goto out_err;
 
 	/* Find already established connection */
-	if (!tp->accept_queue) {
+	if (reqsk_queue_empty(&tp->accept_queue)) {
 		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
 		/* If this is a non blocking socket don't sleep */
 		error = -EAGAIN;
 		if (!timeo)
-			goto out;
+			goto out_err;
 
 		error = wait_for_connect(sk, timeo);
 		if (error)
-			goto out;
+			goto out_err;
 	}
 
-	req = tp->accept_queue;
-	if ((tp->accept_queue = req->dl_next) == NULL)
-		tp->accept_queue_tail = NULL;
-
-	newsk = req->sk;
-	sk_acceptq_removed(sk);
-	tcp_openreq_fastfree(req);
+	newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
 	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
-	release_sock(sk);
-	return newsk;
-
 out:
 	release_sock(sk);
+	return newsk;
+out_err:
+	newsk = NULL;
 	*err = error;
-	return NULL;
+	goto out;
 }
 
 /*
@@ -2271,13 +2245,6 @@ void __init tcp_init(void)
 	__skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
 				   sizeof(skb->cb));
 
-	tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
-					       sizeof(struct open_request),
-					       0, SLAB_HWCACHE_ALIGN,
-					       NULL, NULL);
-	if (!tcp_openreq_cachep)
-		panic("tcp_init: Cannot alloc open_request cache.");
-
 	tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
 					      sizeof(struct tcp_bind_bucket),
 					      0, SLAB_HWCACHE_ALIGN,
@@ -2338,7 +2305,7 @@ void __init tcp_init(void)
 	     (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket));
 	     order++)
 		;
-	if (order > 4) {
+	if (order >= 4) {
 		sysctl_local_port_range[0] = 32768;
 		sysctl_local_port_range[1] = 61000;
 		sysctl_tcp_max_tw_buckets = 180000;
@@ -2374,7 +2341,6 @@ EXPORT_SYMBOL(tcp_destroy_sock);
 EXPORT_SYMBOL(tcp_disconnect);
 EXPORT_SYMBOL(tcp_getsockopt);
 EXPORT_SYMBOL(tcp_ioctl);
-EXPORT_SYMBOL(tcp_openreq_cachep);
 EXPORT_SYMBOL(tcp_poll);
 EXPORT_SYMBOL(tcp_read_sock);
 EXPORT_SYMBOL(tcp_recvmsg);
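The tcp.c conversion replaces the hand-rolled accept list (accept_queue/accept_queue_tail plus tcp_listen_opt) with the request_sock_queue helpers introduced elsewhere in this merge: reqsk_queue_alloc() sets up the SYN hash and backlog, reqsk_queue_empty() replaces raw pointer tests, and reqsk_queue_get_child() dequeues the head request, detaches its child socket, and drops the accept backlog count in one call. A condensed sketch of the accept fast path under the new helpers; example_accept is an invented name and error handling is elided:

/* Assumes the kernel headers of this patch series; illustrative only. */
static struct sock *example_accept(struct tcp_sock *tp, struct sock *sk)
{
	struct sock *newsk = NULL;

	if (!reqsk_queue_empty(&tp->accept_queue))
		newsk = reqsk_queue_get_child(&tp->accept_queue, sk);
	/* NULL here means the caller must block in wait_for_connect()
	 * or fail with -EAGAIN on a non-blocking socket. */
	return newsk;
}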
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 8faa8948f7..634befc079 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -455,9 +455,10 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk,
 }
 
 static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
-			    struct open_request *req,
+			    struct request_sock *req,
 			    u32 pid, u32 seq)
 {
+	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct inet_sock *inet = inet_sk(sk);
 	unsigned char *b = skb->tail;
 	struct tcpdiagmsg *r;
@@ -482,9 +483,9 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
 		tmo = 0;
 
 	r->id.tcpdiag_sport = inet->sport;
-	r->id.tcpdiag_dport = req->rmt_port;
-	r->id.tcpdiag_src[0] = req->af.v4_req.loc_addr;
-	r->id.tcpdiag_dst[0] = req->af.v4_req.rmt_addr;
+	r->id.tcpdiag_dport = ireq->rmt_port;
+	r->id.tcpdiag_src[0] = ireq->loc_addr;
+	r->id.tcpdiag_dst[0] = ireq->rmt_addr;
 	r->tcpdiag_expires = jiffies_to_msecs(tmo),
 	r->tcpdiag_rqueue = 0;
 	r->tcpdiag_wqueue = 0;
@@ -493,9 +494,9 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
 #ifdef CONFIG_IP_TCPDIAG_IPV6
 	if (r->tcpdiag_family == AF_INET6) {
 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
-			       &req->af.v6_req.loc_addr);
+			       &tcp6_rsk(req)->loc_addr);
 		ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
-			       &req->af.v6_req.rmt_addr);
+			       &tcp6_rsk(req)->rmt_addr);
 	}
 #endif
 	nlh->nlmsg_len = skb->tail - b;
@@ -513,7 +514,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 	struct tcpdiag_entry entry;
 	struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt;
+	struct listen_sock *lopt;
 	struct rtattr *bc = NULL;
 	struct inet_sock *inet = inet_sk(sk);
 	int j, s_j;
@@ -528,9 +529,9 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 
 	entry.family = sk->sk_family;
 
-	read_lock_bh(&tp->syn_wait_lock);
+	read_lock_bh(&tp->accept_queue.syn_wait_lock);
 
-	lopt = tp->listen_opt;
+	lopt = tp->accept_queue.listen_opt;
 	if (!lopt || !lopt->qlen)
 		goto out;
 
@@ -541,13 +542,15 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 	}
 
 	for (j = s_j; j < TCP_SYNQ_HSIZE; j++) {
-		struct open_request *req, *head = lopt->syn_table[j];
+		struct request_sock *req, *head = lopt->syn_table[j];
 
 		reqnum = 0;
 		for (req = head; req; reqnum++, req = req->dl_next) {
+			struct inet_request_sock *ireq = inet_rsk(req);
+
 			if (reqnum < s_reqnum)
 				continue;
-			if (r->id.tcpdiag_dport != req->rmt_port &&
+			if (r->id.tcpdiag_dport != ireq->rmt_port &&
 			    r->id.tcpdiag_dport)
 				continue;
 
@@ -555,16 +558,16 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 			entry.saddr =
 #ifdef CONFIG_IP_TCPDIAG_IPV6
 				(entry.family == AF_INET6) ?
-				req->af.v6_req.loc_addr.s6_addr32 :
+				tcp6_rsk(req)->loc_addr.s6_addr32 :
 #endif
-				&req->af.v4_req.loc_addr;
+				&ireq->loc_addr;
 			entry.daddr =
 #ifdef CONFIG_IP_TCPDIAG_IPV6
 				(entry.family == AF_INET6) ?
-				req->af.v6_req.rmt_addr.s6_addr32 :
+				tcp6_rsk(req)->rmt_addr.s6_addr32 :
 #endif
-				&req->af.v4_req.rmt_addr;
-			entry.dport = ntohs(req->rmt_port);
+				&ireq->rmt_addr;
+			entry.dport = ntohs(ireq->rmt_port);
 
 			if (!tcpdiag_bc_run(RTA_DATA(bc),
 					    RTA_PAYLOAD(bc), &entry))
@@ -585,7 +588,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 	}
 
 out:
-	read_unlock_bh(&tp->syn_wait_lock);
+	read_unlock_bh(&tp->accept_queue.syn_wait_lock);
 
 	return err;
 }
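For the diag dump the interesting part is where the SYN backlog now lives: both the hash table (listen_opt->syn_table) and the rwlock protecting it moved inside tp->accept_queue, and per-request address fields are read through inet_rsk()/tcp6_rsk(). A trimmed sketch of the walk; example_walk is invented and the body only shows the locking shape:

/* Assumes this patch series' request_sock definitions; illustrative only. */
static void example_walk(struct tcp_sock *tp)
{
	struct listen_sock *lopt;
	struct request_sock *req;
	int i;

	read_lock_bh(&tp->accept_queue.syn_wait_lock);
	lopt = tp->accept_queue.listen_opt;
	if (lopt && lopt->qlen) {
		for (i = 0; i < TCP_SYNQ_HSIZE; i++)
			for (req = lopt->syn_table[i]; req; req = req->dl_next)
				(void)inet_rsk(req)->rmt_port;	/* report it */
	}
	read_unlock_bh(&tp->accept_queue.syn_wait_lock);
}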
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dad98e4a50..2d41d5d6ad 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -36,7 +36,7 @@
  *					ACK bit.
  *		Andi Kleen :		Implemented fast path mtu discovery.
  *	     				Fixed many serious bugs in the
- *					open_request handling and moved
+ *					request_sock handling and moved
  *					most of it into the af independent code.
  *					Added tail drop and some other bugfixes.
  *					Added new listen sematics.
@@ -869,21 +869,23 @@ static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
 	return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
 }
 
-static struct open_request *tcp_v4_search_req(struct tcp_sock *tp,
-					      struct open_request ***prevp,
+static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp,
+					      struct request_sock ***prevp,
 					      __u16 rport,
 					      __u32 raddr, __u32 laddr)
 {
-	struct tcp_listen_opt *lopt = tp->listen_opt;
-	struct open_request *req, **prev;
+	struct listen_sock *lopt = tp->accept_queue.listen_opt;
+	struct request_sock *req, **prev;
 
 	for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
 	     (req = *prev) != NULL;
 	     prev = &req->dl_next) {
-		if (req->rmt_port == rport &&
-		    req->af.v4_req.rmt_addr == raddr &&
-		    req->af.v4_req.loc_addr == laddr &&
-		    TCP_INET_FAMILY(req->class->family)) {
+		const struct inet_request_sock *ireq = inet_rsk(req);
+
+		if (ireq->rmt_port == rport &&
+		    ireq->rmt_addr == raddr &&
+		    ireq->loc_addr == laddr &&
+		    TCP_INET_FAMILY(req->rsk_ops->family)) {
 			BUG_TRAP(!req->sk);
 			*prevp = prev;
 			break;
@@ -893,21 +895,13 @@ static struct open_request *tcp_v4_search_req(struct tcp_sock *tp,
 	return req;
 }
 
-static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
+static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt = tp->listen_opt;
-	u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
-
-	req->expires = jiffies + TCP_TIMEOUT_INIT;
-	req->retrans = 0;
-	req->sk = NULL;
-	req->dl_next = lopt->syn_table[h];
-
-	write_lock(&tp->syn_wait_lock);
-	lopt->syn_table[h] = req;
-	write_unlock(&tp->syn_wait_lock);
+	struct listen_sock *lopt = tp->accept_queue.listen_opt;
+	u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
 
+	reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
 	tcp_synq_added(sk);
 }
 
@@ -1050,7 +1044,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 	}
 
 	switch (sk->sk_state) {
-		struct open_request *req, **prev;
+		struct request_sock *req, **prev;
 	case TCP_LISTEN:
 		if (sock_owned_by_user(sk))
 			goto out;
@@ -1065,7 +1059,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 		 */
 		BUG_TRAP(!req->sk);
 
-		if (seq != req->snt_isn) {
+		if (seq != tcp_rsk(req)->snt_isn) {
 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
 			goto out;
 		}
@@ -1254,28 +1248,29 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 	tcp_tw_put(tw);
 }
 
-static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
+static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
 {
-	tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd,
+	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 			req->ts_recent);
 }
 
 static struct dst_entry* tcp_v4_route_req(struct sock *sk,
-					  struct open_request *req)
+					  struct request_sock *req)
 {
 	struct rtable *rt;
-	struct ip_options *opt = req->af.v4_req.opt;
+	const struct inet_request_sock *ireq = inet_rsk(req);
+	struct ip_options *opt = inet_rsk(req)->opt;
 	struct flowi fl = { .oif = sk->sk_bound_dev_if,
 			    .nl_u = { .ip4_u =
 				      { .daddr = ((opt && opt->srr) ?
 						  opt->faddr :
-						  req->af.v4_req.rmt_addr),
-					.saddr = req->af.v4_req.loc_addr,
+						  ireq->rmt_addr),
+					.saddr = ireq->loc_addr,
 					.tos = RT_CONN_FLAGS(sk) } },
 			    .proto = IPPROTO_TCP,
 			    .uli_u = { .ports =
 				       { .sport = inet_sk(sk)->sport,
-					 .dport = req->rmt_port } } };
+					 .dport = ireq->rmt_port } } };
 
 	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
@@ -1291,12 +1286,13 @@ static struct dst_entry* tcp_v4_route_req(struct sock *sk,
 
 /*
  *	Send a SYN-ACK after having received an ACK.
- *	This still operates on a open_request only, not on a big
+ *	This still operates on a request_sock only, not on a big
 *	socket.
 */
-static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
+static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 			      struct dst_entry *dst)
 {
+	const struct inet_request_sock *ireq = inet_rsk(req);
 	int err = -1;
 	struct sk_buff * skb;
 
@@ -1310,14 +1306,14 @@ static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
 		struct tcphdr *th = skb->h.th;
 
 		th->check = tcp_v4_check(th, skb->len,
-					 req->af.v4_req.loc_addr,
-					 req->af.v4_req.rmt_addr,
+					 ireq->loc_addr,
+					 ireq->rmt_addr,
 					 csum_partial((char *)th, skb->len,
 						      skb->csum));
 
-		err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
-					    req->af.v4_req.rmt_addr,
-					    req->af.v4_req.opt);
+		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
+					    ireq->rmt_addr,
+					    ireq->opt);
 		if (err == NET_XMIT_CN)
 			err = 0;
 	}
@@ -1328,12 +1324,12 @@ out:
 }
 
 /*
- *	IPv4 open_request destructor.
+ *	IPv4 request_sock destructor.
 */
-static void tcp_v4_or_free(struct open_request *req)
+static void tcp_v4_reqsk_destructor(struct request_sock *req)
 {
-	if (req->af.v4_req.opt)
-		kfree(req->af.v4_req.opt);
+	if (inet_rsk(req)->opt)
+		kfree(inet_rsk(req)->opt);
 }
 
 static inline void syn_flood_warning(struct sk_buff *skb)
@@ -1349,7 +1345,7 @@ static inline void syn_flood_warning(struct sk_buff *skb)
 }
 
 /*
- *	Save and compile IPv4 options into the open_request if needed.
+ *	Save and compile IPv4 options into the request_sock if needed.
 */
 static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
 						     struct sk_buff *skb)
@@ -1370,33 +1366,20 @@ static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
 	return dopt;
 }
 
-/*
- * Maximum number of SYN_RECV sockets in queue per LISTEN socket.
- * One SYN_RECV socket costs about 80bytes on a 32bit machine.
- * It would be better to replace it with a global counter for all sockets
- * but then some measure against one socket starving all other sockets
- * would be needed.
- *
- * It was 128 by default. Experiments with real servers show, that
- * it is absolutely not enough even at 100conn/sec. 256 cures most
- * of problems. This value is adjusted to 128 for very small machines
- * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
- * Further increasing requires to change hash table size.
- */
-int sysctl_max_syn_backlog = 256;
-
-struct or_calltable or_ipv4 = {
+struct request_sock_ops tcp_request_sock_ops = {
 	.family		=	PF_INET,
+	.obj_size	=	sizeof(struct tcp_request_sock),
 	.rtx_syn_ack	=	tcp_v4_send_synack,
-	.send_ack	=	tcp_v4_or_send_ack,
-	.destructor	=	tcp_v4_or_free,
+	.send_ack	=	tcp_v4_reqsk_send_ack,
+	.destructor	=	tcp_v4_reqsk_destructor,
 	.send_reset	=	tcp_v4_send_reset,
 };
 
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
+	struct inet_request_sock *ireq;
 	struct tcp_options_received tmp_opt;
-	struct open_request *req;
+	struct request_sock *req;
 	__u32 saddr = skb->nh.iph->saddr;
 	__u32 daddr = skb->nh.iph->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
@@ -1433,7 +1416,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
 		goto drop;
 
-	req = tcp_openreq_alloc();
+	req = reqsk_alloc(&tcp_request_sock_ops);
 	if (!req)
 		goto drop;
 
@@ -1461,10 +1444,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	tcp_openreq_init(req, &tmp_opt, skb);
 
-	req->af.v4_req.loc_addr = daddr;
-	req->af.v4_req.rmt_addr = saddr;
-	req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
-	req->class = &or_ipv4;
+	ireq = inet_rsk(req);
+	ireq->loc_addr = daddr;
+	ireq->rmt_addr = saddr;
+	ireq->opt = tcp_v4_save_options(sk, skb);
 	if (!want_cookie)
 		TCP_ECN_create_request(req, skb->h.th);
 
@@ -1523,20 +1506,20 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 		isn = tcp_v4_init_sequence(sk, skb);
 	}
-	req->snt_isn = isn;
+	tcp_rsk(req)->snt_isn = isn;
 
 	if (tcp_v4_send_synack(sk, req, dst))
 		goto drop_and_free;
 
 	if (want_cookie) {
-	   	tcp_openreq_free(req);
+	   	reqsk_free(req);
 	} else {
 		tcp_v4_synq_add(sk, req);
 	}
 	return 0;
 
 drop_and_free:
-	tcp_openreq_free(req);
+	reqsk_free(req);
 drop:
 	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
 	return 0;
@@ -1548,9 +1531,10 @@ drop:
 * now create the new socket.
 */
 struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
-				  struct open_request *req,
+				  struct request_sock *req,
 				  struct dst_entry *dst)
 {
+	struct inet_request_sock *ireq;
 	struct inet_sock *newinet;
 	struct tcp_sock *newtp;
 	struct sock *newsk;
@@ -1570,11 +1554,12 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	newtp		      = tcp_sk(newsk);
 	newinet		      = inet_sk(newsk);
-	newinet->daddr	      = req->af.v4_req.rmt_addr;
-	newinet->rcv_saddr    = req->af.v4_req.loc_addr;
-	newinet->saddr	      = req->af.v4_req.loc_addr;
-	newinet->opt	      = req->af.v4_req.opt;
-	req->af.v4_req.opt    = NULL;
+	ireq		      = inet_rsk(req);
+	newinet->daddr	      = ireq->rmt_addr;
+	newinet->rcv_saddr    = ireq->loc_addr;
+	newinet->saddr	      = ireq->loc_addr;
+	newinet->opt	      = ireq->opt;
+	ireq->opt	      = NULL;
 	newinet->mc_index     = tcp_v4_iif(skb);
 	newinet->mc_ttl	      = skb->nh.iph->ttl;
 	newtp->ext_header_len = 0;
@@ -1605,9 +1590,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	struct iphdr *iph = skb->nh.iph;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sock *nsk;
-	struct open_request **prev;
+	struct request_sock **prev;
 	/* Find possible connection requests. */
-	struct open_request *req = tcp_v4_search_req(tp, &prev, th->source,
+	struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source,
 						     iph->saddr, iph->daddr);
 	if (req)
 		return tcp_check_req(sk, skb, req, prev);
@@ -2144,13 +2129,13 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 	++st->num;
 
 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
-		struct open_request *req = cur;
+		struct request_sock *req = cur;
 
 		tp = tcp_sk(st->syn_wait_sk);
 		req = req->dl_next;
 		while (1) {
 			while (req) {
-				if (req->class->family == st->family) {
+				if (req->rsk_ops->family == st->family) {
 					cur = req;
 					goto out;
 				}
@@ -2159,17 +2144,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 			if (++st->sbucket >= TCP_SYNQ_HSIZE)
 				break;
get_req:
-			req = tp->listen_opt->syn_table[st->sbucket];
+			req = tp->accept_queue.listen_opt->syn_table[st->sbucket];
 		}
 		sk	  = sk_next(st->syn_wait_sk);
 		st->state = TCP_SEQ_STATE_LISTENING;
-		read_unlock_bh(&tp->syn_wait_lock);
+		read_unlock_bh(&tp->accept_queue.syn_wait_lock);
 	} else {
 		tp = tcp_sk(sk);
-		read_lock_bh(&tp->syn_wait_lock);
-		if (tp->listen_opt && tp->listen_opt->qlen)
+		read_lock_bh(&tp->accept_queue.syn_wait_lock);
+		if (reqsk_queue_len(&tp->accept_queue))
 			goto start_req;
-		read_unlock_bh(&tp->syn_wait_lock);
+		read_unlock_bh(&tp->accept_queue.syn_wait_lock);
 		sk = sk_next(sk);
 	}
get_sk:
@@ -2179,8 +2164,8 @@ get_sk:
 			goto out;
 		}
 		tp = tcp_sk(sk);
-		read_lock_bh(&tp->syn_wait_lock);
-		if (tp->listen_opt && tp->listen_opt->qlen) {
+		read_lock_bh(&tp->accept_queue.syn_wait_lock);
+		if (reqsk_queue_len(&tp->accept_queue)) {
start_req:
 			st->uid		= sock_i_uid(sk);
 			st->syn_wait_sk = sk;
@@ -2188,7 +2173,7 @@ start_req:
 			st->sbucket	= 0;
 			goto get_req;
 		}
-		read_unlock_bh(&tp->syn_wait_lock);
+		read_unlock_bh(&tp->accept_queue.syn_wait_lock);
 	}
 	if (++st->bucket < TCP_LHTABLE_SIZE) {
 		sk = sk_head(&tcp_listening_hash[st->bucket]);
@@ -2375,7 +2360,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 	case TCP_SEQ_STATE_OPENREQ:
 		if (v) {
 			struct tcp_sock *tp = tcp_sk(st->syn_wait_sk);
-			read_unlock_bh(&tp->syn_wait_lock);
+			read_unlock_bh(&tp->accept_queue.syn_wait_lock);
 		}
 	case TCP_SEQ_STATE_LISTENING:
 		if (v != SEQ_START_TOKEN)
@@ -2451,18 +2436,19 @@ void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
 }
 
-static void get_openreq4(struct sock *sk, struct open_request *req,
+static void get_openreq4(struct sock *sk, struct request_sock *req,
 			 char *tmpbuf, int i, int uid)
 {
+	const struct inet_request_sock *ireq = inet_rsk(req);
 	int ttd = req->expires - jiffies;
 
 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
 		i,
-		req->af.v4_req.loc_addr,
+		ireq->loc_addr,
 		ntohs(inet_sk(sk)->sport),
-		req->af.v4_req.rmt_addr,
-		ntohs(req->rmt_port),
+		ireq->rmt_addr,
+		ntohs(ireq->rmt_port),
 		TCP_SYN_RECV,
 		0, 0, /* could print option size, but that is af dependent. */
 		1,    /* timers active (only the expire timer) */
@@ -2618,6 +2604,7 @@ struct proto tcp_prot = {
 	.sysctl_rmem	= sysctl_tcp_rmem,
 	.max_header	= MAX_TCP_HEADER,
 	.obj_size	= sizeof(struct tcp_sock),
+	.rsk_prot	= &tcp_request_sock_ops,
 };
 
 
@@ -2660,7 +2647,6 @@ EXPORT_SYMBOL(tcp_proc_register);
 EXPORT_SYMBOL(tcp_proc_unregister);
 #endif
 EXPORT_SYMBOL(sysctl_local_port_range);
-EXPORT_SYMBOL(sysctl_max_syn_backlog);
 EXPORT_SYMBOL(sysctl_tcp_low_latency);
 EXPORT_SYMBOL(sysctl_tcp_tw_reuse);
 
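tcp_ipv4.c renames or_calltable to request_sock_ops, registering the table both as the extern used by syncookies and as tcp_prot.rsk_prot, while the sysctl_max_syn_backlog definition and export migrate out of this file (presumably into the new generic request_sock code). The new .obj_size member is what lets the generic reqsk_alloc() size one slab object for the most-derived request type. The general shape of such a table, with hypothetical handler names:

/* Assumes this patch series' headers; handlers are placeholders. */
static int  example_rtx_syn_ack(struct sock *sk, struct request_sock *req,
				struct dst_entry *dst);
static void example_send_ack(struct sk_buff *skb, struct request_sock *req);
static void example_destructor(struct request_sock *req);
static void example_send_reset(struct sk_buff *skb);

static struct request_sock_ops example_ops = {
	.family		= PF_INET,
	.obj_size	= sizeof(struct tcp_request_sock),
	.rtx_syn_ack	= example_rtx_syn_ack,
	.send_ack	= example_send_ack,
	.destructor	= example_destructor,
	.send_reset	= example_send_reset,
};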
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index eea1a17a9a..b3943e7562 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -684,7 +684,7 @@ out:
 * Actually, we could lots of memory writes here. tp of listening
 * socket contains all necessary default parameters.
 */
-struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb)
+struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
 {
 	/* allocate the newsk from the same slab of the master sock,
 	 * if not, at sk_free time we'll try to free it from the wrong
@@ -692,6 +692,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 	struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, sk->sk_prot, 0);
 
 	if(newsk != NULL) {
+		struct inet_request_sock *ireq = inet_rsk(req);
+		struct tcp_request_sock *treq = tcp_rsk(req);
 		struct tcp_sock *newtp;
 		struct sk_filter *filter;
 
@@ -703,7 +705,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		tcp_sk(newsk)->bind_hash = NULL;
 
 		/* Clone the TCP header template */
-		inet_sk(newsk)->dport = req->rmt_port;
+		inet_sk(newsk)->dport = ireq->rmt_port;
 
 		sock_lock_init(newsk);
 		bh_lock_sock(newsk);
@@ -739,14 +741,14 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		/* Now setup tcp_sock */
 		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
-		newtp->rcv_nxt = req->rcv_isn + 1;
-		newtp->snd_nxt = req->snt_isn + 1;
-		newtp->snd_una = req->snt_isn + 1;
-		newtp->snd_sml = req->snt_isn + 1;
+		newtp->rcv_nxt = treq->rcv_isn + 1;
+		newtp->snd_nxt = treq->snt_isn + 1;
+		newtp->snd_una = treq->snt_isn + 1;
+		newtp->snd_sml = treq->snt_isn + 1;
 
 		tcp_prequeue_init(newtp);
 
-		tcp_init_wl(newtp, req->snt_isn, req->rcv_isn);
+		tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
 
 		newtp->retransmits = 0;
 		newtp->backoff = 0;
@@ -775,10 +777,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		tcp_set_ca_state(newtp, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->rcv_wup = req->rcv_isn + 1;
-		newtp->write_seq = req->snt_isn + 1;
+		newtp->rcv_wup = treq->rcv_isn + 1;
+		newtp->write_seq = treq->snt_isn + 1;
 		newtp->pushed_seq = newtp->write_seq;
-		newtp->copied_seq = req->rcv_isn + 1;
+		newtp->copied_seq = treq->rcv_isn + 1;
 
 		newtp->rx_opt.saw_tstamp = 0;
 
@@ -788,10 +790,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		newtp->probes_out = 0;
 		newtp->rx_opt.num_sacks = 0;
 		newtp->urg_data = 0;
-		newtp->listen_opt = NULL;
-		newtp->accept_queue = newtp->accept_queue_tail = NULL;
-		/* Deinitialize syn_wait_lock to trap illegal accesses. */
-		memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));
+		/* Deinitialize accept_queue to trap illegal accesses. */
+		memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue));
 
 		/* Back to base struct sock members. */
 		newsk->sk_err = 0;
@@ -808,18 +808,18 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 		newsk->sk_socket = NULL;
 		newsk->sk_sleep = NULL;
 
-		newtp->rx_opt.tstamp_ok = req->tstamp_ok;
-		if((newtp->rx_opt.sack_ok = req->sack_ok) != 0) {
+		newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
+		if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
 			if (sysctl_tcp_fack)
 				newtp->rx_opt.sack_ok |= 2;
 		}
 		newtp->window_clamp = req->window_clamp;
 		newtp->rcv_ssthresh = req->rcv_wnd;
 		newtp->rcv_wnd = req->rcv_wnd;
-		newtp->rx_opt.wscale_ok = req->wscale_ok;
+		newtp->rx_opt.wscale_ok = ireq->wscale_ok;
 		if (newtp->rx_opt.wscale_ok) {
-			newtp->rx_opt.snd_wscale = req->snd_wscale;
-			newtp->rx_opt.rcv_wscale = req->rcv_wscale;
+			newtp->rx_opt.snd_wscale = ireq->snd_wscale;
+			newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
 		} else {
 			newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
825 newtp->window_clamp = min(newtp->window_clamp, 65535U); 825 newtp->window_clamp = min(newtp->window_clamp, 65535U);
@@ -851,12 +851,12 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
851 851
852/* 852/*
853 * Process an incoming packet for SYN_RECV sockets represented 853 * Process an incoming packet for SYN_RECV sockets represented
854 * as an open_request. 854 * as a request_sock.
855 */ 855 */
856 856
857struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, 857struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
858 struct open_request *req, 858 struct request_sock *req,
859 struct open_request **prev) 859 struct request_sock **prev)
860{ 860{
861 struct tcphdr *th = skb->h.th; 861 struct tcphdr *th = skb->h.th;
862 struct tcp_sock *tp = tcp_sk(sk); 862 struct tcp_sock *tp = tcp_sk(sk);
@@ -881,7 +881,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
881 } 881 }
882 882
883 /* Check for pure retransmitted SYN. */ 883 /* Check for pure retransmitted SYN. */
884 if (TCP_SKB_CB(skb)->seq == req->rcv_isn && 884 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
885 flg == TCP_FLAG_SYN && 885 flg == TCP_FLAG_SYN &&
886 !paws_reject) { 886 !paws_reject) {
887 /* 887 /*
@@ -901,7 +901,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
901 * Enforce "SYN-ACK" according to figure 8, figure 6 901 * Enforce "SYN-ACK" according to figure 8, figure 6
902 * of RFC793, fixed by RFC1122. 902 * of RFC793, fixed by RFC1122.
903 */ 903 */
904 req->class->rtx_syn_ack(sk, req, NULL); 904 req->rsk_ops->rtx_syn_ack(sk, req, NULL);
905 return NULL; 905 return NULL;
906 } 906 }
907 907
@@ -959,7 +959,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
959 * Invalid ACK: reset will be sent by listening socket 959 * Invalid ACK: reset will be sent by listening socket
960 */ 960 */
961 if ((flg & TCP_FLAG_ACK) && 961 if ((flg & TCP_FLAG_ACK) &&
962 (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1)) 962 (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
963 return sk; 963 return sk;
964 964
965 /* Also, it would be not so bad idea to check rcv_tsecr, which 965 /* Also, it would be not so bad idea to check rcv_tsecr, which
@@ -970,10 +970,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
970 /* RFC793: "first check sequence number". */ 970 /* RFC793: "first check sequence number". */
971 971
972 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, 972 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
973 req->rcv_isn+1, req->rcv_isn+1+req->rcv_wnd)) { 973 tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
974 /* Out of window: send ACK and drop. */ 974 /* Out of window: send ACK and drop. */
975 if (!(flg & TCP_FLAG_RST)) 975 if (!(flg & TCP_FLAG_RST))
976 req->class->send_ack(skb, req); 976 req->rsk_ops->send_ack(skb, req);
977 if (paws_reject) 977 if (paws_reject)
978 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); 978 NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
979 return NULL; 979 return NULL;
@@ -981,12 +981,12 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
981 981
982 /* In sequence, PAWS is OK. */ 982 /* In sequence, PAWS is OK. */
983 983
984 if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1)) 984 if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
985 req->ts_recent = tmp_opt.rcv_tsval; 985 req->ts_recent = tmp_opt.rcv_tsval;
986 986
987 if (TCP_SKB_CB(skb)->seq == req->rcv_isn) { 987 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
988 /* Truncate SYN, it is out of window starting 988 /* Truncate SYN, it is out of window starting
989 at req->rcv_isn+1. */ 989 at tcp_rsk(req)->rcv_isn + 1. */
990 flg &= ~TCP_FLAG_SYN; 990 flg &= ~TCP_FLAG_SYN;
991 } 991 }
992 992
@@ -1003,8 +1003,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
1003 return NULL; 1003 return NULL;
1004 1004
1005 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ 1005 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
1006 if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) { 1006 if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
1007 req->acked = 1; 1007 inet_rsk(req)->acked = 1;
1008 return NULL; 1008 return NULL;
1009 } 1009 }
1010 1010
@@ -1026,14 +1026,14 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
1026 1026
1027 listen_overflow: 1027 listen_overflow:
1028 if (!sysctl_tcp_abort_on_overflow) { 1028 if (!sysctl_tcp_abort_on_overflow) {
1029 req->acked = 1; 1029 inet_rsk(req)->acked = 1;
1030 return NULL; 1030 return NULL;
1031 } 1031 }
1032 1032
1033 embryonic_reset: 1033 embryonic_reset:
1034 NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS); 1034 NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
1035 if (!(flg & TCP_FLAG_RST)) 1035 if (!(flg & TCP_FLAG_RST))
1036 req->class->send_reset(skb); 1036 req->rsk_ops->send_reset(skb);
1037 1037
1038 tcp_synq_drop(sk, req, prev); 1038 tcp_synq_drop(sk, req, prev);
1039 return NULL; 1039 return NULL;
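[Annotation] In tcp_minisocks.c the per-class virtual calls switch from req->class->... to req->rsk_ops->..., and tcp_create_openreq_child() caches inet_rsk(req)/tcp_rsk(req) in locals instead of repeating the casts on every field access. A hedged sketch of that dispatch; the function names mirror the patch but the bodies are placeholders:

    #include <stdio.h>

    struct request_sock;

    struct request_sock_ops {
            int  (*rtx_syn_ack)(struct request_sock *req);
            void (*send_reset)(struct request_sock *req);
    };

    struct request_sock {
            const struct request_sock_ops *rsk_ops; /* was req->class */
    };

    static int tcp_v4_rtx_synack(struct request_sock *req)
    {
            (void)req;
            puts("retransmit SYN-ACK");
            return 0;
    }

    static void tcp_v4_send_reset(struct request_sock *req)
    {
            (void)req;
            puts("send RST");
    }

    static const struct request_sock_ops tcp_request_sock_ops = {
            .rtx_syn_ack = tcp_v4_rtx_synack,
            .send_reset  = tcp_v4_send_reset,
    };

    int main(void)
    {
            struct request_sock req = { .rsk_ops = &tcp_request_sock_ops };

            req.rsk_ops->rtx_syn_ack(&req);  /* replaces req->class->... */
            return 0;
    }

Moving the ops pointer into the generic struct is what allows the af-independent request_sock code to retransmit, ack, and reset without knowing it is handling TCP over IPv4.
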
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fa24e7ae1f..f17c6577e3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1356,8 +1356,9 @@ int tcp_send_synack(struct sock *sk)
1356 * Prepare a SYN-ACK. 1356 * Prepare a SYN-ACK.
1357 */ 1357 */
1358struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, 1358struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
1359 struct open_request *req) 1359 struct request_sock *req)
1360{ 1360{
1361 struct inet_request_sock *ireq = inet_rsk(req);
1361 struct tcp_sock *tp = tcp_sk(sk); 1362 struct tcp_sock *tp = tcp_sk(sk);
1362 struct tcphdr *th; 1363 struct tcphdr *th;
1363 int tcp_header_size; 1364 int tcp_header_size;
@@ -1373,47 +1374,47 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
1373 skb->dst = dst_clone(dst); 1374 skb->dst = dst_clone(dst);
1374 1375
1375 tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + 1376 tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
1376 (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + 1377 (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
1377 (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + 1378 (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
1378 /* SACK_PERM is in the place of NOP NOP of TS */ 1379 /* SACK_PERM is in the place of NOP NOP of TS */
1379 ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); 1380 ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
1380 skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); 1381 skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
1381 1382
1382 memset(th, 0, sizeof(struct tcphdr)); 1383 memset(th, 0, sizeof(struct tcphdr));
1383 th->syn = 1; 1384 th->syn = 1;
1384 th->ack = 1; 1385 th->ack = 1;
1385 if (dst->dev->features&NETIF_F_TSO) 1386 if (dst->dev->features&NETIF_F_TSO)
1386 req->ecn_ok = 0; 1387 ireq->ecn_ok = 0;
1387 TCP_ECN_make_synack(req, th); 1388 TCP_ECN_make_synack(req, th);
1388 th->source = inet_sk(sk)->sport; 1389 th->source = inet_sk(sk)->sport;
1389 th->dest = req->rmt_port; 1390 th->dest = ireq->rmt_port;
1390 TCP_SKB_CB(skb)->seq = req->snt_isn; 1391 TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
1391 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; 1392 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
1392 TCP_SKB_CB(skb)->sacked = 0; 1393 TCP_SKB_CB(skb)->sacked = 0;
1393 skb_shinfo(skb)->tso_segs = 1; 1394 skb_shinfo(skb)->tso_segs = 1;
1394 skb_shinfo(skb)->tso_size = 0; 1395 skb_shinfo(skb)->tso_size = 0;
1395 th->seq = htonl(TCP_SKB_CB(skb)->seq); 1396 th->seq = htonl(TCP_SKB_CB(skb)->seq);
1396 th->ack_seq = htonl(req->rcv_isn + 1); 1397 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
1397 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ 1398 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
1398 __u8 rcv_wscale; 1399 __u8 rcv_wscale;
1399 /* Set this up on the first call only */ 1400 /* Set this up on the first call only */
1400 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); 1401 req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
1401 /* tcp_full_space because it is guaranteed to be the first packet */ 1402 /* tcp_full_space because it is guaranteed to be the first packet */
1402 tcp_select_initial_window(tcp_full_space(sk), 1403 tcp_select_initial_window(tcp_full_space(sk),
1403 dst_metric(dst, RTAX_ADVMSS) - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), 1404 dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
1404 &req->rcv_wnd, 1405 &req->rcv_wnd,
1405 &req->window_clamp, 1406 &req->window_clamp,
1406 req->wscale_ok, 1407 ireq->wscale_ok,
1407 &rcv_wscale); 1408 &rcv_wscale);
1408 req->rcv_wscale = rcv_wscale; 1409 ireq->rcv_wscale = rcv_wscale;
1409 } 1410 }
1410 1411
1411 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ 1412 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
1412 th->window = htons(req->rcv_wnd); 1413 th->window = htons(req->rcv_wnd);
1413 1414
1414 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1415 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1415 tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), req->tstamp_ok, 1416 tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
1416 req->sack_ok, req->wscale_ok, req->rcv_wscale, 1417 ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
1417 TCP_SKB_CB(skb)->when, 1418 TCP_SKB_CB(skb)->when,
1418 req->ts_recent); 1419 req->ts_recent);
1419 1420
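[Annotation] tcp_make_synack() now reads every negotiated-option flag through a cached ireq; the header-length arithmetic itself is unchanged. A compact sketch of that sizing rule, with the aligned option lengths taken from the hunk and the 20-byte base header assumed:

    #include <stdio.h>

    #define TCP_BASE_HDR             20  /* sizeof(struct tcphdr) */
    #define TCPOLEN_MSS               4
    #define TCPOLEN_TSTAMP_ALIGNED   12
    #define TCPOLEN_WSCALE_ALIGNED    4
    #define TCPOLEN_SACKPERM_ALIGNED  4

    struct opts { int tstamp_ok, sack_ok, wscale_ok; };

    static int synack_header_size(const struct opts *ireq)
    {
            return TCP_BASE_HDR + TCPOLEN_MSS +
                   (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
                   (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
                   /* SACK_PERM fits in the NOP NOP slot of the tstamp */
                   ((ireq->sack_ok && !ireq->tstamp_ok) ?
                           TCPOLEN_SACKPERM_ALIGNED : 0);
    }

    int main(void)
    {
            struct opts o = { .tstamp_ok = 1, .sack_ok = 1, .wscale_ok = 1 };

            printf("%d bytes\n", synack_header_size(&o));  /* 40 */
            return 0;
    }
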
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 799ebe061e..b127b44985 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -464,11 +464,11 @@ out_unlock:
464static void tcp_synack_timer(struct sock *sk) 464static void tcp_synack_timer(struct sock *sk)
465{ 465{
466 struct tcp_sock *tp = tcp_sk(sk); 466 struct tcp_sock *tp = tcp_sk(sk);
467 struct tcp_listen_opt *lopt = tp->listen_opt; 467 struct listen_sock *lopt = tp->accept_queue.listen_opt;
468 int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; 468 int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
469 int thresh = max_retries; 469 int thresh = max_retries;
470 unsigned long now = jiffies; 470 unsigned long now = jiffies;
471 struct open_request **reqp, *req; 471 struct request_sock **reqp, *req;
472 int i, budget; 472 int i, budget;
473 473
474 if (lopt == NULL || lopt->qlen == 0) 474 if (lopt == NULL || lopt->qlen == 0)
@@ -513,8 +513,8 @@ static void tcp_synack_timer(struct sock *sk)
513 while ((req = *reqp) != NULL) { 513 while ((req = *reqp) != NULL) {
514 if (time_after_eq(now, req->expires)) { 514 if (time_after_eq(now, req->expires)) {
515 if ((req->retrans < thresh || 515 if ((req->retrans < thresh ||
516 (req->acked && req->retrans < max_retries)) 516 (inet_rsk(req)->acked && req->retrans < max_retries))
517 && !req->class->rtx_syn_ack(sk, req, NULL)) { 517 && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) {
518 unsigned long timeo; 518 unsigned long timeo;
519 519
520 if (req->retrans++ == 0) 520 if (req->retrans++ == 0)
@@ -527,13 +527,9 @@ static void tcp_synack_timer(struct sock *sk)
527 } 527 }
528 528
529 /* Drop this request */ 529 /* Drop this request */
530 write_lock(&tp->syn_wait_lock); 530 tcp_synq_unlink(tp, req, reqp);
531 *reqp = req->dl_next; 531 reqsk_queue_removed(&tp->accept_queue, req);
532 write_unlock(&tp->syn_wait_lock); 532 reqsk_free(req);
533 lopt->qlen--;
534 if (req->retrans == 0)
535 lopt->qlen_young--;
536 tcp_openreq_free(req);
537 continue; 533 continue;
538 } 534 }
539 reqp = &req->dl_next; 535 reqp = &req->dl_next;
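[Annotation] tcp_synack_timer() stops hand-rolling the drop path (taking syn_wait_lock, splicing dl_next, decrementing qlen/qlen_young) and instead calls tcp_synq_unlink() plus the new reqsk_queue_removed() and reqsk_free() helpers from net/core/request_sock.c. A sketch of the accounting the helper is assumed to centralize; a request counts as "young" only until its first SYN-ACK retransmit:

    #include <assert.h>

    struct listen_sock { int qlen, qlen_young; };   /* invented layout */
    struct request_sock { int retrans; };

    static void reqsk_queue_removed(struct listen_sock *lopt,
                                    const struct request_sock *req)
    {
            if (req->retrans == 0)
                    lopt->qlen_young--;
            lopt->qlen--;
    }

    int main(void)
    {
            struct listen_sock l = { .qlen = 2, .qlen_young = 1 };
            struct request_sock young = { .retrans = 0 };
            struct request_sock old   = { .retrans = 3 };

            reqsk_queue_removed(&l, &young);
            reqsk_queue_removed(&l, &old);
            assert(l.qlen == 0 && l.qlen_young == 0);
            return 0;
    }

Centralizing this in one helper removes the risk, visible in the deleted lines, of a caller forgetting the qlen_young bookkeeping.
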
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index af2392ae57..66620a9594 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -33,6 +33,7 @@ static void xfrm4_encap(struct sk_buff *skb)
33 struct dst_entry *dst = skb->dst; 33 struct dst_entry *dst = skb->dst;
34 struct xfrm_state *x = dst->xfrm; 34 struct xfrm_state *x = dst->xfrm;
35 struct iphdr *iph, *top_iph; 35 struct iphdr *iph, *top_iph;
36 int flags;
36 37
37 iph = skb->nh.iph; 38 iph = skb->nh.iph;
38 skb->h.ipiph = iph; 39 skb->h.ipiph = iph;
@@ -51,10 +52,13 @@ static void xfrm4_encap(struct sk_buff *skb)
51 52
52 /* DS disclosed */ 53 /* DS disclosed */
53 top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); 54 top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
54 if (x->props.flags & XFRM_STATE_NOECN) 55
56 flags = x->props.flags;
57 if (flags & XFRM_STATE_NOECN)
55 IP_ECN_clear(top_iph); 58 IP_ECN_clear(top_iph);
56 59
57 top_iph->frag_off = iph->frag_off & htons(IP_DF); 60 top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
61 0 : (iph->frag_off & htons(IP_DF));
58 if (!top_iph->frag_off) 62 if (!top_iph->frag_off)
59 __ip_select_ident(top_iph, dst, 0); 63 __ip_select_ident(top_iph, dst, 0);
60 64
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 223a2e8385..050611d7a9 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -7,12 +7,20 @@
7 * 7 *
8 */ 8 */
9 9
10#include <net/ip.h>
10#include <net/xfrm.h> 11#include <net/xfrm.h>
11#include <linux/pfkeyv2.h> 12#include <linux/pfkeyv2.h>
12#include <linux/ipsec.h> 13#include <linux/ipsec.h>
13 14
14static struct xfrm_state_afinfo xfrm4_state_afinfo; 15static struct xfrm_state_afinfo xfrm4_state_afinfo;
15 16
17static int xfrm4_init_flags(struct xfrm_state *x)
18{
19 if (ipv4_config.no_pmtu_disc)
20 x->props.flags |= XFRM_STATE_NOPMTUDISC;
21 return 0;
22}
23
16static void 24static void
17__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, 25__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
18 struct xfrm_tmpl *tmpl, 26 struct xfrm_tmpl *tmpl,
@@ -109,6 +117,7 @@ __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto,
109static struct xfrm_state_afinfo xfrm4_state_afinfo = { 117static struct xfrm_state_afinfo xfrm4_state_afinfo = {
110 .family = AF_INET, 118 .family = AF_INET,
111 .lock = RW_LOCK_UNLOCKED, 119 .lock = RW_LOCK_UNLOCKED,
120 .init_flags = xfrm4_init_flags,
112 .init_tempsel = __xfrm4_init_tempsel, 121 .init_tempsel = __xfrm4_init_tempsel,
113 .state_lookup = __xfrm4_state_lookup, 122 .state_lookup = __xfrm4_state_lookup,
114 .find_acq = __xfrm4_find_acq, 123 .find_acq = __xfrm4_find_acq,
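[Annotation] The new .init_flags member gives each address family a hook for seeding default state flags; here IPv4 inherits the system-wide no_pmtu_disc setting. The caller is presumably the generic xfrm_init_state(), which is outside this hunk, so treat that as an assumption. A sketch of an optional per-family callback slot:

    #include <stdio.h>

    #define XFRM_STATE_NOPMTUDISC 0x2       /* illustrative value */

    struct xfrm_state { unsigned flags; };
    struct xfrm_state_afinfo {
            int (*init_flags)(struct xfrm_state *x);
    };

    static int no_pmtu_disc = 1; /* stand-in for ipv4_config.no_pmtu_disc */

    static int xfrm4_init_flags(struct xfrm_state *x)
    {
            if (no_pmtu_disc)
                    x->flags |= XFRM_STATE_NOPMTUDISC;
            return 0;
    }

    int main(void)
    {
            struct xfrm_state_afinfo afinfo = {
                    .init_flags = xfrm4_init_flags
            };
            struct xfrm_state x = { 0 };

            if (afinfo.init_flags)     /* the hook is optional per family */
                    afinfo.init_flags(&x);
            printf("flags=%#x\n", x.flags);
            return 0;
    }

The xfrm4_tunnel.c and ipcomp6.c hunks nearby are the matching cleanup: init_state loses its unused void *args, and callers go through xfrm_init_state() instead of poking t->type->init_state() and km.state by hand.
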
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 413191f585..e1fe360ed2 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -84,7 +84,7 @@ static void ipip_err(struct sk_buff *skb, u32 info)
84 handler->err_handler(skb, &arg); 84 handler->err_handler(skb, &arg);
85} 85}
86 86
87static int ipip_init_state(struct xfrm_state *x, void *args) 87static int ipip_init_state(struct xfrm_state *x)
88{ 88{
89 if (!x->props.mode) 89 if (!x->props.mode)
90 return -EINVAL; 90 return -EINVAL;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7744a25926..14f5c53235 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -131,7 +131,7 @@ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
131 131
132static int addrconf_ifdown(struct net_device *dev, int how); 132static int addrconf_ifdown(struct net_device *dev, int how);
133 133
134static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags); 134static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
135static void addrconf_dad_timer(unsigned long data); 135static void addrconf_dad_timer(unsigned long data);
136static void addrconf_dad_completed(struct inet6_ifaddr *ifp); 136static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
137static void addrconf_rs_timer(unsigned long data); 137static void addrconf_rs_timer(unsigned long data);
@@ -372,6 +372,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
372 ndev->regen_timer.data = (unsigned long) ndev; 372 ndev->regen_timer.data = (unsigned long) ndev;
373 if ((dev->flags&IFF_LOOPBACK) || 373 if ((dev->flags&IFF_LOOPBACK) ||
374 dev->type == ARPHRD_TUNNEL || 374 dev->type == ARPHRD_TUNNEL ||
375 dev->type == ARPHRD_NONE ||
375 dev->type == ARPHRD_SIT) { 376 dev->type == ARPHRD_SIT) {
376 printk(KERN_INFO 377 printk(KERN_INFO
377 "Disabled Privacy Extensions on device %p(%s)\n", 378 "Disabled Privacy Extensions on device %p(%s)\n",
@@ -491,7 +492,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
491 492
492static struct inet6_ifaddr * 493static struct inet6_ifaddr *
493ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, 494ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
494 int scope, unsigned flags) 495 int scope, u32 flags)
495{ 496{
496 struct inet6_ifaddr *ifa = NULL; 497 struct inet6_ifaddr *ifa = NULL;
497 struct rt6_info *rt; 498 struct rt6_info *rt;
@@ -694,7 +695,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
694 695
695 if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { 696 if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
696 if (onlink == 0) { 697 if (onlink == 0) {
697 ip6_del_rt(rt, NULL, NULL); 698 ip6_del_rt(rt, NULL, NULL, NULL);
698 rt = NULL; 699 rt = NULL;
699 } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { 700 } else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
700 rt->rt6i_expires = expires; 701 rt->rt6i_expires = expires;
@@ -1319,7 +1320,7 @@ static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad
1319 1320
1320static void 1321static void
1321addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, 1322addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
1322 unsigned long expires, unsigned flags) 1323 unsigned long expires, u32 flags)
1323{ 1324{
1324 struct in6_rtmsg rtmsg; 1325 struct in6_rtmsg rtmsg;
1325 1326
@@ -1339,7 +1340,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
1339 if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) 1340 if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
1340 rtmsg.rtmsg_flags |= RTF_NONEXTHOP; 1341 rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
1341 1342
1342 ip6_route_add(&rtmsg, NULL, NULL); 1343 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1343} 1344}
1344 1345
1345/* Create "default" multicast route to the interface */ 1346/* Create "default" multicast route to the interface */
@@ -1356,7 +1357,7 @@ static void addrconf_add_mroute(struct net_device *dev)
1356 rtmsg.rtmsg_ifindex = dev->ifindex; 1357 rtmsg.rtmsg_ifindex = dev->ifindex;
1357 rtmsg.rtmsg_flags = RTF_UP; 1358 rtmsg.rtmsg_flags = RTF_UP;
1358 rtmsg.rtmsg_type = RTMSG_NEWROUTE; 1359 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1359 ip6_route_add(&rtmsg, NULL, NULL); 1360 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1360} 1361}
1361 1362
1362static void sit_route_add(struct net_device *dev) 1363static void sit_route_add(struct net_device *dev)
@@ -1373,7 +1374,7 @@ static void sit_route_add(struct net_device *dev)
1373 rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; 1374 rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP;
1374 rtmsg.rtmsg_ifindex = dev->ifindex; 1375 rtmsg.rtmsg_ifindex = dev->ifindex;
1375 1376
1376 ip6_route_add(&rtmsg, NULL, NULL); 1377 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1377} 1378}
1378 1379
1379static void addrconf_add_lroute(struct net_device *dev) 1380static void addrconf_add_lroute(struct net_device *dev)
@@ -1466,7 +1467,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
1466 if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { 1467 if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
1467 if (rt->rt6i_flags&RTF_EXPIRES) { 1468 if (rt->rt6i_flags&RTF_EXPIRES) {
1468 if (valid_lft == 0) { 1469 if (valid_lft == 0) {
1469 ip6_del_rt(rt, NULL, NULL); 1470 ip6_del_rt(rt, NULL, NULL, NULL);
1470 rt = NULL; 1471 rt = NULL;
1471 } else { 1472 } else {
1472 rt->rt6i_expires = rt_expires; 1473 rt->rt6i_expires = rt_expires;
@@ -2228,7 +2229,7 @@ out:
2228/* 2229/*
2229 * Duplicate Address Detection 2230 * Duplicate Address Detection
2230 */ 2231 */
2231static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags) 2232static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
2232{ 2233{
2233 struct inet6_dev *idev = ifp->idev; 2234 struct inet6_dev *idev = ifp->idev;
2234 struct net_device *dev = idev->dev; 2235 struct net_device *dev = idev->dev;
@@ -2621,15 +2622,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2621} 2622}
2622 2623
2623static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, 2624static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
2624 u32 pid, u32 seq, int event) 2625 u32 pid, u32 seq, int event, unsigned int flags)
2625{ 2626{
2626 struct ifaddrmsg *ifm; 2627 struct ifaddrmsg *ifm;
2627 struct nlmsghdr *nlh; 2628 struct nlmsghdr *nlh;
2628 struct ifa_cacheinfo ci; 2629 struct ifa_cacheinfo ci;
2629 unsigned char *b = skb->tail; 2630 unsigned char *b = skb->tail;
2630 2631
2631 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 2632 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
2632 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2633 ifm = NLMSG_DATA(nlh); 2633 ifm = NLMSG_DATA(nlh);
2634 ifm->ifa_family = AF_INET6; 2634 ifm->ifa_family = AF_INET6;
2635 ifm->ifa_prefixlen = ifa->prefix_len; 2635 ifm->ifa_prefixlen = ifa->prefix_len;
@@ -2671,15 +2671,14 @@ rtattr_failure:
2671} 2671}
2672 2672
2673static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, 2673static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
2674 u32 pid, u32 seq, int event) 2674 u32 pid, u32 seq, int event, u16 flags)
2675{ 2675{
2676 struct ifaddrmsg *ifm; 2676 struct ifaddrmsg *ifm;
2677 struct nlmsghdr *nlh; 2677 struct nlmsghdr *nlh;
2678 struct ifa_cacheinfo ci; 2678 struct ifa_cacheinfo ci;
2679 unsigned char *b = skb->tail; 2679 unsigned char *b = skb->tail;
2680 2680
2681 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 2681 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
2682 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2683 ifm = NLMSG_DATA(nlh); 2682 ifm = NLMSG_DATA(nlh);
2684 ifm->ifa_family = AF_INET6; 2683 ifm->ifa_family = AF_INET6;
2685 ifm->ifa_prefixlen = 128; 2684 ifm->ifa_prefixlen = 128;
@@ -2708,15 +2707,14 @@ rtattr_failure:
2708} 2707}
2709 2708
2710static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, 2709static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
2711 u32 pid, u32 seq, int event) 2710 u32 pid, u32 seq, int event, unsigned int flags)
2712{ 2711{
2713 struct ifaddrmsg *ifm; 2712 struct ifaddrmsg *ifm;
2714 struct nlmsghdr *nlh; 2713 struct nlmsghdr *nlh;
2715 struct ifa_cacheinfo ci; 2714 struct ifa_cacheinfo ci;
2716 unsigned char *b = skb->tail; 2715 unsigned char *b = skb->tail;
2717 2716
2718 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); 2717 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags);
2719 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2720 ifm = NLMSG_DATA(nlh); 2718 ifm = NLMSG_DATA(nlh);
2721 ifm->ifa_family = AF_INET6; 2719 ifm->ifa_family = AF_INET6;
2722 ifm->ifa_prefixlen = 128; 2720 ifm->ifa_prefixlen = 128;
@@ -2785,7 +2783,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2785 continue; 2783 continue;
2786 if ((err = inet6_fill_ifaddr(skb, ifa, 2784 if ((err = inet6_fill_ifaddr(skb, ifa,
2787 NETLINK_CB(cb->skb).pid, 2785 NETLINK_CB(cb->skb).pid,
2788 cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) 2786 cb->nlh->nlmsg_seq, RTM_NEWADDR,
2787 NLM_F_MULTI)) <= 0)
2789 goto done; 2788 goto done;
2790 } 2789 }
2791 /* temp addr */ 2790 /* temp addr */
@@ -2796,7 +2795,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2796 continue; 2795 continue;
2797 if ((err = inet6_fill_ifaddr(skb, ifa, 2796 if ((err = inet6_fill_ifaddr(skb, ifa,
2798 NETLINK_CB(cb->skb).pid, 2797 NETLINK_CB(cb->skb).pid,
2799 cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) 2798 cb->nlh->nlmsg_seq, RTM_NEWADDR,
2799 NLM_F_MULTI)) <= 0)
2800 goto done; 2800 goto done;
2801 } 2801 }
2802#endif 2802#endif
@@ -2809,7 +2809,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2809 continue; 2809 continue;
2810 if ((err = inet6_fill_ifmcaddr(skb, ifmca, 2810 if ((err = inet6_fill_ifmcaddr(skb, ifmca,
2811 NETLINK_CB(cb->skb).pid, 2811 NETLINK_CB(cb->skb).pid,
2812 cb->nlh->nlmsg_seq, RTM_GETMULTICAST)) <= 0) 2812 cb->nlh->nlmsg_seq, RTM_GETMULTICAST,
2813 NLM_F_MULTI)) <= 0)
2813 goto done; 2814 goto done;
2814 } 2815 }
2815 break; 2816 break;
@@ -2821,7 +2822,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
2821 continue; 2822 continue;
2822 if ((err = inet6_fill_ifacaddr(skb, ifaca, 2823 if ((err = inet6_fill_ifacaddr(skb, ifaca,
2823 NETLINK_CB(cb->skb).pid, 2824 NETLINK_CB(cb->skb).pid,
2824 cb->nlh->nlmsg_seq, RTM_GETANYCAST)) <= 0) 2825 cb->nlh->nlmsg_seq, RTM_GETANYCAST,
2826 NLM_F_MULTI)) <= 0)
2825 goto done; 2827 goto done;
2826 } 2828 }
2827 break; 2829 break;
@@ -2871,7 +2873,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
2871 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS); 2873 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS);
2872 return; 2874 return;
2873 } 2875 }
2874 if (inet6_fill_ifaddr(skb, ifa, 0, 0, event) < 0) { 2876 if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) {
2875 kfree_skb(skb); 2877 kfree_skb(skb);
2876 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL); 2878 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL);
2877 return; 2879 return;
@@ -2906,7 +2908,7 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf,
2906} 2908}
2907 2909
2908static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, 2910static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
2909 u32 pid, u32 seq, int event) 2911 u32 pid, u32 seq, int event, unsigned int flags)
2910{ 2912{
2911 struct net_device *dev = idev->dev; 2913 struct net_device *dev = idev->dev;
2912 __s32 *array = NULL; 2914 __s32 *array = NULL;
@@ -2917,8 +2919,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
2917 __u32 mtu = dev->mtu; 2919 __u32 mtu = dev->mtu;
2918 struct ifla_cacheinfo ci; 2920 struct ifla_cacheinfo ci;
2919 2921
2920 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r)); 2922 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
2921 if (pid) nlh->nlmsg_flags |= NLM_F_MULTI;
2922 r = NLMSG_DATA(nlh); 2923 r = NLMSG_DATA(nlh);
2923 r->ifi_family = AF_INET6; 2924 r->ifi_family = AF_INET6;
2924 r->ifi_type = dev->type; 2925 r->ifi_type = dev->type;
@@ -2985,7 +2986,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
2985 if ((idev = in6_dev_get(dev)) == NULL) 2986 if ((idev = in6_dev_get(dev)) == NULL)
2986 continue; 2987 continue;
2987 err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, 2988 err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
2988 cb->nlh->nlmsg_seq, RTM_NEWLINK); 2989 cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
2989 in6_dev_put(idev); 2990 in6_dev_put(idev);
2990 if (err <= 0) 2991 if (err <= 0)
2991 break; 2992 break;
@@ -3007,7 +3008,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3007 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS); 3008 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS);
3008 return; 3009 return;
3009 } 3010 }
3010 if (inet6_fill_ifinfo(skb, idev, 0, 0, event) < 0) { 3011 if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) {
3011 kfree_skb(skb); 3012 kfree_skb(skb);
3012 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL); 3013 netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL);
3013 return; 3014 return;
@@ -3017,18 +3018,15 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3017} 3018}
3018 3019
3019static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, 3020static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
3020 struct prefix_info *pinfo, u32 pid, u32 seq, int event) 3021 struct prefix_info *pinfo, u32 pid, u32 seq,
3022 int event, unsigned int flags)
3021{ 3023{
3022 struct prefixmsg *pmsg; 3024 struct prefixmsg *pmsg;
3023 struct nlmsghdr *nlh; 3025 struct nlmsghdr *nlh;
3024 unsigned char *b = skb->tail; 3026 unsigned char *b = skb->tail;
3025 struct prefix_cacheinfo ci; 3027 struct prefix_cacheinfo ci;
3026 3028
3027 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*pmsg)); 3029 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*pmsg), flags);
3028
3029 if (pid)
3030 nlh->nlmsg_flags |= NLM_F_MULTI;
3031
3032 pmsg = NLMSG_DATA(nlh); 3030 pmsg = NLMSG_DATA(nlh);
3033 pmsg->prefix_family = AF_INET6; 3031 pmsg->prefix_family = AF_INET6;
3034 pmsg->prefix_ifindex = idev->dev->ifindex; 3032 pmsg->prefix_ifindex = idev->dev->ifindex;
@@ -3067,7 +3065,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
3067 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS); 3065 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS);
3068 return; 3066 return;
3069 } 3067 }
3070 if (inet6_fill_prefix(skb, idev, pinfo, 0, 0, event) < 0) { 3068 if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) {
3071 kfree_skb(skb); 3069 kfree_skb(skb);
3072 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL); 3070 netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL);
3073 return; 3071 return;
@@ -3096,7 +3094,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
3096 switch (event) { 3094 switch (event) {
3097 case RTM_NEWADDR: 3095 case RTM_NEWADDR:
3098 dst_hold(&ifp->rt->u.dst); 3096 dst_hold(&ifp->rt->u.dst);
3099 if (ip6_ins_rt(ifp->rt, NULL, NULL)) 3097 if (ip6_ins_rt(ifp->rt, NULL, NULL, NULL))
3100 dst_release(&ifp->rt->u.dst); 3098 dst_release(&ifp->rt->u.dst);
3101 if (ifp->idev->cnf.forwarding) 3099 if (ifp->idev->cnf.forwarding)
3102 addrconf_join_anycast(ifp); 3100 addrconf_join_anycast(ifp);
@@ -3106,7 +3104,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
3106 addrconf_leave_anycast(ifp); 3104 addrconf_leave_anycast(ifp);
3107 addrconf_leave_solict(ifp->idev, &ifp->addr); 3105 addrconf_leave_solict(ifp->idev, &ifp->addr);
3108 dst_hold(&ifp->rt->u.dst); 3106 dst_hold(&ifp->rt->u.dst);
3109 if (ip6_del_rt(ifp->rt, NULL, NULL)) 3107 if (ip6_del_rt(ifp->rt, NULL, NULL, NULL))
3110 dst_free(&ifp->rt->u.dst); 3108 dst_free(&ifp->rt->u.dst);
3111 else 3109 else
3112 dst_release(&ifp->rt->u.dst); 3110 dst_release(&ifp->rt->u.dst);
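[Annotation] All the addrconf fill functions switch from NLMSG_PUT plus the "if (pid) nlmsg_flags |= NLM_F_MULTI" heuristic to an NLMSG_NEW variant that takes the flags from the caller, and the notifiers now stamp current->pid instead of 0. A sketch of the two idioms, using a trimmed-down header struct and invented helper names:

    #include <stdio.h>

    #define NLM_F_MULTI 2

    struct nlmsghdr { unsigned pid, seq; int type; unsigned flags; };

    /* old idiom: flags guessed inside the fill function */
    static void put_old(struct nlmsghdr *h, unsigned pid, unsigned seq,
                        int type)
    {
            *h = (struct nlmsghdr){ .pid = pid, .seq = seq, .type = type };
            if (pid)                    /* fragile heuristic */
                    h->flags |= NLM_F_MULTI;
    }

    /* new idiom: caller states the flags (NLM_F_MULTI only for dumps) */
    static void put_new(struct nlmsghdr *h, unsigned pid, unsigned seq,
                        int type, unsigned flags)
    {
            *h = (struct nlmsghdr){ .pid = pid, .seq = seq,
                                    .type = type, .flags = flags };
    }

    int main(void)
    {
            struct nlmsghdr a, b;

            put_old(&a, 1234, 1, 20 /* RTM_NEWADDR */);
            put_new(&b, 1234, 1, 20, NLM_F_MULTI);
            printf("old=%u new=%u\n", a.flags, b.flags);  /* 2 2 */
            return 0;
    }

The win is that an event notification with a non-zero pid no longer gets mislabeled as part of a multi-part dump.
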
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index e3ecf626cb..986fdfdccb 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -339,7 +339,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
339 xfrm_state_put(x); 339 xfrm_state_put(x);
340} 340}
341 341
342static int ah6_init_state(struct xfrm_state *x, void *args) 342static int ah6_init_state(struct xfrm_state *x)
343{ 343{
344 struct ah_data *ahp = NULL; 344 struct ah_data *ahp = NULL;
345 struct xfrm_algo_desc *aalg_desc; 345 struct xfrm_algo_desc *aalg_desc;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 5d22ca3cca..6b72940472 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -337,7 +337,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
337 write_unlock_bh(&idev->lock); 337 write_unlock_bh(&idev->lock);
338 338
339 dst_hold(&rt->u.dst); 339 dst_hold(&rt->u.dst);
340 if (ip6_ins_rt(rt, NULL, NULL)) 340 if (ip6_ins_rt(rt, NULL, NULL, NULL))
341 dst_release(&rt->u.dst); 341 dst_release(&rt->u.dst);
342 342
343 addrconf_join_solict(dev, &aca->aca_addr); 343 addrconf_join_solict(dev, &aca->aca_addr);
@@ -380,7 +380,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
380 addrconf_leave_solict(idev, &aca->aca_addr); 380 addrconf_leave_solict(idev, &aca->aca_addr);
381 381
382 dst_hold(&aca->aca_rt->u.dst); 382 dst_hold(&aca->aca_rt->u.dst);
383 if (ip6_del_rt(aca->aca_rt, NULL, NULL)) 383 if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL))
384 dst_free(&aca->aca_rt->u.dst); 384 dst_free(&aca->aca_rt->u.dst);
385 else 385 else
386 dst_release(&aca->aca_rt->u.dst); 386 dst_release(&aca->aca_rt->u.dst);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 65b9375df5..5229365cd8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -353,14 +353,14 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
353 err = copied; 353 err = copied;
354 354
355 /* Reset and regenerate socket error */ 355 /* Reset and regenerate socket error */
356 spin_lock_irq(&sk->sk_error_queue.lock); 356 spin_lock_bh(&sk->sk_error_queue.lock);
357 sk->sk_err = 0; 357 sk->sk_err = 0;
358 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) { 358 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
359 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno; 359 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
360 spin_unlock_irq(&sk->sk_error_queue.lock); 360 spin_unlock_bh(&sk->sk_error_queue.lock);
361 sk->sk_error_report(sk); 361 sk->sk_error_report(sk);
362 } else { 362 } else {
363 spin_unlock_irq(&sk->sk_error_queue.lock); 363 spin_unlock_bh(&sk->sk_error_queue.lock);
364 } 364 }
365 365
366out_free_skb: 366out_free_skb:
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index be7095d6ba..324db62515 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -296,7 +296,7 @@ static void esp6_destroy(struct xfrm_state *x)
296 kfree(esp); 296 kfree(esp);
297} 297}
298 298
299static int esp6_init_state(struct xfrm_state *x, void *args) 299static int esp6_init_state(struct xfrm_state *x)
300{ 300{
301 struct esp_data *esp = NULL; 301 struct esp_data *esp = NULL;
302 302
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8e0f569b88..ff3ec9822e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -277,8 +277,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
277{ 277{
278 struct inet6_dev *idev = NULL; 278 struct inet6_dev *idev = NULL;
279 struct ipv6hdr *hdr = skb->nh.ipv6h; 279 struct ipv6hdr *hdr = skb->nh.ipv6h;
280 struct sock *sk = icmpv6_socket->sk; 280 struct sock *sk;
281 struct ipv6_pinfo *np = inet6_sk(sk); 281 struct ipv6_pinfo *np;
282 struct in6_addr *saddr = NULL; 282 struct in6_addr *saddr = NULL;
283 struct dst_entry *dst; 283 struct dst_entry *dst;
284 struct icmp6hdr tmp_hdr; 284 struct icmp6hdr tmp_hdr;
@@ -358,6 +358,9 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
358 if (icmpv6_xmit_lock()) 358 if (icmpv6_xmit_lock())
359 return; 359 return;
360 360
361 sk = icmpv6_socket->sk;
362 np = inet6_sk(sk);
363
361 if (!icmpv6_xrlim_allow(sk, type, &fl)) 364 if (!icmpv6_xrlim_allow(sk, type, &fl))
362 goto out; 365 goto out;
363 366
@@ -423,9 +426,9 @@ out:
423 426
424static void icmpv6_echo_reply(struct sk_buff *skb) 427static void icmpv6_echo_reply(struct sk_buff *skb)
425{ 428{
426 struct sock *sk = icmpv6_socket->sk; 429 struct sock *sk;
427 struct inet6_dev *idev; 430 struct inet6_dev *idev;
428 struct ipv6_pinfo *np = inet6_sk(sk); 431 struct ipv6_pinfo *np;
429 struct in6_addr *saddr = NULL; 432 struct in6_addr *saddr = NULL;
430 struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw; 433 struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
431 struct icmp6hdr tmp_hdr; 434 struct icmp6hdr tmp_hdr;
@@ -454,6 +457,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
454 if (icmpv6_xmit_lock()) 457 if (icmpv6_xmit_lock())
455 return; 458 return;
456 459
460 sk = icmpv6_socket->sk;
461 np = inet6_sk(sk);
462
457 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) 463 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
458 fl.oif = np->mcast_oif; 464 fl.oif = np->mcast_oif;
459 465
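[Annotation] Both ICMPv6 senders now look up icmpv6_socket->sk only after icmpv6_xmit_lock() has succeeded. My reading, an assumption since the changelog is not shown, is that the socket must not be touched on the early-exit path before the lock has pinned the context. A minimal sketch of the reordering, with the lock and socket as stand-ins:

    #include <stdio.h>

    struct sock { int busy; };
    static struct sock icmp_sk;           /* stand-in for the ICMPv6 sock */

    static int icmpv6_xmit_lock(void)
    {
            if (icmp_sk.busy)
                    return 1;             /* contended: caller bails out */
            icmp_sk.busy = 1;
            return 0;
    }

    static void icmpv6_xmit_unlock(void)
    {
            icmp_sk.busy = 0;
    }

    static void icmpv6_send(void)
    {
            struct sock *sk;              /* deliberately not set up here */

            if (icmpv6_xmit_lock())
                    return;               /* early exit touches no socket */

            sk = &icmp_sk;                /* looked up under the lock */
            printf("sending via %p\n", (void *)sk);
            icmpv6_xmit_unlock();
    }

    int main(void)
    {
            icmpv6_send();
            return 0;
    }
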
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 405740b75a..1b354aa979 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -394,7 +394,7 @@ insert_above:
394 */ 394 */
395 395
396static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, 396static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
397 struct nlmsghdr *nlh) 397 struct nlmsghdr *nlh, struct netlink_skb_parms *req)
398{ 398{
399 struct rt6_info *iter = NULL; 399 struct rt6_info *iter = NULL;
400 struct rt6_info **ins; 400 struct rt6_info **ins;
@@ -449,7 +449,7 @@ out:
449 *ins = rt; 449 *ins = rt;
450 rt->rt6i_node = fn; 450 rt->rt6i_node = fn;
451 atomic_inc(&rt->rt6i_ref); 451 atomic_inc(&rt->rt6i_ref);
452 inet6_rt_notify(RTM_NEWROUTE, rt, nlh); 452 inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req);
453 rt6_stats.fib_rt_entries++; 453 rt6_stats.fib_rt_entries++;
454 454
455 if ((fn->fn_flags & RTN_RTINFO) == 0) { 455 if ((fn->fn_flags & RTN_RTINFO) == 0) {
@@ -479,7 +479,8 @@ void fib6_force_start_gc(void)
479 * with source addr info in sub-trees 479 * with source addr info in sub-trees
480 */ 480 */
481 481
482int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr) 482int fib6_add(struct fib6_node *root, struct rt6_info *rt,
483 struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
483{ 484{
484 struct fib6_node *fn; 485 struct fib6_node *fn;
485 int err = -ENOMEM; 486 int err = -ENOMEM;
@@ -552,7 +553,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh,
552 } 553 }
553#endif 554#endif
554 555
555 err = fib6_add_rt2node(fn, rt, nlh); 556 err = fib6_add_rt2node(fn, rt, nlh, req);
556 557
557 if (err == 0) { 558 if (err == 0) {
558 fib6_start_gc(rt); 559 fib6_start_gc(rt);
@@ -859,7 +860,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
859} 860}
860 861
861static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, 862static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
862 struct nlmsghdr *nlh, void *_rtattr) 863 struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
863{ 864{
864 struct fib6_walker_t *w; 865 struct fib6_walker_t *w;
865 struct rt6_info *rt = *rtp; 866 struct rt6_info *rt = *rtp;
@@ -915,11 +916,11 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
915 if (atomic_read(&rt->rt6i_ref) != 1) BUG(); 916 if (atomic_read(&rt->rt6i_ref) != 1) BUG();
916 } 917 }
917 918
918 inet6_rt_notify(RTM_DELROUTE, rt, nlh); 919 inet6_rt_notify(RTM_DELROUTE, rt, nlh, req);
919 rt6_release(rt); 920 rt6_release(rt);
920} 921}
921 922
922int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr) 923int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
923{ 924{
924 struct fib6_node *fn = rt->rt6i_node; 925 struct fib6_node *fn = rt->rt6i_node;
925 struct rt6_info **rtp; 926 struct rt6_info **rtp;
@@ -944,7 +945,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
944 945
945 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) { 946 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
946 if (*rtp == rt) { 947 if (*rtp == rt) {
947 fib6_del_route(fn, rtp, nlh, _rtattr); 948 fib6_del_route(fn, rtp, nlh, _rtattr, req);
948 return 0; 949 return 0;
949 } 950 }
950 } 951 }
@@ -1073,7 +1074,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
1073 res = c->func(rt, c->arg); 1074 res = c->func(rt, c->arg);
1074 if (res < 0) { 1075 if (res < 0) {
1075 w->leaf = rt; 1076 w->leaf = rt;
1076 res = fib6_del(rt, NULL, NULL); 1077 res = fib6_del(rt, NULL, NULL, NULL);
1077 if (res) { 1078 if (res) {
1078#if RT6_DEBUG >= 2 1079#if RT6_DEBUG >= 2
1079 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); 1080 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
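[Annotation] The ip6_fib.c changes thread a struct netlink_skb_parms through fib6_add()/fib6_del() down to inet6_rt_notify(), so a route change triggered by a userspace request can echo the requester instead of always notifying as pid 0. Kernel-internal callers pass NULL, as the fib6_clean_node() hunk shows. A pared-down sketch of that plumbing:

    #include <stdio.h>

    struct netlink_skb_parms { unsigned pid; };

    static void inet6_rt_notify(int event,
                                const struct netlink_skb_parms *req)
    {
            /* kernel-originated changes pass req == NULL -> pid 0 */
            printf("event %d from pid %u\n", event, req ? req->pid : 0);
    }

    static int fib6_add_rt2node(const struct netlink_skb_parms *req)
    {
            inet6_rt_notify(24 /* RTM_NEWROUTE */, req);
            return 0;
    }

    int main(void)
    {
            struct netlink_skb_parms req = { .pid = 4321 };

            fib6_add_rt2node(&req);  /* userspace-initiated: pid echoed */
            fib6_add_rt2node(NULL);  /* kernel-initiated: pid 0 */
            return 0;
    }

This is also why every ip6_route_add()/ip6_del_rt()/ip6_ins_rt() call site in addrconf.c, anycast.c, and ndisc.c grows an extra NULL argument.
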
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b78a535868..06e7cdaeed 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -484,9 +484,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
484 to->nf_bridge = from->nf_bridge; 484 to->nf_bridge = from->nf_bridge;
485 nf_bridge_get(to->nf_bridge); 485 nf_bridge_get(to->nf_bridge);
486#endif 486#endif
487#ifdef CONFIG_NETFILTER_DEBUG
488 to->nf_debug = from->nf_debug;
489#endif
490#endif 487#endif
491} 488}
492 489
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3b1c9fa184..ba3b0c267f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -882,6 +882,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
882 t->parms.hop_limit = p->hop_limit; 882 t->parms.hop_limit = p->hop_limit;
883 t->parms.encap_limit = p->encap_limit; 883 t->parms.encap_limit = p->encap_limit;
884 t->parms.flowinfo = p->flowinfo; 884 t->parms.flowinfo = p->flowinfo;
885 t->parms.link = p->link;
885 ip6ip6_tnl_link_config(t); 886 ip6ip6_tnl_link_config(t);
886 return 0; 887 return 0;
887} 888}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 6cde5310cd..423feb46cc 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -234,14 +234,9 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
234 t->props.mode = 1; 234 t->props.mode = 1;
235 memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); 235 memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
236 236
237 t->type = xfrm_get_type(IPPROTO_IPV6, t->props.family); 237 if (xfrm_init_state(t))
238 if (t->type == NULL)
239 goto error; 238 goto error;
240 239
241 if (t->type->init_state(t, NULL))
242 goto error;
243
244 t->km.state = XFRM_STATE_VALID;
245 atomic_set(&t->tunnel_users, 1); 240 atomic_set(&t->tunnel_users, 1);
246 241
247out: 242out:
@@ -420,7 +415,7 @@ static void ipcomp6_destroy(struct xfrm_state *x)
420 xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr); 415 xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
421} 416}
422 417
423static int ipcomp6_init_state(struct xfrm_state *x, void *args) 418static int ipcomp6_init_state(struct xfrm_state *x)
424{ 419{
425 int err; 420 int err;
426 struct ipcomp_data *ipcd; 421 struct ipcomp_data *ipcd;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 279ab86be6..f3ef4c38d3 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -423,11 +423,12 @@ done:
423 psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; 423 psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
424 retv = ipv6_sock_mc_join(sk, greqs.gsr_interface, 424 retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
425 &psin6->sin6_addr); 425 &psin6->sin6_addr);
426 if (retv) 426 /* prior join w/ different source is ok */
427 if (retv && retv != -EADDRINUSE)
427 break; 428 break;
428 omode = MCAST_INCLUDE; 429 omode = MCAST_INCLUDE;
429 add = 1; 430 add = 1;
430 } else /*IP_DROP_SOURCE_MEMBERSHIP */ { 431 } else /* MCAST_LEAVE_SOURCE_GROUP */ {
431 omode = MCAST_INCLUDE; 432 omode = MCAST_INCLUDE;
432 add = 0; 433 add = 0;
433 } 434 }
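[Annotation] The ipv6_sockglue.c hunk makes MCAST_JOIN_SOURCE_GROUP tolerate -EADDRINUSE from the group join: being a member already is fine when the caller is only extending the source filter, which is exactly the error the new duplicate-join check in mcast.c (below) starts returning. A sketch of that error-handling shape, with invented helpers:

    #include <errno.h>
    #include <stdio.h>

    static int joined;

    static int mc_join(void)
    {
            if (joined)
                    return -EADDRINUSE;
            joined = 1;
            return 0;
    }

    static int join_source_group(void)
    {
            int err = mc_join();

            if (err && err != -EADDRINUSE) /* prior join w/ other src ok */
                    return err;
            return 0;                      /* go on to add the source */
    }

    int main(void)
    {
            printf("%d\n", join_source_group()); /* first join: 0 */
            printf("%d\n", join_source_group()); /* repeat tolerated: 0 */
            return 0;
    }
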
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 393b6e6f50..562fcd14fd 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -188,6 +188,16 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
188 if (!ipv6_addr_is_multicast(addr)) 188 if (!ipv6_addr_is_multicast(addr))
189 return -EINVAL; 189 return -EINVAL;
190 190
191 read_lock_bh(&ipv6_sk_mc_lock);
192 for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) {
193 if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
194 ipv6_addr_equal(&mc_lst->addr, addr)) {
195 read_unlock_bh(&ipv6_sk_mc_lock);
196 return -EADDRINUSE;
197 }
198 }
199 read_unlock_bh(&ipv6_sk_mc_lock);
200
191 mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); 201 mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
192 202
193 if (mc_lst == NULL) 203 if (mc_lst == NULL)
@@ -349,6 +359,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
349 struct ipv6_pinfo *inet6 = inet6_sk(sk); 359 struct ipv6_pinfo *inet6 = inet6_sk(sk);
350 struct ip6_sf_socklist *psl; 360 struct ip6_sf_socklist *psl;
351 int i, j, rv; 361 int i, j, rv;
362 int leavegroup = 0;
352 int err; 363 int err;
353 364
354 if (pgsr->gsr_group.ss_family != AF_INET6 || 365 if (pgsr->gsr_group.ss_family != AF_INET6 ||
@@ -368,6 +379,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
368 379
369 err = -EADDRNOTAVAIL; 380 err = -EADDRNOTAVAIL;
370 381
382 read_lock_bh(&ipv6_sk_mc_lock);
371 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { 383 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
372 if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) 384 if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
373 continue; 385 continue;
@@ -401,6 +413,12 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
401 if (rv) /* source not found */ 413 if (rv) /* source not found */
402 goto done; 414 goto done;
403 415
416 /* special case - (INCLUDE, empty) == LEAVE_GROUP */
417 if (psl->sl_count == 1 && omode == MCAST_INCLUDE) {
418 leavegroup = 1;
419 goto done;
420 }
421
404 /* update the interface filter */ 422 /* update the interface filter */
405 ip6_mc_del_src(idev, group, omode, 1, source, 1); 423 ip6_mc_del_src(idev, group, omode, 1, source, 1);
406 424
@@ -453,9 +471,12 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
453 /* update the interface list */ 471 /* update the interface list */
454 ip6_mc_add_src(idev, group, omode, 1, source, 1); 472 ip6_mc_add_src(idev, group, omode, 1, source, 1);
455done: 473done:
474 read_unlock_bh(&ipv6_sk_mc_lock);
456 read_unlock_bh(&idev->lock); 475 read_unlock_bh(&idev->lock);
457 in6_dev_put(idev); 476 in6_dev_put(idev);
458 dev_put(dev); 477 dev_put(dev);
478 if (leavegroup)
479 return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
459 return err; 480 return err;
460} 481}
461 482
@@ -1280,15 +1301,6 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
1280 return NULL; 1301 return NULL;
1281 1302
1282 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 1303 skb_reserve(skb, LL_RESERVED_SPACE(dev));
1283 if (dev->hard_header) {
1284 unsigned char ha[MAX_ADDR_LEN];
1285
1286 ndisc_mc_map(&mld2_all_mcr, ha, dev, 1);
1287 if (dev->hard_header(skb, dev, ETH_P_IPV6,ha,NULL,size) < 0) {
1288 kfree_skb(skb);
1289 return NULL;
1290 }
1291 }
1292 1304
1293 if (ipv6_get_lladdr(dev, &addr_buf)) { 1305 if (ipv6_get_lladdr(dev, &addr_buf)) {
1294 /* <draft-ietf-magma-mld-source-05.txt>: 1306 /* <draft-ietf-magma-mld-source-05.txt>:
@@ -1312,6 +1324,30 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
1312 return skb; 1324 return skb;
1313} 1325}
1314 1326
1327static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
1328{
1329 struct net_device *dev = skb->dev;
1330
1331 if (dev->hard_header) {
1332 unsigned char ha[MAX_ADDR_LEN];
1333 int err;
1334
1335 ndisc_mc_map(&skb->nh.ipv6h->daddr, ha, dev, 1);
1336 err = dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len);
1337 if (err < 0) {
1338 kfree_skb(skb);
1339 return err;
1340 }
1341 }
1342 return dev_queue_xmit(skb);
1343}
1344
1345static inline int mld_dev_queue_xmit(struct sk_buff *skb)
1346{
1347 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
1348 mld_dev_queue_xmit2);
1349}
1350
1315static void mld_sendpack(struct sk_buff *skb) 1351static void mld_sendpack(struct sk_buff *skb)
1316{ 1352{
1317 struct ipv6hdr *pip6 = skb->nh.ipv6h; 1353 struct ipv6hdr *pip6 = skb->nh.ipv6h;
@@ -1329,7 +1365,7 @@ static void mld_sendpack(struct sk_buff *skb)
1329 pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, 1365 pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
1330 IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0)); 1366 IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0));
1331 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, 1367 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
1332 dev_queue_xmit); 1368 mld_dev_queue_xmit);
1333 if (!err) { 1369 if (!err) {
1334 ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS); 1370 ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS);
1335 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); 1371 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
@@ -1635,12 +1671,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1635 } 1671 }
1636 1672
1637 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 1673 skb_reserve(skb, LL_RESERVED_SPACE(dev));
1638 if (dev->hard_header) {
1639 unsigned char ha[MAX_ADDR_LEN];
1640 ndisc_mc_map(snd_addr, ha, dev, 1);
1641 if (dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, full_len) < 0)
1642 goto out;
1643 }
1644 1674
1645 if (ipv6_get_lladdr(dev, &addr_buf)) { 1675 if (ipv6_get_lladdr(dev, &addr_buf)) {
1646 /* <draft-ietf-magma-mld-source-05.txt>: 1676 /* <draft-ietf-magma-mld-source-05.txt>:
@@ -1668,7 +1698,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1668 idev = in6_dev_get(skb->dev); 1698 idev = in6_dev_get(skb->dev);
1669 1699
1670 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, 1700 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
1671 dev_queue_xmit); 1701 mld_dev_queue_xmit);
1672 if (!err) { 1702 if (!err) {
1673 if (type == ICMPV6_MGM_REDUCTION) 1703 if (type == ICMPV6_MGM_REDUCTION)
1674 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBREDUCTIONS); 1704 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBREDUCTIONS);
@@ -1682,10 +1712,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1682 if (likely(idev != NULL)) 1712 if (likely(idev != NULL))
1683 in6_dev_put(idev); 1713 in6_dev_put(idev);
1684 return; 1714 return;
1685
1686out:
1687 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1688 kfree_skb(skb);
1689} 1715}
1690 1716
1691static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, 1717static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
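[Annotation] mld_newpack() and igmp6_send() stop building the link-layer header at allocation time. MLD output now runs through NF_IP6_POST_ROUTING with mld_dev_queue_xmit2() as the okfn, which maps the final IPv6 destination to a multicast MAC immediately before dev_queue_xmit(); netfilter gets to see the packet, and the mapping uses whatever daddr survives the hooks. A sketch of the deferred-resolution shape, with all types as stand-ins:

    #include <stdio.h>

    struct pkt { unsigned char daddr_last_byte; char mac[18]; };

    static int dev_queue_xmit(struct pkt *p)
    {
            printf("xmit with MAC %s\n", p->mac);
            return 0;
    }

    /* okfn: runs only after the POST_ROUTING hook has had its say */
    static int mld_dev_queue_xmit2(struct pkt *p)
    {
            /* ndisc_mc_map(): IPv6 mcast -> 33:33:xx:xx:xx:xx */
            snprintf(p->mac, sizeof(p->mac), "33:33:00:00:00:%02x",
                     p->daddr_last_byte);
            return dev_queue_xmit(p);
    }

    static int nf_hook_post_routing(struct pkt *p,
                                    int (*okfn)(struct pkt *))
    {
            /* a netfilter verdict would be taken here; assume ACCEPT */
            return okfn(p);
    }

    int main(void)
    {
            struct pkt p = { .daddr_last_byte = 0x16 };  /* ff02::16 */

            return nf_hook_post_routing(&p, mld_dev_queue_xmit2);
    }

The duplicate-membership check added to ipv6_sock_mc_join() and the "(INCLUDE, empty) == LEAVE_GROUP" special case in ip6_mc_source() are the socket-level halves of the same series.
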
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 7c291f4e9e..7ae72d4c9b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -955,7 +955,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
955 struct rt6_info *rt; 955 struct rt6_info *rt;
956 rt = rt6_get_dflt_router(saddr, dev); 956 rt = rt6_get_dflt_router(saddr, dev);
957 if (rt) 957 if (rt)
958 ip6_del_rt(rt, NULL, NULL); 958 ip6_del_rt(rt, NULL, NULL, NULL);
959 } 959 }
960 960
961out: 961out:
@@ -1096,7 +1096,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1096 1096
1097 if (rt && lifetime == 0) { 1097 if (rt && lifetime == 0) {
1098 neigh_clone(neigh); 1098 neigh_clone(neigh);
1099 ip6_del_rt(rt, NULL, NULL); 1099 ip6_del_rt(rt, NULL, NULL, NULL);
1100 rt = NULL; 1100 rt = NULL;
1101 } 1101 }
1102 1102
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index c735276fdd..73034511c8 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -71,7 +71,6 @@ static DECLARE_MUTEX(ip6t_mutex);
71/* Must have mutex */ 71/* Must have mutex */
72#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0) 72#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
73#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0) 73#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
74#include <linux/netfilter_ipv4/lockhelp.h>
75#include <linux/netfilter_ipv4/listhelp.h> 74#include <linux/netfilter_ipv4/listhelp.h>
76 75
77#if 0 76#if 0
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index bfc3d0185d..c44685e391 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -366,8 +366,6 @@ ip6t_log_packet(unsigned int hooknum,
 		    const char *level_string,
 		    const char *prefix)
 {
-	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
-
 	spin_lock_bh(&log_lock);
 	printk(level_string);
 	printk("%sIN=%s OUT=%s ",
@@ -377,39 +375,25 @@ ip6t_log_packet(unsigned int hooknum,
 	if (in && !out) {
 		/* MAC logging for input chain only. */
 		printk("MAC=");
-		if (skb->dev && skb->dev->hard_header_len && skb->mac.raw != (void*)ipv6h) {
-			if (skb->dev->type != ARPHRD_SIT){
-				int i;
-				unsigned char *p = skb->mac.raw;
-				for (i = 0; i < skb->dev->hard_header_len; i++,p++)
-					printk("%02x%c", *p,
-					       i==skb->dev->hard_header_len - 1
-					       ? ' ':':');
-			} else {
-				int i;
-				unsigned char *p = skb->mac.raw;
-				if ( p - (ETH_ALEN*2+2) > skb->head ){
-					p -= (ETH_ALEN+2);
-					for (i = 0; i < (ETH_ALEN); i++,p++)
-						printk("%02x%s", *p,
-						       i == ETH_ALEN-1 ? "->" : ":");
-					p -= (ETH_ALEN*2);
-					for (i = 0; i < (ETH_ALEN); i++,p++)
-						printk("%02x%c", *p,
-						       i == ETH_ALEN-1 ? ' ' : ':');
-				}
-
-				if ((skb->dev->addr_len == 4) &&
-				    skb->dev->hard_header_len > 20){
-					printk("TUNNEL=");
-					p = skb->mac.raw + 12;
-					for (i = 0; i < 4; i++,p++)
-						printk("%3d%s", *p,
-						       i == 3 ? "->" : ".");
-					for (i = 0; i < 4; i++,p++)
-						printk("%3d%c", *p,
-						       i == 3 ? ' ' : '.');
-				}
+		if (skb->dev && skb->dev->hard_header_len &&
+		    skb->mac.raw != skb->nh.raw) {
+			unsigned char *p = skb->mac.raw;
+			int i;
+
+			if (skb->dev->type == ARPHRD_SIT &&
+			    (p -= ETH_HLEN) < skb->head)
+				p = NULL;
+
+			if (p != NULL)
+				for (i = 0; i < skb->dev->hard_header_len; i++)
+					printk("%02x", p[i]);
+			printk(" ");
+
+			if (skb->dev->type == ARPHRD_SIT) {
+				struct iphdr *iph = (struct iphdr *)skb->mac.raw;
+				printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ",
+				       NIPQUAD(iph->saddr),
+				       NIPQUAD(iph->daddr));
 			}
 		} else
 			printk(" ");
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 71407beaf7..c2982efd14 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -129,13 +129,15 @@ static struct nf_hook_ops ip6t_ops[] = {
 		.hook = ip6t_hook,
 		.pf = PF_INET6,
 		.hooknum = NF_IP6_PRE_ROUTING,
-		.priority = NF_IP6_PRI_FIRST
+		.priority = NF_IP6_PRI_FIRST,
+		.owner = THIS_MODULE,
 	},
 	{
 		.hook = ip6t_hook,
 		.pf = PF_INET6,
 		.hooknum = NF_IP6_LOCAL_OUT,
-		.priority = NF_IP6_PRI_FIRST
+		.priority = NF_IP6_PRI_FIRST,
+		.owner = THIS_MODULE,
 	},
 };
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 617645bc5e..e2b848ec98 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -434,12 +434,12 @@ csum_copy_err:
 	/* Clear queue. */
 	if (flags&MSG_PEEK) {
 		int clear = 0;
-		spin_lock_irq(&sk->sk_receive_queue.lock);
+		spin_lock_bh(&sk->sk_receive_queue.lock);
 		if (skb == skb_peek(&sk->sk_receive_queue)) {
 			__skb_unlink(skb, &sk->sk_receive_queue);
 			clear = 1;
 		}
-		spin_unlock_irq(&sk->sk_receive_queue.lock);
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		if (clear)
 			kfree_skb(skb);
 	}
@@ -971,11 +971,11 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 		struct sk_buff *skb;
 		int amount = 0;
 
-		spin_lock_irq(&sk->sk_receive_queue.lock);
+		spin_lock_bh(&sk->sk_receive_queue.lock);
 		skb = skb_peek(&sk->sk_receive_queue);
 		if (skb != NULL)
 			amount = skb->tail - skb->h.raw;
-		spin_unlock_irq(&sk->sk_receive_queue.lock);
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		return put_user(amount, (int __user *)arg);
 	}
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3bf8a0254f..878789b312 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -384,12 +384,13 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
 	be destroyed.
  */
 
-int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
+		void *_rtattr, struct netlink_skb_parms *req)
 {
 	int err;
 
 	write_lock_bh(&rt6_lock);
-	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
+	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
 	write_unlock_bh(&rt6_lock);
 
 	return err;
@@ -400,7 +401,7 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
  */
 
 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
-				struct in6_addr *saddr)
+				struct in6_addr *saddr, struct netlink_skb_parms *req)
 {
 	int err;
 	struct rt6_info *rt;
@@ -432,7 +433,7 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
 
 	dst_hold(&rt->u.dst);
 
-	err = ip6_ins_rt(rt, NULL, NULL);
+	err = ip6_ins_rt(rt, NULL, NULL, req);
 	if (err == 0)
 		return rt;
 
@@ -491,7 +492,8 @@ restart:
 		read_unlock_bh(&rt6_lock);
 
 		nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
-			      &skb->nh.ipv6h->saddr);
+			      &skb->nh.ipv6h->saddr,
+			      &NETLINK_CB(skb));
 
 		dst_release(&rt->u.dst);
 		rt = nrt;
@@ -551,7 +553,7 @@ restart:
 	dst_hold(&rt->u.dst);
 	read_unlock_bh(&rt6_lock);
 
-	nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
+	nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
 
 	dst_release(&rt->u.dst);
 	rt = nrt;
@@ -598,7 +600,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE)
-			ip6_del_rt(rt, NULL, NULL);
+			ip6_del_rt(rt, NULL, NULL, NULL);
 		else
 			dst_release(dst);
 	}
@@ -787,7 +789,8 @@ int ipv6_get_hoplimit(struct net_device *dev)
  *
  */
 
-int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
+int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
+		void *_rtattr, struct netlink_skb_parms *req)
 {
 	int err;
 	struct rtmsg *r;
@@ -974,7 +977,7 @@ install_route:
 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
 	rt->u.dst.dev = dev;
 	rt->rt6i_idev = idev;
-	return ip6_ins_rt(rt, nlh, _rtattr);
+	return ip6_ins_rt(rt, nlh, _rtattr, req);
 
 out:
 	if (dev)
@@ -986,7 +989,7 @@ out:
 	return err;
 }
 
-int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
 {
 	int err;
 
@@ -994,7 +997,7 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
 
 	rt6_reset_dflt_pointer(NULL);
 
-	err = fib6_del(rt, nlh, _rtattr);
+	err = fib6_del(rt, nlh, _rtattr, req);
 	dst_release(&rt->u.dst);
 
 	write_unlock_bh(&rt6_lock);
@@ -1002,7 +1005,7 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
 	return err;
 }
 
-static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
+static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
 {
 	struct fib6_node *fn;
 	struct rt6_info *rt;
@@ -1029,7 +1032,7 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
 			dst_hold(&rt->u.dst);
 			read_unlock_bh(&rt6_lock);
 
-			return ip6_del_rt(rt, nlh, _rtattr);
+			return ip6_del_rt(rt, nlh, _rtattr, req);
 		}
 	}
 	read_unlock_bh(&rt6_lock);
@@ -1136,11 +1139,11 @@ source_ok:
 	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
 	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
 
-	if (ip6_ins_rt(nrt, NULL, NULL))
+	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
 		goto out;
 
 	if (rt->rt6i_flags&RTF_CACHE) {
-		ip6_del_rt(rt, NULL, NULL);
+		ip6_del_rt(rt, NULL, NULL, NULL);
 		return;
 	}
 
@@ -1204,7 +1207,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
 	 */
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
-		nrt = rt6_cow(rt, daddr, saddr);
+		nrt = rt6_cow(rt, daddr, saddr, NULL);
 		if (!nrt->u.dst.error) {
 			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
 			if (allfrag)
@@ -1232,7 +1235,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
 		if (allfrag)
 			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
-		ip6_ins_rt(nrt, NULL, NULL);
+		ip6_ins_rt(nrt, NULL, NULL, NULL);
 	}
 
 out:
@@ -1305,7 +1308,7 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
 
 	rtmsg.rtmsg_ifindex = dev->ifindex;
 
-	ip6_route_add(&rtmsg, NULL, NULL);
+	ip6_route_add(&rtmsg, NULL, NULL, NULL);
 	return rt6_get_dflt_router(gwaddr, dev);
 }
 
@@ -1323,7 +1326,7 @@ restart:
 
 			read_unlock_bh(&rt6_lock);
 
-			ip6_del_rt(rt, NULL, NULL);
+			ip6_del_rt(rt, NULL, NULL, NULL);
 
 			goto restart;
 		}
@@ -1349,10 +1352,10 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 	rtnl_lock();
 	switch (cmd) {
 	case SIOCADDRT:
-		err = ip6_route_add(&rtmsg, NULL, NULL);
+		err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
 		break;
 	case SIOCDELRT:
-		err = ip6_route_del(&rtmsg, NULL, NULL);
+		err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
 		break;
 	default:
 		err = -EINVAL;
@@ -1546,7 +1549,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
 		return -EINVAL;
-	return ip6_route_del(&rtmsg, nlh, arg);
+	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
 }
 
 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -1556,7 +1559,7 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
 		return -EINVAL;
-	return ip6_route_add(&rtmsg, nlh, arg);
+	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
 }
 
 struct rt6_rtnl_dump_arg
@@ -1566,11 +1569,9 @@ struct rt6_rtnl_dump_arg
 };
 
 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
-			 struct in6_addr *dst,
-			 struct in6_addr *src,
-			 int iif,
-			 int type, u32 pid, u32 seq,
-			 struct nlmsghdr *in_nlh, int prefix)
+			 struct in6_addr *dst, struct in6_addr *src,
+			 int iif, int type, u32 pid, u32 seq,
+			 int prefix, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
@@ -1584,11 +1585,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 		}
 	}
 
-	if (!pid && in_nlh) {
-		pid = in_nlh->nlmsg_pid;
-	}
-
-	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
+	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
 	rtm->rtm_family = AF_INET6;
 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
@@ -1674,7 +1671,7 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
-		     NULL, prefix);
+		     prefix, NLM_F_MULTI);
 }
 
 static int fib6_dump_node(struct fib6_walker_t *w)
@@ -1822,7 +1819,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 			    &fl.fl6_dst, &fl.fl6_src,
 			    iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
-			    nlh->nlmsg_seq, nlh, 0);
+			    nlh->nlmsg_seq, 0, 0);
 	if (err < 0) {
 		err = -EMSGSIZE;
 		goto out_free;
@@ -1838,17 +1835,25 @@ out_free:
 	goto out;
 }
 
-void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
+void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
+			struct netlink_skb_parms *req)
 {
 	struct sk_buff *skb;
 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
+	u32 pid = current->pid;
+	u32 seq = 0;
 
+	if (req)
+		pid = req->pid;
+	if (nlh)
+		seq = nlh->nlmsg_seq;
+
 	skb = alloc_skb(size, gfp_any());
 	if (!skb) {
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
 		return;
 	}
-	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
+	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
 		kfree_skb(skb);
 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
 		return;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0f69e800a0..2414937f2a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -65,7 +65,7 @@
 #include <linux/seq_file.h>
 
 static void tcp_v6_send_reset(struct sk_buff *skb);
-static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
+static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
 			      struct sk_buff *skb);
 
@@ -394,24 +394,26 @@ static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
 	return c & (TCP_SYNQ_HSIZE - 1);
 }
 
-static struct open_request *tcp_v6_search_req(struct tcp_sock *tp,
-					      struct open_request ***prevp,
+static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
+					      struct request_sock ***prevp,
 					      __u16 rport,
 					      struct in6_addr *raddr,
 					      struct in6_addr *laddr,
 					      int iif)
 {
-	struct tcp_listen_opt *lopt = tp->listen_opt;
-	struct open_request *req, **prev;
+	struct listen_sock *lopt = tp->accept_queue.listen_opt;
+	struct request_sock *req, **prev;
 
 	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
 	     (req = *prev) != NULL;
 	     prev = &req->dl_next) {
-		if (req->rmt_port == rport &&
-		    req->class->family == AF_INET6 &&
-		    ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
-		    ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
-		    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
+		const struct tcp6_request_sock *treq = tcp6_rsk(req);
+
+		if (inet_rsk(req)->rmt_port == rport &&
+		    req->rsk_ops->family == AF_INET6 &&
+		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
+		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
+		    (!treq->iif || treq->iif == iif)) {
 			BUG_TRAP(req->sk == NULL);
 			*prevp = prev;
 			return req;
@@ -906,9 +908,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	icmpv6_err_convert(type, code, &err);
 
-	/* Might be for an open_request */
+	/* Might be for an request_sock */
 	switch (sk->sk_state) {
-		struct open_request *req, **prev;
+		struct request_sock *req, **prev;
 	case TCP_LISTEN:
 		if (sock_owned_by_user(sk))
 			goto out;
@@ -923,7 +925,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		 */
 		BUG_TRAP(req->sk == NULL);
 
-		if (seq != req->snt_isn) {
+		if (seq != tcp_rsk(req)->snt_isn) {
 			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
 			goto out;
 		}
@@ -957,9 +959,10 @@ out:
 }
 
 
-static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
+static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 			      struct dst_entry *dst)
 {
+	struct tcp6_request_sock *treq = tcp6_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff * skb;
 	struct ipv6_txoptions *opt = NULL;
@@ -969,19 +972,19 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
 
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
-	ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
+	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
+	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 	fl.fl6_flowlabel = 0;
-	fl.oif = req->af.v6_req.iif;
-	fl.fl_ip_dport = req->rmt_port;
+	fl.oif = treq->iif;
+	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 	fl.fl_ip_sport = inet_sk(sk)->sport;
 
 	if (dst == NULL) {
 		opt = np->opt;
 		if (opt == NULL &&
 		    np->rxopt.bits.srcrt == 2 &&
-		    req->af.v6_req.pktopts) {
-			struct sk_buff *pktopts = req->af.v6_req.pktopts;
+		    treq->pktopts) {
+			struct sk_buff *pktopts = treq->pktopts;
 			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
 			if (rxopt->srcrt)
 				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
@@ -1008,10 +1011,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
 		struct tcphdr *th = skb->h.th;
 
 		th->check = tcp_v6_check(th, skb->len,
-					 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
+					 &treq->loc_addr, &treq->rmt_addr,
 					 csum_partial((char *)th, skb->len, skb->csum));
 
-		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 		err = ip6_xmit(sk, skb, &fl, opt, 0);
 		if (err == NET_XMIT_CN)
 			err = 0;
@@ -1024,17 +1027,18 @@ done:
 	return err;
 }
 
-static void tcp_v6_or_free(struct open_request *req)
+static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
-	if (req->af.v6_req.pktopts)
-		kfree_skb(req->af.v6_req.pktopts);
+	if (tcp6_rsk(req)->pktopts)
+		kfree_skb(tcp6_rsk(req)->pktopts);
 }
 
-static struct or_calltable or_ipv6 = {
+static struct request_sock_ops tcp6_request_sock_ops = {
 	.family		= AF_INET6,
+	.obj_size	= sizeof(struct tcp6_request_sock),
 	.rtx_syn_ack	= tcp_v6_send_synack,
-	.send_ack	= tcp_v6_or_send_ack,
-	.destructor	= tcp_v6_or_free,
+	.send_ack	= tcp_v6_reqsk_send_ack,
+	.destructor	= tcp_v6_reqsk_destructor,
 	.send_reset	= tcp_v6_send_reset
 };
 
@@ -1219,15 +1223,15 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 	tcp_tw_put(tw);
 }
 
-static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
+static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
 {
-	tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
+	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
 }
 
 
 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 {
-	struct open_request *req, **prev;
+	struct request_sock *req, **prev;
 	struct tcphdr *th = skb->h.th;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sock *nsk;
@@ -1260,21 +1264,13 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 	return sk;
 }
 
-static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
+static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_listen_opt *lopt = tp->listen_opt;
-	u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
-
-	req->sk = NULL;
-	req->expires = jiffies + TCP_TIMEOUT_INIT;
-	req->retrans = 0;
-	req->dl_next = lopt->syn_table[h];
-
-	write_lock(&tp->syn_wait_lock);
-	lopt->syn_table[h] = req;
-	write_unlock(&tp->syn_wait_lock);
+	struct listen_sock *lopt = tp->accept_queue.listen_opt;
+	u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
 
+	reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
 	tcp_synq_added(sk);
 }
 
@@ -1284,10 +1280,11 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_options_received tmp_opt;
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct open_request *req = NULL;
+	struct request_sock *req = NULL;
 	__u32 isn = TCP_SKB_CB(skb)->when;
 
 	if (skb->protocol == htons(ETH_P_IP))
@@ -1308,7 +1305,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
 		goto drop;
 
-	req = tcp_openreq_alloc();
+	req = reqsk_alloc(&tcp6_request_sock_ops);
 	if (req == NULL)
 		goto drop;
 
@@ -1321,28 +1318,28 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
-	req->class = &or_ipv6;
-	ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
+	treq = tcp6_rsk(req);
+	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
 	TCP_ECN_create_request(req, skb->h.th);
-	req->af.v6_req.pktopts = NULL;
+	treq->pktopts = NULL;
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo ||
 	    np->rxopt.bits.rxhlim) {
 		atomic_inc(&skb->users);
-		req->af.v6_req.pktopts = skb;
+		treq->pktopts = skb;
 	}
-	req->af.v6_req.iif = sk->sk_bound_dev_if;
+	treq->iif = sk->sk_bound_dev_if;
 
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
-		req->af.v6_req.iif = tcp_v6_iif(skb);
+	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
+		treq->iif = tcp_v6_iif(skb);
 
 	if (isn == 0)
 		isn = tcp_v6_init_sequence(sk,skb);
 
-	req->snt_isn = isn;
+	tcp_rsk(req)->snt_isn = isn;
 
 	if (tcp_v6_send_synack(sk, req, NULL))
 		goto drop;
@@ -1353,16 +1350,17 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 
 drop:
 	if (req)
-		tcp_openreq_free(req);
+		reqsk_free(req);
 
 	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
 	return 0; /* don't send reset */
 }
 
 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
-					  struct open_request *req,
+					  struct request_sock *req,
 					  struct dst_entry *dst)
 {
+	struct tcp6_request_sock *treq = tcp6_rsk(req);
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
@@ -1426,10 +1424,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		goto out_overflow;
 
 	if (np->rxopt.bits.srcrt == 2 &&
-	    opt == NULL && req->af.v6_req.pktopts) {
-		struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
+	    opt == NULL && treq->pktopts) {
+		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
 		if (rxopt->srcrt)
-			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
+			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
 	}
 
 	if (dst == NULL) {
@@ -1438,16 +1436,16 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_TCP;
-		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 		if (opt && opt->srcrt) {
 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
 			ipv6_addr_copy(&final, &fl.fl6_dst);
 			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
 			final_p = &final;
 		}
-		ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
+		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
-		fl.fl_ip_dport = req->rmt_port;
+		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
 		fl.fl_ip_sport = inet_sk(sk)->sport;
 
 		if (ip6_dst_lookup(sk, &dst, &fl))
@@ -1482,10 +1480,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
-	ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
-	ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
-	newsk->sk_bound_dev_if = req->af.v6_req.iif;
+	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
+	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
+	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
+	newsk->sk_bound_dev_if = treq->iif;
 
 	/* Now IPv6 options...
 
@@ -1498,11 +1496,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	/* Clone pktoptions received with SYN */
 	newnp->pktoptions = NULL;
-	if (req->af.v6_req.pktopts) {
-		newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
-					      GFP_ATOMIC);
-		kfree_skb(req->af.v6_req.pktopts);
-		req->af.v6_req.pktopts = NULL;
+	if (treq->pktopts != NULL) {
+		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
+		kfree_skb(treq->pktopts);
+		treq->pktopts = NULL;
 		if (newnp->pktoptions)
 			skb_set_owner_r(newnp->pktoptions, newsk);
 	}
@@ -2050,7 +2047,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
 
 /* Proc filesystem TCPv6 sock list dumping. */
 static void get_openreq6(struct seq_file *seq,
-			 struct sock *sk, struct open_request *req, int i, int uid)
+			 struct sock *sk, struct request_sock *req, int i, int uid)
 {
 	struct in6_addr *dest, *src;
 	int ttd = req->expires - jiffies;
@@ -2058,8 +2055,8 @@ static void get_openreq6(struct seq_file *seq,
 	if (ttd < 0)
 		ttd = 0;
 
-	src = &req->af.v6_req.loc_addr;
-	dest = &req->af.v6_req.rmt_addr;
+	src = &tcp6_rsk(req)->loc_addr;
+	dest = &tcp6_rsk(req)->rmt_addr;
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
@@ -2069,7 +2066,7 @@ static void get_openreq6(struct seq_file *seq,
 		   ntohs(inet_sk(sk)->sport),
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3],
-		   ntohs(req->rmt_port),
+		   ntohs(inet_rsk(req)->rmt_port),
 		   TCP_SYN_RECV,
 		   0,0, /* could print option size, but that is af dependent. */
 		   1,   /* timers active (only the expire timer) */
@@ -2239,6 +2236,7 @@ struct proto tcpv6_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp6_sock),
+	.rsk_prot		= &tcp6_request_sock_ops,
 };
 
 static struct inet6_protocol tcpv6_protocol = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e251d0ba4f..eff050ac70 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -300,12 +300,12 @@ csum_copy_err:
 	/* Clear queue. */
 	if (flags&MSG_PEEK) {
 		int clear = 0;
-		spin_lock_irq(&sk->sk_receive_queue.lock);
+		spin_lock_bh(&sk->sk_receive_queue.lock);
 		if (skb == skb_peek(&sk->sk_receive_queue)) {
 			__skb_unlink(skb, &sk->sk_receive_queue);
 			clear = 1;
 		}
-		spin_unlock_irq(&sk->sk_receive_queue.lock);
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		if (clear)
 			kfree_skb(skb);
 	}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index ffcadd68b9..60c26c8727 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -466,7 +466,7 @@ static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	return;
 }
 
-static int xfrm6_tunnel_init_state(struct xfrm_state *x, void *args)
+static int xfrm6_tunnel_init_state(struct xfrm_state *x)
 {
 	if (!x->props.mode)
 		return -EINVAL;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ce980aa94e..4879743b94 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -656,13 +656,18 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
 	sa->sadb_sa_exttype = SADB_EXT_SA;
 	sa->sadb_sa_spi = x->id.spi;
 	sa->sadb_sa_replay = x->props.replay_window;
-	sa->sadb_sa_state = SADB_SASTATE_DYING;
-	if (x->km.state == XFRM_STATE_VALID && !x->km.dying)
-		sa->sadb_sa_state = SADB_SASTATE_MATURE;
-	else if (x->km.state == XFRM_STATE_ACQ)
+	switch (x->km.state) {
+	case XFRM_STATE_VALID:
+		sa->sadb_sa_state = x->km.dying ?
+			SADB_SASTATE_DYING : SADB_SASTATE_MATURE;
+		break;
+	case XFRM_STATE_ACQ:
 		sa->sadb_sa_state = SADB_SASTATE_LARVAL;
-	else if (x->km.state == XFRM_STATE_EXPIRED)
+		break;
+	default:
 		sa->sadb_sa_state = SADB_SASTATE_DEAD;
+		break;
+	}
 	sa->sadb_sa_auth = 0;
 	if (x->aalg) {
 		struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
@@ -685,6 +690,8 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
 		sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN;
 	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
 		sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP;
+	if (x->props.flags & XFRM_STATE_NOPMTUDISC)
+		sa->sadb_sa_flags |= SADB_SAFLAGS_NOPMTUDISC;
 
 	/* hard time */
 	if (hsc & 2) {
@@ -969,6 +976,8 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
 		x->props.flags |= XFRM_STATE_NOECN;
 	if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP)
 		x->props.flags |= XFRM_STATE_DECAP_DSCP;
+	if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC)
+		x->props.flags |= XFRM_STATE_NOPMTUDISC;
 
 	lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1];
 	if (lifetime != NULL) {
@@ -1091,17 +1100,11 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
 		}
 	}
 
-	x->type = xfrm_get_type(proto, x->props.family);
-	if (x->type == NULL) {
-		err = -ENOPROTOOPT;
-		goto out;
-	}
-	if (x->type->init_state(x, NULL)) {
-		err = -EINVAL;
+	err = xfrm_init_state(x);
+	if (err)
 		goto out;
-	}
+
 	x->km.seq = hdr->sadb_msg_seq;
-	x->km.state = XFRM_STATE_VALID;
 	return x;
 
 out:
@@ -1240,13 +1243,78 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
 	return 0;
 }
 
+static inline int event2poltype(int event)
+{
+	switch (event) {
+	case XFRM_MSG_DELPOLICY:
+		return SADB_X_SPDDELETE;
+	case XFRM_MSG_NEWPOLICY:
+		return SADB_X_SPDADD;
+	case XFRM_MSG_UPDPOLICY:
+		return SADB_X_SPDUPDATE;
+	case XFRM_MSG_POLEXPIRE:
+	//	return SADB_X_SPDEXPIRE;
+	default:
+		printk("pfkey: Unknown policy event %d\n", event);
+		break;
+	}
+
+	return 0;
+}
+
+static inline int event2keytype(int event)
+{
+	switch (event) {
+	case XFRM_MSG_DELSA:
+		return SADB_DELETE;
+	case XFRM_MSG_NEWSA:
+		return SADB_ADD;
+	case XFRM_MSG_UPDSA:
+		return SADB_UPDATE;
+	case XFRM_MSG_EXPIRE:
+		return SADB_EXPIRE;
+	default:
+		printk("pfkey: Unknown SA event %d\n", event);
+		break;
+	}
+
+	return 0;
+}
+
+/* ADD/UPD/DEL */
+static int key_notify_sa(struct xfrm_state *x, struct km_event *c)
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+	int hsc = 3;
+
+	if (c->event == XFRM_MSG_DELSA)
+		hsc = 0;
+
+	skb = pfkey_xfrm_state2msg(x, 0, hsc);
+
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	hdr = (struct sadb_msg *) skb->data;
+	hdr->sadb_msg_version = PF_KEY_V2;
+	hdr->sadb_msg_type = event2keytype(c->event);
+	hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
+	hdr->sadb_msg_errno = 0;
+	hdr->sadb_msg_reserved = 0;
+	hdr->sadb_msg_seq = c->seq;
+	hdr->sadb_msg_pid = c->pid;
+
+	pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
+
+	return 0;
+}
 
 static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
 {
-	struct sk_buff *out_skb;
-	struct sadb_msg *out_hdr;
 	struct xfrm_state *x;
 	int err;
+	struct km_event c;
 
 	xfrm_probe_algs();
 
@@ -1254,6 +1322,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
 	if (IS_ERR(x))
 		return PTR_ERR(x);
 
+	xfrm_state_hold(x);
 	if (hdr->sadb_msg_type == SADB_ADD)
 		err = xfrm_state_add(x);
 	else
@@ -1262,30 +1331,26 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr,
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
 		xfrm_state_put(x);
-		return err;
+		goto out;
 	}
 
-	out_skb = pfkey_xfrm_state2msg(x, 0, 3);
-	if (IS_ERR(out_skb))
-		return PTR_ERR(out_skb); /* XXX Should we return 0 here ? */
-
-	out_hdr = (struct sadb_msg *) out_skb->data;
-	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
-	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
-	out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
-	out_hdr->sadb_msg_errno = 0;
-	out_hdr->sadb_msg_reserved = 0;
-	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
-	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
-
-	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
-
-	return 0;
+	if (hdr->sadb_msg_type == SADB_ADD)
+		c.event = XFRM_MSG_NEWSA;
+	else
+		c.event = XFRM_MSG_UPDSA;
+	c.seq = hdr->sadb_msg_seq;
+	c.pid = hdr->sadb_msg_pid;
+	km_state_notify(x, &c);
+out:
+	xfrm_state_put(x);
+	return err;
 }
 
 static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
 {
 	struct xfrm_state *x;
+	struct km_event c;
+	int err;
 
 	if (!ext_hdrs[SADB_EXT_SA-1] ||
 	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
@@ -1301,13 +1366,19 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 		return -EPERM;
 	}
 
-	xfrm_state_delete(x);
-	xfrm_state_put(x);
+	err = xfrm_state_delete(x);
+	if (err < 0) {
+		xfrm_state_put(x);
+		return err;
+	}
 
-	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
-			BROADCAST_ALL, sk);
+	c.seq = hdr->sadb_msg_seq;
+	c.pid = hdr->sadb_msg_pid;
+	c.event = XFRM_MSG_DELSA;
+	km_state_notify(x, &c);
+	xfrm_state_put(x);
 
-	return 0;
+	return err;
 }
 
 static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
@@ -1445,28 +1516,42 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	return 0;
 }
 
+static int key_notify_sa_flush(struct km_event *c)
+{
+	struct sk_buff *skb;
+	struct sadb_msg *hdr;
+
+	skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC);
+	if (!skb)
+		return -ENOBUFS;
+	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
+	hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto);
+	hdr->sadb_msg_seq = c->seq;
+	hdr->sadb_msg_pid = c->pid;
+	hdr->sadb_msg_version = PF_KEY_V2;
+	hdr->sadb_msg_errno = (uint8_t) 0;
+	hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
+
+	pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
+
+	return 0;
+}
+
 static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
 {
 	unsigned proto;
-	struct sk_buff *skb_out;
-	struct sadb_msg *hdr_out;
+	struct km_event c;
 
 	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
 	if (proto == 0)
 		return -EINVAL;
 
-	skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
-	if (!skb_out)
-		return -ENOBUFS;
-
 	xfrm_state_flush(proto);
-
-	hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
-	pfkey_hdr_dup(hdr_out, hdr);
-	hdr_out->sadb_msg_errno = (uint8_t) 0;
-	hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
-
-	pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL);
+	c.data.proto = proto;
+	c.seq = hdr->sadb_msg_seq;
+	c.pid = hdr->sadb_msg_pid;
+	c.event = XFRM_MSG_FLUSHSA;
+	km_state_notify(NULL, &c);
 
 	return 0;
 }
@@ -1859,6 +1944,35 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
 	hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);
 }
 
+static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
+{
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+	int err;
+
+	out_skb = pfkey_xfrm_policy2msg_prep(xp);
+	if (IS_ERR(out_skb)) {
+		err = PTR_ERR(out_skb);
+		goto out;
+	}
+	pfkey_xfrm_policy2msg(out_skb, xp, dir);
+
+	out_hdr = (struct sadb_msg *) out_skb->data;
+	out_hdr->sadb_msg_version = PF_KEY_V2;
+
+	if (c->data.byid && c->event == XFRM_MSG_DELPOLICY)
+		out_hdr->sadb_msg_type = SADB_X_SPDDELETE2;
+	else
+		out_hdr->sadb_msg_type = event2poltype(c->event);
+	out_hdr->sadb_msg_errno = 0;
+	out_hdr->sadb_msg_seq = c->seq;
+	out_hdr->sadb_msg_pid = c->pid;
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL);
+out:
+	return 0;
+
+}
+
 static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
 {
 	int err;
@@ -1866,8 +1980,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	struct sadb_address *sa;
 	struct sadb_x_policy *pol;
 	struct xfrm_policy *xp;
-	struct sk_buff *out_skb;
-	struct sadb_msg *out_hdr;
+	struct km_event c;
 
 	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
 				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -1935,31 +2048,23 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	    (err = parse_ipsecrequests(xp, pol)) < 0)
 		goto out;
 
-	out_skb = pfkey_xfrm_policy2msg_prep(xp);
-	if (IS_ERR(out_skb)) {
-		err = PTR_ERR(out_skb);
-		goto out;
-	}
-
 	err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
 				 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
 	if (err) {
-		kfree_skb(out_skb);
-		goto out;
+		kfree(xp);
+		return err;
 	}
 
-	pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
+	if (hdr->sadb_msg_type == SADB_X_SPDUPDATE)
+		c.event = XFRM_MSG_UPDPOLICY;
+	else
+		c.event = XFRM_MSG_NEWPOLICY;
 
-	xfrm_pol_put(xp);
+	c.seq = hdr->sadb_msg_seq;
+	c.pid = hdr->sadb_msg_pid;
 
-	out_hdr = (struct sadb_msg *) out_skb->data;
-	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
-	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
-	out_hdr->sadb_msg_satype = 0;
-	out_hdr->sadb_msg_errno = 0;
-	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
-	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
-	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
+	km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
+	xfrm_pol_put(xp);
 	return 0;
 
 out:
@@ -1973,9 +2078,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	struct sadb_address *sa;
 	struct sadb_x_policy *pol;
 	struct xfrm_policy *xp;
-	struct sk_buff *out_skb;
-	struct sadb_msg *out_hdr;
 	struct xfrm_selector sel;
+	struct km_event c;
 
 	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
 				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -2010,25 +2114,40 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 
 	err = 0;
 
+	c.seq = hdr->sadb_msg_seq;
+	c.pid = hdr->sadb_msg_pid;
+	c.event = XFRM_MSG_DELPOLICY;
+	km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
+
+	xfrm_pol_put(xp);
+	return err;
+}
+
+static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb_msg *hdr, int dir)
+{
+	int err;
+	struct sk_buff *out_skb;
+	struct sadb_msg *out_hdr;
+	err = 0;
+
 	out_skb = pfkey_xfrm_policy2msg_prep(xp);
 	if (IS_ERR(out_skb)) {
 		err = PTR_ERR(out_skb);
 		goto out;
 	}
-	pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
+	pfkey_xfrm_policy2msg(out_skb, xp, dir);
 
 	out_hdr = (struct sadb_msg *) out_skb->data;
 	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
-	out_hdr->sadb_msg_type = SADB_X_SPDDELETE;
+	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
 	out_hdr->sadb_msg_satype = 0;
 	out_hdr->sadb_msg_errno = 0;
 	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
 	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
-	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk);
 	err = 0;
 
 out:
-	xfrm_pol_put(xp);
 	return err;
 }
 
@@ -2037,8 +2156,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	int err;
 	struct sadb_x_policy *pol;
 	struct xfrm_policy *xp;
-	struct sk_buff *out_skb;
-	struct sadb_msg *out_hdr;
+	struct km_event c;
 
 	if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL)
 		return -EINVAL;
@@ -2050,24 +2168,16 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 
 	err = 0;
 
-	out_skb = pfkey_xfrm_policy2msg_prep(xp);
-	if (IS_ERR(out_skb)) {
-		err = PTR_ERR(out_skb);
-		goto out;
+	c.seq = hdr->sadb_msg_seq;
+	c.pid = hdr->sadb_msg_pid;
+	if (hdr->sadb_msg_type == SADB_X_SPDDELETE2) {
+		c.data.byid = 1;
+		c.event = XFRM_MSG_DELPOLICY;
+		km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
+	} else {
+		err = key_pol_get_resp(sk, xp, hdr, pol->sadb_x_policy_dir-1);
 	}
-	pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
 
-	out_hdr = (struct sadb_msg *) out_skb->data;
-	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
-	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
-	out_hdr->sadb_msg_satype = 0;
-	out_hdr->sadb_msg_errno = 0;
-	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
-	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
-	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
-	err = 0;
-
-out:
 	xfrm_pol_put(xp);
 	return err;
 }
@@ -2102,22 +2212,34 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *
 	return xfrm_policy_walk(dump_sp, &data);
 }
 
-static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+static int key_notify_policy_flush(struct km_event *c)
 {
 	struct sk_buff *skb_out;
-	struct sadb_msg *hdr_out;
+	struct sadb_msg *hdr;
 
-	skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
+	skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC);
 	if (!skb_out)
 		return -ENOBUFS;
+	hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
+	hdr->sadb_msg_seq = c->seq;
+	hdr->sadb_msg_pid = c->pid;
+	hdr->sadb_msg_version = PF_KEY_V2;
+	hdr->sadb_msg_errno = (uint8_t) 0;
+	hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
+	pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL);
+	return 0;
 
-	xfrm_policy_flush();
+}
 
-	hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
-	pfkey_hdr_dup(hdr_out, hdr);
-	hdr_out->sadb_msg_errno = (uint8_t) 0;
-	hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
-	pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL);
+static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
+{
+	struct km_event c;
+
+	xfrm_policy_flush();
+	c.event = XFRM_MSG_FLUSHPOLICY;
+	c.pid = hdr->sadb_msg_pid;
+	c.seq = hdr->sadb_msg_seq;
+	km_policy_notify(NULL, 0, &c);
 
 	return 0;
 }
@@ -2317,11 +2439,23 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
 	}
 }
 
-static int pfkey_send_notify(struct xfrm_state *x, int hard)
+static int key_notify_policy_expire(struct xfrm_policy *xp, struct km_event *c)
+{
+	return 0;
+}
+
+static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c)
 {
 	struct sk_buff *out_skb;
 	struct sadb_msg *out_hdr;
-	int hsc = (hard ? 2 : 1);
+	int hard;
+	int hsc;
+
+	hard = c->data.hard;
+	if (hard)
+		hsc = 2;
+	else
+		hsc = 1;
 
 	out_skb = pfkey_xfrm_state2msg(x, 0, hsc);
 	if (IS_ERR(out_skb))
@@ -2340,6 +2474,44 @@ static int pfkey_send_notify(struct xfrm_state *x, int hard)
 	return 0;
 }
 
+static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
+{
+	switch (c->event) {
+	case XFRM_MSG_EXPIRE:
+		return key_notify_sa_expire(x, c);
+	case XFRM_MSG_DELSA:
+	case XFRM_MSG_NEWSA:
+	case XFRM_MSG_UPDSA:
+		return key_notify_sa(x, c);
+	case XFRM_MSG_FLUSHSA:
+		return key_notify_sa_flush(c);
+	default:
+		printk("pfkey: Unknown SA event %d\n", c->event);
+		break;
+	}
+
+	return 0;
+}
+
+static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
+{
+	switch (c->event) {
+	case XFRM_MSG_POLEXPIRE:
+		return key_notify_policy_expire(xp, c);
+	case XFRM_MSG_DELPOLICY:
+	case XFRM_MSG_NEWPOLICY:
+	case XFRM_MSG_UPDPOLICY:
+		return key_notify_policy(xp, dir, c);
+	case XFRM_MSG_FLUSHPOLICY:
+		return key_notify_policy_flush(c);
+	default:
+		printk("pfkey: Unknown policy event %d\n", c->event);
+		break;
+	}
+
+	return 0;
+}
+
 static u32 get_acqseq(void)
 {
 	u32 res;
@@ -2856,6 +3028,7 @@ static struct xfrm_mgr pfkeyv2_mgr =
2856 .acquire = pfkey_send_acquire, 3028 .acquire = pfkey_send_acquire,
2857 .compile_policy = pfkey_compile_policy, 3029 .compile_policy = pfkey_compile_policy,
2858 .new_mapping = pfkey_send_new_mapping, 3030 .new_mapping = pfkey_send_new_mapping,
3031 .notify_policy = pfkey_send_policy_notify,
2859}; 3032};
2860 3033
2861static void __exit ipsec_pfkey_exit(void) 3034static void __exit ipsec_pfkey_exit(void)
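Note: the af_key.c changes above replace the direct notification entry points with km_event-based dispatchers — callers fill in a struct km_event and pfkey_send_notify()/pfkey_send_policy_notify() switch on c->event. key_notify_policy_flush() now also builds the reply header itself and broadcasts with GFP_ATOMIC, since it can run from event context rather than only from a syscall. A minimal userspace sketch of the dispatch shape, with a trimmed-down km_event and placeholder XFRM_MSG_* values (the real definitions live in the xfrm headers):

    #include <stdio.h>

    /* Trimmed-down stand-in for the kernel's struct km_event; only the
     * fields the pfkey dispatchers above actually read are kept. */
    struct km_event {
            int event;                /* XFRM_MSG_* message type */
            unsigned int pid, seq;    /* copied into the sadb_msg header */
            union { int hard; } data; /* hard vs. soft expiry for EXPIRE */
    };

    /* Placeholder values, for illustration only. */
    enum { XFRM_MSG_EXPIRE = 1, XFRM_MSG_DELSA, XFRM_MSG_NEWSA, XFRM_MSG_UPDSA };

    static int send_notify(const struct km_event *c)
    {
            switch (c->event) {
            case XFRM_MSG_EXPIRE:
                    /* hsc picks the hard (2) or soft (1) lifetime section */
                    return c->data.hard ? 2 : 1;
            case XFRM_MSG_DELSA:
            case XFRM_MSG_NEWSA:
            case XFRM_MSG_UPDSA:
                    return 0; /* would broadcast the SA change to listeners */
            default:
                    printf("pfkey: Unknown SA event %d\n", c->event);
                    return 0;
            }
    }

    int main(void)
    {
            struct km_event c = { .event = XFRM_MSG_EXPIRE, .data.hard = 1 };
            printf("hsc = %d\n", send_notify(&c));
            return 0;
    }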
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e41ce458c2..70bcd4744d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1095,8 +1095,7 @@ static int netlink_dump(struct sock *sk)
1095 return 0; 1095 return 0;
1096 } 1096 }
1097 1097
1098 nlh = __nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLMSG_DONE, sizeof(int)); 1098 nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1099 nlh->nlmsg_flags |= NLM_F_MULTI;
1100 memcpy(NLMSG_DATA(nlh), &len, sizeof(len)); 1099 memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
1101 skb_queue_tail(&sk->sk_receive_queue, skb); 1100 skb_queue_tail(&sk->sk_receive_queue, skb);
1102 sk->sk_data_ready(sk, skb->len); 1101 sk->sk_data_ready(sk, skb->len);
@@ -1107,6 +1106,9 @@ static int netlink_dump(struct sock *sk)
1107 1106
1108 netlink_destroy_callback(cb); 1107 netlink_destroy_callback(cb);
1109 return 0; 1108 return 0;
1109
1110nlmsg_failure:
1111 return -ENOBUFS;
1110} 1112}
1111 1113
1112int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, 1114int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
@@ -1178,7 +1180,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1178 } 1180 }
1179 1181
1180 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 1182 rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
1181 NLMSG_ERROR, sizeof(struct nlmsgerr)); 1183 NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
1182 errmsg = NLMSG_DATA(rep); 1184 errmsg = NLMSG_DATA(rep);
1183 errmsg->error = err; 1185 errmsg->error = err;
1184 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr)); 1186 memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
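Note: the af_netlink.c hunks swap the open-coded __nlmsg_put() plus manual nlmsg_flags assignment for the NLMSG_NEW()/NLMSG_NEW_ANSWER() macros, which take the flags directly and jump to a local nlmsg_failure label when the skb runs out of room — hence the new -ENOBUFS tail added to netlink_dump(). A small self-contained sketch of that reserve-or-bail pattern, using a toy buffer instead of an skb:

    #include <stdio.h>
    #include <string.h>

    /* Toy fixed-size buffer standing in for the skb tailroom. */
    struct buf { char data[32]; size_t len; };

    static void *buf_put(struct buf *b, size_t n)
    {
            if (b->len + n > sizeof(b->data))
                    return NULL;            /* no room left */
            void *p = b->data + b->len;
            b->len += n;
            return p;
    }

    /* Mirrors the shape of NLMSG_NEW(): reserve space or bail out to
     * the caller's nlmsg_failure label.  Illustrative only. */
    #define BUF_NEW(ptr, b, n)                      \
            do {                                    \
                    (ptr) = buf_put((b), (n));      \
                    if ((ptr) == NULL)              \
                            goto nlmsg_failure;     \
            } while (0)

    static int fill(struct buf *b, size_t n)
    {
            char *p;

            BUF_NEW(p, b, n);
            memset(p, 0, n);
            return 0;

    nlmsg_failure:                  /* the kernel returns -ENOBUFS here */
            return -1;
    }

    int main(void)
    {
            struct buf b = { .len = 0 };
            printf("fill(8)  -> %d\n", fill(&b, 8));   /* fits     ->  0 */
            printf("fill(64) -> %d\n", fill(&b, 64));  /* too big  -> -1 */
            return 0;
    }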
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index b0941186f8..b22c9beb60 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -405,7 +405,7 @@ config NET_EMATCH_STACK
405 ---help--- 405 ---help---
406 Size of the local stack variable used while evaluating the tree of 406 Size of the local stack variable used while evaluating the tree of
407 ematches. Limits the depth of the tree, i.e. the number of 407 ematches. Limits the depth of the tree, i.e. the number of
408 encapsulated precedences. Every level requires 4 bytes of addtional 408 encapsulated precedences. Every level requires 4 bytes of additional
409 stack space. 409 stack space.
410 410
411config NET_EMATCH_CMP 411config NET_EMATCH_CMP
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cafcb08409..9594206e60 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -428,15 +428,15 @@ errout:
428 428
429static int 429static int
430tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, 430tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
431 unsigned flags, int event, int bind, int ref) 431 u16 flags, int event, int bind, int ref)
432{ 432{
433 struct tcamsg *t; 433 struct tcamsg *t;
434 struct nlmsghdr *nlh; 434 struct nlmsghdr *nlh;
435 unsigned char *b = skb->tail; 435 unsigned char *b = skb->tail;
436 struct rtattr *x; 436 struct rtattr *x;
437 437
438 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t)); 438 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
439 nlh->nlmsg_flags = flags; 439
440 t = NLMSG_DATA(nlh); 440 t = NLMSG_DATA(nlh);
441 t->tca_family = AF_UNSPEC; 441 t->tca_family = AF_UNSPEC;
442 442
@@ -669,7 +669,7 @@ err:
669} 669}
670 670
671static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, 671static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
672 unsigned flags) 672 u16 flags)
673{ 673{
674 struct tcamsg *t; 674 struct tcamsg *t;
675 struct nlmsghdr *nlh; 675 struct nlmsghdr *nlh;
@@ -684,8 +684,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
684 684
685 b = (unsigned char *)skb->tail; 685 b = (unsigned char *)skb->tail;
686 686
687 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t)); 687 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
688 nlh->nlmsg_flags = flags;
689 t = NLMSG_DATA(nlh); 688 t = NLMSG_DATA(nlh);
690 t->tca_family = AF_UNSPEC; 689 t->tca_family = AF_UNSPEC;
691 690
@@ -881,7 +880,7 @@ static int __init tc_action_init(void)
881 link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action; 880 link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
882 } 881 }
883 882
884 printk("TC classifier action (bugs to netdev@oss.sgi.com cc " 883 printk("TC classifier action (bugs to netdev@vger.kernel.org cc "
885 "hadi@cyberus.ca)\n"); 884 "hadi@cyberus.ca)\n");
886 return 0; 885 return 0;
887} 886}
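Note: narrowing the flags parameter from unsigned to u16 in tca_get_fill() and tcf_add_notify() — and in the cls_api.c/sch_api.c fill helpers below — matches struct nlmsghdr, whose nlmsg_flags field is 16 bits wide; a wider value would be silently truncated on assignment anyway. A two-line illustration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* nlmsg_flags is a 16-bit field; assigning a wider value
             * silently drops the high bits, so the prototypes say so. */
            struct { uint16_t nlmsg_flags; } hdr;
            unsigned wide = 0x10003u;       /* bit 16 cannot be stored */

            hdr.nlmsg_flags = (uint16_t)wide;
            printf("stored: 0x%x\n", hdr.nlmsg_flags);  /* prints 0x3 */
            return 0;
    }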
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 56e66c3fe0..1616bf5c96 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -322,14 +322,13 @@ errout:
322 322
323static int 323static int
324tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, 324tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
325 u32 pid, u32 seq, unsigned flags, int event) 325 u32 pid, u32 seq, u16 flags, int event)
326{ 326{
327 struct tcmsg *tcm; 327 struct tcmsg *tcm;
328 struct nlmsghdr *nlh; 328 struct nlmsghdr *nlh;
329 unsigned char *b = skb->tail; 329 unsigned char *b = skb->tail;
330 330
331 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 331 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
332 nlh->nlmsg_flags = flags;
333 tcm = NLMSG_DATA(nlh); 332 tcm = NLMSG_DATA(nlh);
334 tcm->tcm_family = AF_UNSPEC; 333 tcm->tcm_family = AF_UNSPEC;
335 tcm->tcm_ifindex = tp->q->dev->ifindex; 334 tcm->tcm_ifindex = tp->q->dev->ifindex;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0d2d4415f3..dfb300bb6b 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -261,6 +261,9 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
261 rta = (struct rtattr *) b; 261 rta = (struct rtattr *) b;
262 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 262 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
263 263
264 if (f->res.classid)
265 RTA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid);
266
264 if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 || 267 if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
265 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) 268 tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
266 goto rtattr_failure; 269 goto rtattr_failure;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index f1eeaf65ce..48bb23c2a3 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -32,7 +32,7 @@
32 * +-----------+ +-----------+ 32 * +-----------+ +-----------+
33 * | | 33 * | |
34 * ---> meta_ops[INT][INDEV](...) | 34 * ---> meta_ops[INT][INDEV](...) |
35 * | | 35 * | |
36 * ----------- | 36 * ----------- |
37 * V V 37 * V V
38 * +-----------+ +-----------+ 38 * +-----------+ +-----------+
@@ -70,6 +70,7 @@
70#include <net/dst.h> 70#include <net/dst.h>
71#include <net/route.h> 71#include <net/route.h>
72#include <net/pkt_cls.h> 72#include <net/pkt_cls.h>
73#include <net/sock.h>
73 74
74struct meta_obj 75struct meta_obj
75{ 76{
@@ -284,6 +285,214 @@ META_COLLECTOR(int_rtiif)
284} 285}
285 286
286/************************************************************************** 287/**************************************************************************
288 * Socket Attributes
289 **************************************************************************/
290
291#define SKIP_NONLOCAL(skb) \
292 if (unlikely(skb->sk == NULL)) { \
293 *err = -1; \
294 return; \
295 }
296
297META_COLLECTOR(int_sk_family)
298{
299 SKIP_NONLOCAL(skb);
300 dst->value = skb->sk->sk_family;
301}
302
303META_COLLECTOR(int_sk_state)
304{
305 SKIP_NONLOCAL(skb);
306 dst->value = skb->sk->sk_state;
307}
308
309META_COLLECTOR(int_sk_reuse)
310{
311 SKIP_NONLOCAL(skb);
312 dst->value = skb->sk->sk_reuse;
313}
314
315META_COLLECTOR(int_sk_bound_if)
316{
317 SKIP_NONLOCAL(skb);
318 /* No error if bound_dev_if is 0, legal userspace check */
319 dst->value = skb->sk->sk_bound_dev_if;
320}
321
322META_COLLECTOR(var_sk_bound_if)
323{
324 SKIP_NONLOCAL(skb);
325
326 if (skb->sk->sk_bound_dev_if == 0) {
327 dst->value = (unsigned long) "any";
328 dst->len = 3;
329 } else {
330 struct net_device *dev;
331
332 dev = dev_get_by_index(skb->sk->sk_bound_dev_if);
333 *err = var_dev(dev, dst);
334 if (dev)
335 dev_put(dev);
336 }
337}
338
339META_COLLECTOR(int_sk_refcnt)
340{
341 SKIP_NONLOCAL(skb);
342 dst->value = atomic_read(&skb->sk->sk_refcnt);
343}
344
345META_COLLECTOR(int_sk_rcvbuf)
346{
347 SKIP_NONLOCAL(skb);
348 dst->value = skb->sk->sk_rcvbuf;
349}
350
351META_COLLECTOR(int_sk_shutdown)
352{
353 SKIP_NONLOCAL(skb);
354 dst->value = skb->sk->sk_shutdown;
355}
356
357META_COLLECTOR(int_sk_proto)
358{
359 SKIP_NONLOCAL(skb);
360 dst->value = skb->sk->sk_protocol;
361}
362
363META_COLLECTOR(int_sk_type)
364{
365 SKIP_NONLOCAL(skb);
366 dst->value = skb->sk->sk_type;
367}
368
369META_COLLECTOR(int_sk_rmem_alloc)
370{
371 SKIP_NONLOCAL(skb);
372 dst->value = atomic_read(&skb->sk->sk_rmem_alloc);
373}
374
375META_COLLECTOR(int_sk_wmem_alloc)
376{
377 SKIP_NONLOCAL(skb);
378 dst->value = atomic_read(&skb->sk->sk_wmem_alloc);
379}
380
381META_COLLECTOR(int_sk_omem_alloc)
382{
383 SKIP_NONLOCAL(skb);
384 dst->value = atomic_read(&skb->sk->sk_omem_alloc);
385}
386
387META_COLLECTOR(int_sk_rcv_qlen)
388{
389 SKIP_NONLOCAL(skb);
390 dst->value = skb->sk->sk_receive_queue.qlen;
391}
392
393META_COLLECTOR(int_sk_snd_qlen)
394{
395 SKIP_NONLOCAL(skb);
396 dst->value = skb->sk->sk_write_queue.qlen;
397}
398
399META_COLLECTOR(int_sk_wmem_queued)
400{
401 SKIP_NONLOCAL(skb);
402 dst->value = skb->sk->sk_wmem_queued;
403}
404
405META_COLLECTOR(int_sk_fwd_alloc)
406{
407 SKIP_NONLOCAL(skb);
408 dst->value = skb->sk->sk_forward_alloc;
409}
410
411META_COLLECTOR(int_sk_sndbuf)
412{
413 SKIP_NONLOCAL(skb);
414 dst->value = skb->sk->sk_sndbuf;
415}
416
417META_COLLECTOR(int_sk_alloc)
418{
419 SKIP_NONLOCAL(skb);
420 dst->value = skb->sk->sk_allocation;
421}
422
423META_COLLECTOR(int_sk_route_caps)
424{
425 SKIP_NONLOCAL(skb);
426 dst->value = skb->sk->sk_route_caps;
427}
428
429META_COLLECTOR(int_sk_hashent)
430{
431 SKIP_NONLOCAL(skb);
432 dst->value = skb->sk->sk_hashent;
433}
434
435META_COLLECTOR(int_sk_lingertime)
436{
437 SKIP_NONLOCAL(skb);
438 dst->value = skb->sk->sk_lingertime / HZ;
439}
440
441META_COLLECTOR(int_sk_err_qlen)
442{
443 SKIP_NONLOCAL(skb);
444 dst->value = skb->sk->sk_error_queue.qlen;
445}
446
447META_COLLECTOR(int_sk_ack_bl)
448{
449 SKIP_NONLOCAL(skb);
450 dst->value = skb->sk->sk_ack_backlog;
451}
452
453META_COLLECTOR(int_sk_max_ack_bl)
454{
455 SKIP_NONLOCAL(skb);
456 dst->value = skb->sk->sk_max_ack_backlog;
457}
458
459META_COLLECTOR(int_sk_prio)
460{
461 SKIP_NONLOCAL(skb);
462 dst->value = skb->sk->sk_priority;
463}
464
465META_COLLECTOR(int_sk_rcvlowat)
466{
467 SKIP_NONLOCAL(skb);
468 dst->value = skb->sk->sk_rcvlowat;
469}
470
471META_COLLECTOR(int_sk_rcvtimeo)
472{
473 SKIP_NONLOCAL(skb);
474 dst->value = skb->sk->sk_rcvtimeo / HZ;
475}
476
477META_COLLECTOR(int_sk_sndtimeo)
478{
479 SKIP_NONLOCAL(skb);
480 dst->value = skb->sk->sk_sndtimeo / HZ;
481}
482
483META_COLLECTOR(int_sk_sendmsg_off)
484{
485 SKIP_NONLOCAL(skb);
486 dst->value = skb->sk->sk_sndmsg_off;
487}
488
489META_COLLECTOR(int_sk_write_pend)
490{
491 SKIP_NONLOCAL(skb);
492 dst->value = skb->sk->sk_write_pending;
493}
494
495/**************************************************************************
287 * Meta value collectors assignment table 496 * Meta value collectors assignment table
288 **************************************************************************/ 497 **************************************************************************/
289 498
@@ -293,41 +502,75 @@ struct meta_ops
293 struct meta_value *, struct meta_obj *, int *); 502 struct meta_value *, struct meta_obj *, int *);
294}; 503};
295 504
505#define META_ID(name) TCF_META_ID_##name
506#define META_FUNC(name) { .get = meta_##name }
507
296/* Meta value operations table listing all meta value collectors and 508/* Meta value operations table listing all meta value collectors and
297 * assigns them to a type and meta id. */ 509 * assigns them to a type and meta id. */
298static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { 510static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
299 [TCF_META_TYPE_VAR] = { 511 [TCF_META_TYPE_VAR] = {
300 [TCF_META_ID_DEV] = { .get = meta_var_dev }, 512 [META_ID(DEV)] = META_FUNC(var_dev),
301 [TCF_META_ID_INDEV] = { .get = meta_var_indev }, 513 [META_ID(INDEV)] = META_FUNC(var_indev),
302 [TCF_META_ID_REALDEV] = { .get = meta_var_realdev } 514 [META_ID(REALDEV)] = META_FUNC(var_realdev),
515 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
303 }, 516 },
304 [TCF_META_TYPE_INT] = { 517 [TCF_META_TYPE_INT] = {
305 [TCF_META_ID_RANDOM] = { .get = meta_int_random }, 518 [META_ID(RANDOM)] = META_FUNC(int_random),
306 [TCF_META_ID_LOADAVG_0] = { .get = meta_int_loadavg_0 }, 519 [META_ID(LOADAVG_0)] = META_FUNC(int_loadavg_0),
307 [TCF_META_ID_LOADAVG_1] = { .get = meta_int_loadavg_1 }, 520 [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1),
308 [TCF_META_ID_LOADAVG_2] = { .get = meta_int_loadavg_2 }, 521 [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2),
309 [TCF_META_ID_DEV] = { .get = meta_int_dev }, 522 [META_ID(DEV)] = META_FUNC(int_dev),
310 [TCF_META_ID_INDEV] = { .get = meta_int_indev }, 523 [META_ID(INDEV)] = META_FUNC(int_indev),
311 [TCF_META_ID_REALDEV] = { .get = meta_int_realdev }, 524 [META_ID(REALDEV)] = META_FUNC(int_realdev),
312 [TCF_META_ID_PRIORITY] = { .get = meta_int_priority }, 525 [META_ID(PRIORITY)] = META_FUNC(int_priority),
313 [TCF_META_ID_PROTOCOL] = { .get = meta_int_protocol }, 526 [META_ID(PROTOCOL)] = META_FUNC(int_protocol),
314 [TCF_META_ID_SECURITY] = { .get = meta_int_security }, 527 [META_ID(SECURITY)] = META_FUNC(int_security),
315 [TCF_META_ID_PKTTYPE] = { .get = meta_int_pkttype }, 528 [META_ID(PKTTYPE)] = META_FUNC(int_pkttype),
316 [TCF_META_ID_PKTLEN] = { .get = meta_int_pktlen }, 529 [META_ID(PKTLEN)] = META_FUNC(int_pktlen),
317 [TCF_META_ID_DATALEN] = { .get = meta_int_datalen }, 530 [META_ID(DATALEN)] = META_FUNC(int_datalen),
318 [TCF_META_ID_MACLEN] = { .get = meta_int_maclen }, 531 [META_ID(MACLEN)] = META_FUNC(int_maclen),
319#ifdef CONFIG_NETFILTER 532#ifdef CONFIG_NETFILTER
320 [TCF_META_ID_NFMARK] = { .get = meta_int_nfmark }, 533 [META_ID(NFMARK)] = META_FUNC(int_nfmark),
321#endif 534#endif
322 [TCF_META_ID_TCINDEX] = { .get = meta_int_tcindex }, 535 [META_ID(TCINDEX)] = META_FUNC(int_tcindex),
323#ifdef CONFIG_NET_CLS_ACT 536#ifdef CONFIG_NET_CLS_ACT
324 [TCF_META_ID_TCVERDICT] = { .get = meta_int_tcverd }, 537 [META_ID(TCVERDICT)] = META_FUNC(int_tcverd),
325 [TCF_META_ID_TCCLASSID] = { .get = meta_int_tcclassid }, 538 [META_ID(TCCLASSID)] = META_FUNC(int_tcclassid),
326#endif 539#endif
327#ifdef CONFIG_NET_CLS_ROUTE 540#ifdef CONFIG_NET_CLS_ROUTE
328 [TCF_META_ID_RTCLASSID] = { .get = meta_int_rtclassid }, 541 [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid),
329#endif 542#endif
330 [TCF_META_ID_RTIIF] = { .get = meta_int_rtiif } 543 [META_ID(RTIIF)] = META_FUNC(int_rtiif),
544 [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family),
545 [META_ID(SK_STATE)] = META_FUNC(int_sk_state),
546 [META_ID(SK_REUSE)] = META_FUNC(int_sk_reuse),
547 [META_ID(SK_BOUND_IF)] = META_FUNC(int_sk_bound_if),
548 [META_ID(SK_REFCNT)] = META_FUNC(int_sk_refcnt),
549 [META_ID(SK_RCVBUF)] = META_FUNC(int_sk_rcvbuf),
550 [META_ID(SK_SNDBUF)] = META_FUNC(int_sk_sndbuf),
551 [META_ID(SK_SHUTDOWN)] = META_FUNC(int_sk_shutdown),
552 [META_ID(SK_PROTO)] = META_FUNC(int_sk_proto),
553 [META_ID(SK_TYPE)] = META_FUNC(int_sk_type),
554 [META_ID(SK_RMEM_ALLOC)] = META_FUNC(int_sk_rmem_alloc),
555 [META_ID(SK_WMEM_ALLOC)] = META_FUNC(int_sk_wmem_alloc),
556 [META_ID(SK_OMEM_ALLOC)] = META_FUNC(int_sk_omem_alloc),
557 [META_ID(SK_WMEM_QUEUED)] = META_FUNC(int_sk_wmem_queued),
558 [META_ID(SK_RCV_QLEN)] = META_FUNC(int_sk_rcv_qlen),
559 [META_ID(SK_SND_QLEN)] = META_FUNC(int_sk_snd_qlen),
560 [META_ID(SK_ERR_QLEN)] = META_FUNC(int_sk_err_qlen),
561 [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc),
562 [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc),
563 [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps),
564 [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent),
565 [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime),
566 [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl),
567 [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl),
568 [META_ID(SK_PRIO)] = META_FUNC(int_sk_prio),
569 [META_ID(SK_RCVLOWAT)] = META_FUNC(int_sk_rcvlowat),
570 [META_ID(SK_RCVTIMEO)] = META_FUNC(int_sk_rcvtimeo),
571 [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo),
572 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
573 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
331 } 574 }
332}; 575};
333 576
@@ -396,9 +639,9 @@ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
396 /* Let gcc optimize it, the unlikely is not really based on 639 /* Let gcc optimize it, the unlikely is not really based on
397 * some numbers but jump free code for mismatches seems 640 * some numbers but jump free code for mismatches seems
398 * more logical. */ 641 * more logical. */
399 if (unlikely(a == b)) 642 if (unlikely(a->value == b->value))
400 return 0; 643 return 0;
401 else if (a < b) 644 else if (a->value < b->value)
402 return -1; 645 return -1;
403 else 646 else
404 return 1; 647 return 1;
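Note: two independent things happen in em_meta.c. First, a large block of socket-attribute collectors is added, each guarded by SKIP_NONLOCAL, which fails the match early when skb->sk is NULL (i.e. for forwarded traffic with no local socket). Second, meta_int_compare() is fixed to compare the stored values rather than the struct meta_obj pointers a and b, which could never be equal. A runnable sketch of the fixed comparison:

    #include <stdio.h>

    struct meta_obj { unsigned long value, len; };

    /* The fixed comparison above: order by the collected values.  The
     * old code compared the pointers a and b, so two distinct objects
     * holding the same value never tested equal. */
    static int meta_int_compare(const struct meta_obj *a,
                                const struct meta_obj *b)
    {
            if (a->value == b->value)
                    return 0;
            return a->value < b->value ? -1 : 1;
    }

    int main(void)
    {
            struct meta_obj x = { .value = 7 }, y = { .value = 7 };

            printf("pointers equal: %d\n", &x == &y);                 /* 0 */
            printf("values compare: %d\n", meta_int_compare(&x, &y)); /* 0 */
            return 0;
    }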
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 07977f8f26..97c1c75d5c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -760,15 +760,14 @@ graft:
760} 760}
761 761
762static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, 762static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
763 u32 pid, u32 seq, unsigned flags, int event) 763 u32 pid, u32 seq, u16 flags, int event)
764{ 764{
765 struct tcmsg *tcm; 765 struct tcmsg *tcm;
766 struct nlmsghdr *nlh; 766 struct nlmsghdr *nlh;
767 unsigned char *b = skb->tail; 767 unsigned char *b = skb->tail;
768 struct gnet_dump d; 768 struct gnet_dump d;
769 769
770 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 770 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
771 nlh->nlmsg_flags = flags;
772 tcm = NLMSG_DATA(nlh); 771 tcm = NLMSG_DATA(nlh);
773 tcm->tcm_family = AF_UNSPEC; 772 tcm->tcm_family = AF_UNSPEC;
774 tcm->tcm_ifindex = q->dev->ifindex; 773 tcm->tcm_ifindex = q->dev->ifindex;
@@ -997,7 +996,7 @@ out:
997 996
998static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, 997static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
999 unsigned long cl, 998 unsigned long cl,
1000 u32 pid, u32 seq, unsigned flags, int event) 999 u32 pid, u32 seq, u16 flags, int event)
1001{ 1000{
1002 struct tcmsg *tcm; 1001 struct tcmsg *tcm;
1003 struct nlmsghdr *nlh; 1002 struct nlmsghdr *nlh;
@@ -1005,8 +1004,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1005 struct gnet_dump d; 1004 struct gnet_dump d;
1006 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; 1005 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1007 1006
1008 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm)); 1007 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1009 nlh->nlmsg_flags = flags;
1010 tcm = NLMSG_DATA(nlh); 1008 tcm = NLMSG_DATA(nlh);
1011 tcm->tcm_family = AF_UNSPEC; 1009 tcm->tcm_family = AF_UNSPEC;
1012 tcm->tcm_ifindex = q->dev->ifindex; 1010 tcm->tcm_ifindex = q->dev->ifindex;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index d8bd2a569c..13e0e7b385 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -31,7 +31,7 @@
31#endif 31#endif
32 32
33 33
34#define PRIV(sch) qdisc_priv(sch) 34#define PRIV(sch) ((struct dsmark_qdisc_data *) qdisc_priv(sch))
35 35
36 36
37/* 37/*
@@ -55,24 +55,38 @@
55struct dsmark_qdisc_data { 55struct dsmark_qdisc_data {
56 struct Qdisc *q; 56 struct Qdisc *q;
57 struct tcf_proto *filter_list; 57 struct tcf_proto *filter_list;
58 __u8 *mask; /* "owns" the array */ 58 u8 *mask; /* "owns" the array */
59 __u8 *value; 59 u8 *value;
60 __u16 indices; 60 u16 indices;
61 __u32 default_index; /* index range is 0...0xffff */ 61 u32 default_index; /* index range is 0...0xffff */
62 int set_tc_index; 62 int set_tc_index;
63}; 63};
64 64
65static inline int dsmark_valid_indices(u16 indices)
66{
67 while (indices != 1) {
68 if (indices & 1)
69 return 0;
70 indices >>= 1;
71 }
72
73 return 1;
74}
65 75
66/* ------------------------- Class/flow operations ------------------------- */ 76static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
77{
78 return (index <= p->indices && index > 0);
79}
67 80
81/* ------------------------- Class/flow operations ------------------------- */
68 82
69static int dsmark_graft(struct Qdisc *sch,unsigned long arg, 83static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
70 struct Qdisc *new,struct Qdisc **old) 84 struct Qdisc *new, struct Qdisc **old)
71{ 85{
72 struct dsmark_qdisc_data *p = PRIV(sch); 86 struct dsmark_qdisc_data *p = PRIV(sch);
73 87
74 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new, 88 DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
75 old); 89 sch, p, new, old);
76 90
77 if (new == NULL) { 91 if (new == NULL) {
78 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); 92 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
@@ -81,91 +95,95 @@ static int dsmark_graft(struct Qdisc *sch,unsigned long arg,
81 } 95 }
82 96
83 sch_tree_lock(sch); 97 sch_tree_lock(sch);
84 *old = xchg(&p->q,new); 98 *old = xchg(&p->q, new);
85 if (*old) 99 qdisc_reset(*old);
86 qdisc_reset(*old);
87 sch->q.qlen = 0; 100 sch->q.qlen = 0;
88 sch_tree_unlock(sch); /* @@@ move up ? */ 101 sch_tree_unlock(sch);
102
89 return 0; 103 return 0;
90} 104}
91 105
92
93static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) 106static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
94{ 107{
95 struct dsmark_qdisc_data *p = PRIV(sch); 108 return PRIV(sch)->q;
96
97 return p->q;
98} 109}
99 110
100 111static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
101static unsigned long dsmark_get(struct Qdisc *sch,u32 classid)
102{ 112{
103 struct dsmark_qdisc_data *p __attribute__((unused)) = PRIV(sch); 113 DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
114 sch, PRIV(sch), classid);
104 115
105 DPRINTK("dsmark_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid); 116 return TC_H_MIN(classid) + 1;
106 return TC_H_MIN(classid)+1;
107} 117}
108 118
109
110static unsigned long dsmark_bind_filter(struct Qdisc *sch, 119static unsigned long dsmark_bind_filter(struct Qdisc *sch,
111 unsigned long parent, u32 classid) 120 unsigned long parent, u32 classid)
112{ 121{
113 return dsmark_get(sch,classid); 122 return dsmark_get(sch, classid);
114} 123}
115 124
116
117static void dsmark_put(struct Qdisc *sch, unsigned long cl) 125static void dsmark_put(struct Qdisc *sch, unsigned long cl)
118{ 126{
119} 127}
120 128
121
122static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, 129static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
123 struct rtattr **tca, unsigned long *arg) 130 struct rtattr **tca, unsigned long *arg)
124{ 131{
125 struct dsmark_qdisc_data *p = PRIV(sch); 132 struct dsmark_qdisc_data *p = PRIV(sch);
126 struct rtattr *opt = tca[TCA_OPTIONS-1]; 133 struct rtattr *opt = tca[TCA_OPTIONS-1];
127 struct rtattr *tb[TCA_DSMARK_MAX]; 134 struct rtattr *tb[TCA_DSMARK_MAX];
135 int err = -EINVAL;
136 u8 mask = 0;
128 137
129 DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x)," 138 DPRINTK("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
130 "arg 0x%lx\n",sch,p,classid,parent,*arg); 139 "arg 0x%lx\n", sch, p, classid, parent, *arg);
131 if (*arg > p->indices) 140
132 return -ENOENT; 141 if (!dsmark_valid_index(p, *arg)) {
133 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt)) 142 err = -ENOENT;
134 return -EINVAL; 143 goto rtattr_failure;
135 if (tb[TCA_DSMARK_MASK-1]) {
136 if (!RTA_PAYLOAD(tb[TCA_DSMARK_MASK-1]))
137 return -EINVAL;
138 p->mask[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_MASK-1]);
139 }
140 if (tb[TCA_DSMARK_VALUE-1]) {
141 if (!RTA_PAYLOAD(tb[TCA_DSMARK_VALUE-1]))
142 return -EINVAL;
143 p->value[*arg-1] = *(__u8 *) RTA_DATA(tb[TCA_DSMARK_VALUE-1]);
144 } 144 }
145 return 0;
146}
147 145
146 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt))
147 goto rtattr_failure;
148
149 if (tb[TCA_DSMARK_MASK-1])
150 mask = RTA_GET_U8(tb[TCA_DSMARK_MASK-1]);
151
152 if (tb[TCA_DSMARK_VALUE-1])
153 p->value[*arg-1] = RTA_GET_U8(tb[TCA_DSMARK_VALUE-1]);
154
155 if (tb[TCA_DSMARK_MASK-1])
156 p->mask[*arg-1] = mask;
157
158 err = 0;
148 159
149static int dsmark_delete(struct Qdisc *sch,unsigned long arg) 160rtattr_failure:
161 return err;
162}
163
164static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
150{ 165{
151 struct dsmark_qdisc_data *p = PRIV(sch); 166 struct dsmark_qdisc_data *p = PRIV(sch);
152 167
153 if (!arg || arg > p->indices) 168 if (!dsmark_valid_index(p, arg))
154 return -EINVAL; 169 return -EINVAL;
170
155 p->mask[arg-1] = 0xff; 171 p->mask[arg-1] = 0xff;
156 p->value[arg-1] = 0; 172 p->value[arg-1] = 0;
173
157 return 0; 174 return 0;
158} 175}
159 176
160
161static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker) 177static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
162{ 178{
163 struct dsmark_qdisc_data *p = PRIV(sch); 179 struct dsmark_qdisc_data *p = PRIV(sch);
164 int i; 180 int i;
165 181
166 DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker); 182 DPRINTK("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
183
167 if (walker->stop) 184 if (walker->stop)
168 return; 185 return;
186
169 for (i = 0; i < p->indices; i++) { 187 for (i = 0; i < p->indices; i++) {
170 if (p->mask[i] == 0xff && !p->value[i]) 188 if (p->mask[i] == 0xff && !p->value[i])
171 goto ignore; 189 goto ignore;
@@ -180,26 +198,20 @@ ignore:
180 } 198 }
181} 199}
182 200
183
184static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl) 201static struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,unsigned long cl)
185{ 202{
186 struct dsmark_qdisc_data *p = PRIV(sch); 203 return &PRIV(sch)->filter_list;
187
188 return &p->filter_list;
189} 204}
190 205
191
192/* --------------------------- Qdisc operations ---------------------------- */ 206/* --------------------------- Qdisc operations ---------------------------- */
193 207
194
195static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) 208static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
196{ 209{
197 struct dsmark_qdisc_data *p = PRIV(sch); 210 struct dsmark_qdisc_data *p = PRIV(sch);
198 struct tcf_result res; 211 int err;
199 int result; 212
200 int ret = NET_XMIT_POLICED; 213 D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
201 214
202 D2PRINTK("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p);
203 if (p->set_tc_index) { 215 if (p->set_tc_index) {
204 /* FIXME: Safe with non-linear skbs? --RR */ 216 /* FIXME: Safe with non-linear skbs? --RR */
205 switch (skb->protocol) { 217 switch (skb->protocol) {
@@ -216,17 +228,21 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
216 break; 228 break;
217 }; 229 };
218 } 230 }
219 result = TC_POLICE_OK; /* be nice to gcc */ 231
220 if (TC_H_MAJ(skb->priority) == sch->handle) { 232 if (TC_H_MAJ(skb->priority) == sch->handle)
221 skb->tc_index = TC_H_MIN(skb->priority); 233 skb->tc_index = TC_H_MIN(skb->priority);
222 } else { 234 else {
223 result = tc_classify(skb,p->filter_list,&res); 235 struct tcf_result res;
224 D2PRINTK("result %d class 0x%04x\n",result,res.classid); 236 int result = tc_classify(skb, p->filter_list, &res);
237
238 D2PRINTK("result %d class 0x%04x\n", result, res.classid);
239
225 switch (result) { 240 switch (result) {
226#ifdef CONFIG_NET_CLS_POLICE 241#ifdef CONFIG_NET_CLS_POLICE
227 case TC_POLICE_SHOT: 242 case TC_POLICE_SHOT:
228 kfree_skb(skb); 243 kfree_skb(skb);
229 break; 244 sch->qstats.drops++;
245 return NET_XMIT_POLICED;
230#if 0 246#if 0
231 case TC_POLICE_RECLASSIFY: 247 case TC_POLICE_RECLASSIFY:
232 /* FIXME: what to do here ??? */ 248 /* FIXME: what to do here ??? */
@@ -243,43 +259,45 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
243 break; 259 break;
244 }; 260 };
245 } 261 }
246 if (
247#ifdef CONFIG_NET_CLS_POLICE
248 result == TC_POLICE_SHOT ||
249#endif
250 262
251 ((ret = p->q->enqueue(skb,p->q)) != 0)) { 263 err = p->q->enqueue(skb,p->q);
264 if (err != NET_XMIT_SUCCESS) {
252 sch->qstats.drops++; 265 sch->qstats.drops++;
253 return ret; 266 return err;
254 } 267 }
268
255 sch->bstats.bytes += skb->len; 269 sch->bstats.bytes += skb->len;
256 sch->bstats.packets++; 270 sch->bstats.packets++;
257 sch->q.qlen++; 271 sch->q.qlen++;
258 return ret;
259}
260 272
273 return NET_XMIT_SUCCESS;
274}
261 275
262static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) 276static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
263{ 277{
264 struct dsmark_qdisc_data *p = PRIV(sch); 278 struct dsmark_qdisc_data *p = PRIV(sch);
265 struct sk_buff *skb; 279 struct sk_buff *skb;
266 int index; 280 u32 index;
281
282 D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
267 283
268 D2PRINTK("dsmark_dequeue(sch %p,[qdisc %p])\n",sch,p);
269 skb = p->q->ops->dequeue(p->q); 284 skb = p->q->ops->dequeue(p->q);
270 if (!skb) 285 if (skb == NULL)
271 return NULL; 286 return NULL;
287
272 sch->q.qlen--; 288 sch->q.qlen--;
273 index = skb->tc_index & (p->indices-1); 289
274 D2PRINTK("index %d->%d\n",skb->tc_index,index); 290 index = skb->tc_index & (p->indices - 1);
291 D2PRINTK("index %d->%d\n", skb->tc_index, index);
292
275 switch (skb->protocol) { 293 switch (skb->protocol) {
276 case __constant_htons(ETH_P_IP): 294 case __constant_htons(ETH_P_IP):
277 ipv4_change_dsfield(skb->nh.iph, 295 ipv4_change_dsfield(skb->nh.iph, p->mask[index],
278 p->mask[index],p->value[index]); 296 p->value[index]);
279 break; 297 break;
280 case __constant_htons(ETH_P_IPV6): 298 case __constant_htons(ETH_P_IPV6):
281 ipv6_change_dsfield(skb->nh.ipv6h, 299 ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index],
282 p->mask[index],p->value[index]); 300 p->value[index]);
283 break; 301 break;
284 default: 302 default:
285 /* 303 /*
@@ -293,152 +311,162 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
293 htons(skb->protocol)); 311 htons(skb->protocol));
294 break; 312 break;
295 }; 313 };
314
296 return skb; 315 return skb;
297} 316}
298 317
299
300static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch) 318static int dsmark_requeue(struct sk_buff *skb,struct Qdisc *sch)
301{ 319{
302 int ret;
303 struct dsmark_qdisc_data *p = PRIV(sch); 320 struct dsmark_qdisc_data *p = PRIV(sch);
321 int err;
304 322
305 D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); 323 D2PRINTK("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
306 if ((ret = p->q->ops->requeue(skb, p->q)) == 0) { 324
307 sch->q.qlen++; 325 err = p->q->ops->requeue(skb, p->q);
308 sch->qstats.requeues++; 326 if (err != NET_XMIT_SUCCESS) {
309 return 0; 327 sch->qstats.drops++;
328 return err;
310 } 329 }
311 sch->qstats.drops++;
312 return ret;
313}
314 330
331 sch->q.qlen++;
332 sch->qstats.requeues++;
333
334 return NET_XMIT_SUCCESS;
335}
315 336
316static unsigned int dsmark_drop(struct Qdisc *sch) 337static unsigned int dsmark_drop(struct Qdisc *sch)
317{ 338{
318 struct dsmark_qdisc_data *p = PRIV(sch); 339 struct dsmark_qdisc_data *p = PRIV(sch);
319 unsigned int len; 340 unsigned int len;
320 341
321 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p); 342 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
322 if (!p->q->ops->drop) 343
323 return 0; 344 if (p->q->ops->drop == NULL)
324 if (!(len = p->q->ops->drop(p->q)))
325 return 0; 345 return 0;
326 sch->q.qlen--; 346
347 len = p->q->ops->drop(p->q);
348 if (len)
349 sch->q.qlen--;
350
327 return len; 351 return len;
328} 352}
329 353
330 354static int dsmark_init(struct Qdisc *sch, struct rtattr *opt)
331static int dsmark_init(struct Qdisc *sch,struct rtattr *opt)
332{ 355{
333 struct dsmark_qdisc_data *p = PRIV(sch); 356 struct dsmark_qdisc_data *p = PRIV(sch);
334 struct rtattr *tb[TCA_DSMARK_MAX]; 357 struct rtattr *tb[TCA_DSMARK_MAX];
335 __u16 tmp; 358 int err = -EINVAL;
336 359 u32 default_index = NO_DEFAULT_INDEX;
337 DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); 360 u16 indices;
338 if (!opt || 361 u8 *mask;
339 rtattr_parse(tb,TCA_DSMARK_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0 || 362
340 !tb[TCA_DSMARK_INDICES-1] || 363 DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
341 RTA_PAYLOAD(tb[TCA_DSMARK_INDICES-1]) < sizeof(__u16)) 364
342 return -EINVAL; 365 if (!opt || rtattr_parse_nested(tb, TCA_DSMARK_MAX, opt) < 0)
343 p->indices = *(__u16 *) RTA_DATA(tb[TCA_DSMARK_INDICES-1]); 366 goto errout;
344 if (!p->indices) 367
345 return -EINVAL; 368 indices = RTA_GET_U16(tb[TCA_DSMARK_INDICES-1]);
346 for (tmp = p->indices; tmp != 1; tmp >>= 1) { 369 if (!indices || !dsmark_valid_indices(indices))
347 if (tmp & 1) 370 goto errout;
348 return -EINVAL; 371
349 } 372 if (tb[TCA_DSMARK_DEFAULT_INDEX-1])
350 p->default_index = NO_DEFAULT_INDEX; 373 default_index = RTA_GET_U16(tb[TCA_DSMARK_DEFAULT_INDEX-1]);
351 if (tb[TCA_DSMARK_DEFAULT_INDEX-1]) { 374
352 if (RTA_PAYLOAD(tb[TCA_DSMARK_DEFAULT_INDEX-1]) < sizeof(__u16)) 375 mask = kmalloc(indices * 2, GFP_KERNEL);
353 return -EINVAL; 376 if (mask == NULL) {
354 p->default_index = 377 err = -ENOMEM;
355 *(__u16 *) RTA_DATA(tb[TCA_DSMARK_DEFAULT_INDEX-1]); 378 goto errout;
356 } 379 }
357 p->set_tc_index = !!tb[TCA_DSMARK_SET_TC_INDEX-1]; 380
358 p->mask = kmalloc(p->indices*2,GFP_KERNEL); 381 p->mask = mask;
359 if (!p->mask) 382 memset(p->mask, 0xff, indices);
360 return -ENOMEM; 383
361 p->value = p->mask+p->indices; 384 p->value = p->mask + indices;
362 memset(p->mask,0xff,p->indices); 385 memset(p->value, 0, indices);
363 memset(p->value,0,p->indices); 386
364 if (!(p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) 387 p->indices = indices;
388 p->default_index = default_index;
389 p->set_tc_index = RTA_GET_FLAG(tb[TCA_DSMARK_SET_TC_INDEX-1]);
390
391 p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
392 if (p->q == NULL)
365 p->q = &noop_qdisc; 393 p->q = &noop_qdisc;
366 DPRINTK("dsmark_init: qdisc %p\n",&p->q);
367 return 0;
368}
369 394
395 DPRINTK("dsmark_init: qdisc %p\n", p->q);
396
397 err = 0;
398errout:
399rtattr_failure:
400 return err;
401}
370 402
371static void dsmark_reset(struct Qdisc *sch) 403static void dsmark_reset(struct Qdisc *sch)
372{ 404{
373 struct dsmark_qdisc_data *p = PRIV(sch); 405 struct dsmark_qdisc_data *p = PRIV(sch);
374 406
375 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n",sch,p); 407 DPRINTK("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
376 qdisc_reset(p->q); 408 qdisc_reset(p->q);
377 sch->q.qlen = 0; 409 sch->q.qlen = 0;
378} 410}
379 411
380
381static void dsmark_destroy(struct Qdisc *sch) 412static void dsmark_destroy(struct Qdisc *sch)
382{ 413{
383 struct dsmark_qdisc_data *p = PRIV(sch); 414 struct dsmark_qdisc_data *p = PRIV(sch);
384 struct tcf_proto *tp; 415 struct tcf_proto *tp;
385 416
386 DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n",sch,p); 417 DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
418
387 while (p->filter_list) { 419 while (p->filter_list) {
388 tp = p->filter_list; 420 tp = p->filter_list;
389 p->filter_list = tp->next; 421 p->filter_list = tp->next;
390 tcf_destroy(tp); 422 tcf_destroy(tp);
391 } 423 }
424
392 qdisc_destroy(p->q); 425 qdisc_destroy(p->q);
393 kfree(p->mask); 426 kfree(p->mask);
394} 427}
395 428
396
397static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, 429static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
398 struct sk_buff *skb, struct tcmsg *tcm) 430 struct sk_buff *skb, struct tcmsg *tcm)
399{ 431{
400 struct dsmark_qdisc_data *p = PRIV(sch); 432 struct dsmark_qdisc_data *p = PRIV(sch);
401 unsigned char *b = skb->tail; 433 struct rtattr *opts = NULL;
402 struct rtattr *rta; 434
435 DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
403 436
404 DPRINTK("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n",sch,p,cl); 437 if (!dsmark_valid_index(p, cl))
405 if (!cl || cl > p->indices)
406 return -EINVAL; 438 return -EINVAL;
407 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle),cl-1); 439
408 rta = (struct rtattr *) b; 440 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
409 RTA_PUT(skb,TCA_OPTIONS,0,NULL); 441
410 RTA_PUT(skb,TCA_DSMARK_MASK,1,&p->mask[cl-1]); 442 opts = RTA_NEST(skb, TCA_OPTIONS);
411 RTA_PUT(skb,TCA_DSMARK_VALUE,1,&p->value[cl-1]); 443 RTA_PUT_U8(skb,TCA_DSMARK_MASK, p->mask[cl-1]);
412 rta->rta_len = skb->tail-b; 444 RTA_PUT_U8(skb,TCA_DSMARK_VALUE, p->value[cl-1]);
413 return skb->len; 445
446 return RTA_NEST_END(skb, opts);
414 447
415rtattr_failure: 448rtattr_failure:
416 skb_trim(skb,b-skb->data); 449 return RTA_NEST_CANCEL(skb, opts);
417 return -1;
418} 450}
419 451
420static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) 452static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
421{ 453{
422 struct dsmark_qdisc_data *p = PRIV(sch); 454 struct dsmark_qdisc_data *p = PRIV(sch);
423 unsigned char *b = skb->tail; 455 struct rtattr *opts = NULL;
424 struct rtattr *rta;
425 456
426 rta = (struct rtattr *) b; 457 opts = RTA_NEST(skb, TCA_OPTIONS);
427 RTA_PUT(skb,TCA_OPTIONS,0,NULL); 458 RTA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices);
428 RTA_PUT(skb,TCA_DSMARK_INDICES,sizeof(__u16),&p->indices); 459
429 if (p->default_index != NO_DEFAULT_INDEX) { 460 if (p->default_index != NO_DEFAULT_INDEX)
430 __u16 tmp = p->default_index; 461 RTA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index);
431 462
432 RTA_PUT(skb,TCA_DSMARK_DEFAULT_INDEX, sizeof(__u16), &tmp);
433 }
434 if (p->set_tc_index) 463 if (p->set_tc_index)
435 RTA_PUT(skb, TCA_DSMARK_SET_TC_INDEX, 0, NULL); 464 RTA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX);
436 rta->rta_len = skb->tail-b; 465
437 return skb->len; 466 return RTA_NEST_END(skb, opts);
438 467
439rtattr_failure: 468rtattr_failure:
440 skb_trim(skb,b-skb->data); 469 return RTA_NEST_CANCEL(skb, opts);
441 return -1;
442} 470}
443 471
444static struct Qdisc_class_ops dsmark_class_ops = { 472static struct Qdisc_class_ops dsmark_class_ops = {
@@ -476,10 +504,13 @@ static int __init dsmark_module_init(void)
476{ 504{
477 return register_qdisc(&dsmark_qdisc_ops); 505 return register_qdisc(&dsmark_qdisc_ops);
478} 506}
507
479static void __exit dsmark_module_exit(void) 508static void __exit dsmark_module_exit(void)
480{ 509{
481 unregister_qdisc(&dsmark_qdisc_ops); 510 unregister_qdisc(&dsmark_qdisc_ops);
482} 511}
512
483module_init(dsmark_module_init) 513module_init(dsmark_module_init)
484module_exit(dsmark_module_exit) 514module_exit(dsmark_module_exit)
515
485MODULE_LICENSE("GPL"); 516MODULE_LICENSE("GPL");
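Note: the new dsmark_valid_indices() helper enforces that the index table size is a power of two, which is what lets the dequeue path above compute the table slot as skb->tc_index & (p->indices - 1). A standalone version of the check (dsmark_init() rejects indices == 0 before calling it, and this loop relies on that):

    #include <stdint.h>
    #include <stdio.h>

    /* Power-of-two test as in dsmark_valid_indices(): shift right until
     * 1, failing if a low bit is ever set.  The caller must reject 0
     * first, as dsmark_init() does, or the loop would never terminate. */
    static int valid_indices(uint16_t indices)
    {
            while (indices != 1) {
                    if (indices & 1)
                            return 0;
                    indices >>= 1;
            }
            return 1;
    }

    int main(void)
    {
            const uint16_t tests[] = { 1, 2, 16, 63, 64, 100 };

            for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
                    printf("%3u -> %s\n", tests[i],
                           valid_indices(tests[i]) ? "ok" : "invalid");
            return 0;
    }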
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 4888305c96..033083bf0e 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -11,131 +11,38 @@
11 11
12#include <linux/config.h> 12#include <linux/config.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <asm/uaccess.h>
15#include <asm/system.h>
16#include <linux/bitops.h>
17#include <linux/types.h> 14#include <linux/types.h>
18#include <linux/kernel.h> 15#include <linux/kernel.h>
19#include <linux/sched.h>
20#include <linux/string.h>
21#include <linux/mm.h>
22#include <linux/socket.h>
23#include <linux/sockios.h>
24#include <linux/in.h>
25#include <linux/errno.h> 16#include <linux/errno.h>
26#include <linux/interrupt.h>
27#include <linux/if_ether.h>
28#include <linux/inet.h>
29#include <linux/netdevice.h> 17#include <linux/netdevice.h>
30#include <linux/etherdevice.h>
31#include <linux/notifier.h>
32#include <net/ip.h>
33#include <net/route.h>
34#include <linux/skbuff.h> 18#include <linux/skbuff.h>
35#include <net/sock.h>
36#include <net/pkt_sched.h> 19#include <net/pkt_sched.h>
37 20
38/* 1 band FIFO pseudo-"scheduler" */ 21/* 1 band FIFO pseudo-"scheduler" */
39 22
40struct fifo_sched_data 23struct fifo_sched_data
41{ 24{
42 unsigned limit; 25 u32 limit;
43}; 26};
44 27
45static int 28static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
46bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
47{ 29{
48 struct fifo_sched_data *q = qdisc_priv(sch); 30 struct fifo_sched_data *q = qdisc_priv(sch);
49 31
50 if (sch->qstats.backlog + skb->len <= q->limit) { 32 if (likely(sch->qstats.backlog + skb->len <= q->limit))
51 __skb_queue_tail(&sch->q, skb); 33 return qdisc_enqueue_tail(skb, sch);
52 sch->qstats.backlog += skb->len;
53 sch->bstats.bytes += skb->len;
54 sch->bstats.packets++;
55 return 0;
56 }
57 sch->qstats.drops++;
58#ifdef CONFIG_NET_CLS_POLICE
59 if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
60#endif
61 kfree_skb(skb);
62 return NET_XMIT_DROP;
63}
64
65static int
66bfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
67{
68 __skb_queue_head(&sch->q, skb);
69 sch->qstats.backlog += skb->len;
70 sch->qstats.requeues++;
71 return 0;
72}
73
74static struct sk_buff *
75bfifo_dequeue(struct Qdisc* sch)
76{
77 struct sk_buff *skb;
78 34
79 skb = __skb_dequeue(&sch->q); 35 return qdisc_reshape_fail(skb, sch);
80 if (skb)
81 sch->qstats.backlog -= skb->len;
82 return skb;
83} 36}
84 37
85static unsigned int 38static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
86fifo_drop(struct Qdisc* sch)
87{
88 struct sk_buff *skb;
89
90 skb = __skb_dequeue_tail(&sch->q);
91 if (skb) {
92 unsigned int len = skb->len;
93 sch->qstats.backlog -= len;
94 kfree_skb(skb);
95 return len;
96 }
97 return 0;
98}
99
100static void
101fifo_reset(struct Qdisc* sch)
102{
103 skb_queue_purge(&sch->q);
104 sch->qstats.backlog = 0;
105}
106
107static int
108pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
109{ 39{
110 struct fifo_sched_data *q = qdisc_priv(sch); 40 struct fifo_sched_data *q = qdisc_priv(sch);
111 41
112 if (sch->q.qlen < q->limit) { 42 if (likely(skb_queue_len(&sch->q) < q->limit))
113 __skb_queue_tail(&sch->q, skb); 43 return qdisc_enqueue_tail(skb, sch);
114 sch->bstats.bytes += skb->len;
115 sch->bstats.packets++;
116 return 0;
117 }
118 sch->qstats.drops++;
119#ifdef CONFIG_NET_CLS_POLICE
120 if (sch->reshape_fail==NULL || sch->reshape_fail(skb, sch))
121#endif
122 kfree_skb(skb);
123 return NET_XMIT_DROP;
124}
125
126static int
127pfifo_requeue(struct sk_buff *skb, struct Qdisc* sch)
128{
129 __skb_queue_head(&sch->q, skb);
130 sch->qstats.requeues++;
131 return 0;
132}
133
134 44
135static struct sk_buff * 45 return qdisc_reshape_fail(skb, sch);
136pfifo_dequeue(struct Qdisc* sch)
137{
138 return __skb_dequeue(&sch->q);
139} 46}
140 47
141static int fifo_init(struct Qdisc *sch, struct rtattr *opt) 48static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
@@ -143,66 +50,59 @@ static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
143 struct fifo_sched_data *q = qdisc_priv(sch); 50 struct fifo_sched_data *q = qdisc_priv(sch);
144 51
145 if (opt == NULL) { 52 if (opt == NULL) {
146 unsigned int limit = sch->dev->tx_queue_len ? : 1; 53 u32 limit = sch->dev->tx_queue_len ? : 1;
147 54
148 if (sch->ops == &bfifo_qdisc_ops) 55 if (sch->ops == &bfifo_qdisc_ops)
149 q->limit = limit*sch->dev->mtu; 56 limit *= sch->dev->mtu;
150 else 57
151 q->limit = limit; 58 q->limit = limit;
152 } else { 59 } else {
153 struct tc_fifo_qopt *ctl = RTA_DATA(opt); 60 struct tc_fifo_qopt *ctl = RTA_DATA(opt);
154 if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) 61
62 if (RTA_PAYLOAD(opt) < sizeof(*ctl))
155 return -EINVAL; 63 return -EINVAL;
64
156 q->limit = ctl->limit; 65 q->limit = ctl->limit;
157 } 66 }
67
158 return 0; 68 return 0;
159} 69}
160 70
161static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) 71static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
162{ 72{
163 struct fifo_sched_data *q = qdisc_priv(sch); 73 struct fifo_sched_data *q = qdisc_priv(sch);
164 unsigned char *b = skb->tail; 74 struct tc_fifo_qopt opt = { .limit = q->limit };
165 struct tc_fifo_qopt opt;
166 75
167 opt.limit = q->limit;
168 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 76 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
169
170 return skb->len; 77 return skb->len;
171 78
172rtattr_failure: 79rtattr_failure:
173 skb_trim(skb, b - skb->data);
174 return -1; 80 return -1;
175} 81}
176 82
177struct Qdisc_ops pfifo_qdisc_ops = { 83struct Qdisc_ops pfifo_qdisc_ops = {
178 .next = NULL,
179 .cl_ops = NULL,
180 .id = "pfifo", 84 .id = "pfifo",
181 .priv_size = sizeof(struct fifo_sched_data), 85 .priv_size = sizeof(struct fifo_sched_data),
182 .enqueue = pfifo_enqueue, 86 .enqueue = pfifo_enqueue,
183 .dequeue = pfifo_dequeue, 87 .dequeue = qdisc_dequeue_head,
184 .requeue = pfifo_requeue, 88 .requeue = qdisc_requeue,
185 .drop = fifo_drop, 89 .drop = qdisc_queue_drop,
186 .init = fifo_init, 90 .init = fifo_init,
187 .reset = fifo_reset, 91 .reset = qdisc_reset_queue,
188 .destroy = NULL,
189 .change = fifo_init, 92 .change = fifo_init,
190 .dump = fifo_dump, 93 .dump = fifo_dump,
191 .owner = THIS_MODULE, 94 .owner = THIS_MODULE,
192}; 95};
193 96
194struct Qdisc_ops bfifo_qdisc_ops = { 97struct Qdisc_ops bfifo_qdisc_ops = {
195 .next = NULL,
196 .cl_ops = NULL,
197 .id = "bfifo", 98 .id = "bfifo",
198 .priv_size = sizeof(struct fifo_sched_data), 99 .priv_size = sizeof(struct fifo_sched_data),
199 .enqueue = bfifo_enqueue, 100 .enqueue = bfifo_enqueue,
200 .dequeue = bfifo_dequeue, 101 .dequeue = qdisc_dequeue_head,
201 .requeue = bfifo_requeue, 102 .requeue = qdisc_requeue,
202 .drop = fifo_drop, 103 .drop = qdisc_queue_drop,
203 .init = fifo_init, 104 .init = fifo_init,
204 .reset = fifo_reset, 105 .reset = qdisc_reset_queue,
205 .destroy = NULL,
206 .change = fifo_init, 106 .change = fifo_init,
207 .dump = fifo_dump, 107 .dump = fifo_dump,
208 .owner = THIS_MODULE, 108 .owner = THIS_MODULE,
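Note: the sch_fifo.c rewrite strips both flavours down to an admission test plus the shared qdisc_* helpers — bfifo limits by queued bytes, pfifo by packet count, and dequeue/requeue/drop/reset all come from the generic helpers named in the ops tables. A userspace miniature of the two admission policies, with made-up limits:

    #include <stdio.h>

    /* Miniature of the two fifo flavours after the rewrite: one helper
     * does the tail-enqueue bookkeeping; the flavours differ only in
     * the admission test (bytes vs. packets). */
    struct fifo { unsigned qlen, backlog, limit; };

    static int enqueue_tail(struct fifo *q, unsigned len)
    {
            q->qlen++;
            q->backlog += len;
            return 0;                           /* NET_XMIT_SUCCESS */
    }

    static int bfifo_enqueue(struct fifo *q, unsigned len)
    {
            if (q->backlog + len <= q->limit)   /* byte-based limit */
                    return enqueue_tail(q, len);
            return 1;                           /* drop */
    }

    static int pfifo_enqueue(struct fifo *q, unsigned len)
    {
            if (q->qlen < q->limit)             /* packet-count limit */
                    return enqueue_tail(q, len);
            return 1;
    }

    int main(void)
    {
            struct fifo b = { .limit = 3000 };  /* bytes */
            struct fifo p = { .limit = 2 };     /* packets */

            printf("bfifo: %d %d %d\n", bfifo_enqueue(&b, 1500),
                   bfifo_enqueue(&b, 1500), bfifo_enqueue(&b, 1));
            printf("pfifo: %d %d %d\n", pfifo_enqueue(&p, 100),
                   pfifo_enqueue(&p, 100), pfifo_enqueue(&p, 100));
            return 0;
    }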
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 87e48a4e10..7683b34dc6 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -243,31 +243,27 @@ static void dev_watchdog_down(struct net_device *dev)
243 cheaper. 243 cheaper.
244 */ 244 */
245 245
246static int 246static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
247noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
248{ 247{
249 kfree_skb(skb); 248 kfree_skb(skb);
250 return NET_XMIT_CN; 249 return NET_XMIT_CN;
251} 250}
252 251
253static struct sk_buff * 252static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
254noop_dequeue(struct Qdisc * qdisc)
255{ 253{
256 return NULL; 254 return NULL;
257} 255}
258 256
259static int 257static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
260noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
261{ 258{
262 if (net_ratelimit()) 259 if (net_ratelimit())
263 printk(KERN_DEBUG "%s deferred output. It is buggy.\n", skb->dev->name); 260 printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
261 skb->dev->name);
264 kfree_skb(skb); 262 kfree_skb(skb);
265 return NET_XMIT_CN; 263 return NET_XMIT_CN;
266} 264}
267 265
268struct Qdisc_ops noop_qdisc_ops = { 266struct Qdisc_ops noop_qdisc_ops = {
269 .next = NULL,
270 .cl_ops = NULL,
271 .id = "noop", 267 .id = "noop",
272 .priv_size = 0, 268 .priv_size = 0,
273 .enqueue = noop_enqueue, 269 .enqueue = noop_enqueue,
@@ -285,8 +281,6 @@ struct Qdisc noop_qdisc = {
285}; 281};
286 282
287static struct Qdisc_ops noqueue_qdisc_ops = { 283static struct Qdisc_ops noqueue_qdisc_ops = {
288 .next = NULL,
289 .cl_ops = NULL,
290 .id = "noqueue", 284 .id = "noqueue",
291 .priv_size = 0, 285 .priv_size = 0,
292 .enqueue = noop_enqueue, 286 .enqueue = noop_enqueue,
@@ -311,97 +305,87 @@ static const u8 prio2band[TC_PRIO_MAX+1] =
311 generic prio+fifo combination. 305 generic prio+fifo combination.
312 */ 306 */
313 307
314static int 308#define PFIFO_FAST_BANDS 3
315pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) 309
310static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
311 struct Qdisc *qdisc)
316{ 312{
317 struct sk_buff_head *list = qdisc_priv(qdisc); 313 struct sk_buff_head *list = qdisc_priv(qdisc);
314 return list + prio2band[skb->priority & TC_PRIO_MAX];
315}
318 316
319 list += prio2band[skb->priority&TC_PRIO_MAX]; 317static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
318{
319 struct sk_buff_head *list = prio2list(skb, qdisc);
320 320
321 if (list->qlen < qdisc->dev->tx_queue_len) { 321 if (skb_queue_len(list) < qdisc->dev->tx_queue_len) {
322 __skb_queue_tail(list, skb);
323 qdisc->q.qlen++; 322 qdisc->q.qlen++;
324 qdisc->bstats.bytes += skb->len; 323 return __qdisc_enqueue_tail(skb, qdisc, list);
325 qdisc->bstats.packets++;
326 return 0;
327 } 324 }
328 qdisc->qstats.drops++; 325
329 kfree_skb(skb); 326 return qdisc_drop(skb, qdisc);
330 return NET_XMIT_DROP;
331} 327}
332 328
333static struct sk_buff * 329static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
334pfifo_fast_dequeue(struct Qdisc* qdisc)
335{ 330{
336 int prio; 331 int prio;
337 struct sk_buff_head *list = qdisc_priv(qdisc); 332 struct sk_buff_head *list = qdisc_priv(qdisc);
338 struct sk_buff *skb;
339 333
340 for (prio = 0; prio < 3; prio++, list++) { 334 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++, list++) {
341 skb = __skb_dequeue(list); 335 struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
342 if (skb) { 336 if (skb) {
343 qdisc->q.qlen--; 337 qdisc->q.qlen--;
344 return skb; 338 return skb;
345 } 339 }
346 } 340 }
341
347 return NULL; 342 return NULL;
348} 343}
349 344
350static int 345static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
351pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
352{ 346{
353 struct sk_buff_head *list = qdisc_priv(qdisc);
354
355 list += prio2band[skb->priority&TC_PRIO_MAX];
356
357 __skb_queue_head(list, skb);
358 qdisc->q.qlen++; 347 qdisc->q.qlen++;
359 qdisc->qstats.requeues++; 348 return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
360 return 0;
361} 349}
362 350
363static void 351static void pfifo_fast_reset(struct Qdisc* qdisc)
364pfifo_fast_reset(struct Qdisc* qdisc)
365{ 352{
366 int prio; 353 int prio;
367 struct sk_buff_head *list = qdisc_priv(qdisc); 354 struct sk_buff_head *list = qdisc_priv(qdisc);
368 355
369 for (prio=0; prio < 3; prio++) 356 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
370 skb_queue_purge(list+prio); 357 __qdisc_reset_queue(qdisc, list + prio);
358
359 qdisc->qstats.backlog = 0;
371 qdisc->q.qlen = 0; 360 qdisc->q.qlen = 0;
372} 361}
373 362
374static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) 363static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
375{ 364{
376 unsigned char *b = skb->tail; 365 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
377 struct tc_prio_qopt opt;
378 366
379 opt.bands = 3;
380 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); 367 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
381 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 368 RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
382 return skb->len; 369 return skb->len;
383 370
384rtattr_failure: 371rtattr_failure:
385 skb_trim(skb, b - skb->data);
386 return -1; 372 return -1;
387} 373}
388 374
389static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt) 375static int pfifo_fast_init(struct Qdisc *qdisc, struct rtattr *opt)
390{ 376{
391 int i; 377 int prio;
392 struct sk_buff_head *list = qdisc_priv(qdisc); 378 struct sk_buff_head *list = qdisc_priv(qdisc);
393 379
394 for (i=0; i<3; i++) 380 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
395 skb_queue_head_init(list+i); 381 skb_queue_head_init(list + prio);
396 382
397 return 0; 383 return 0;
398} 384}
399 385
400static struct Qdisc_ops pfifo_fast_ops = { 386static struct Qdisc_ops pfifo_fast_ops = {
401 .next = NULL,
402 .cl_ops = NULL,
403 .id = "pfifo_fast", 387 .id = "pfifo_fast",
404 .priv_size = 3 * sizeof(struct sk_buff_head), 388 .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
405 .enqueue = pfifo_fast_enqueue, 389 .enqueue = pfifo_fast_enqueue,
406 .dequeue = pfifo_fast_dequeue, 390 .dequeue = pfifo_fast_dequeue,
407 .requeue = pfifo_fast_requeue, 391 .requeue = pfifo_fast_requeue,
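Note: pfifo_fast keeps three strict-priority bands; the rewrite names the count (PFIFO_FAST_BANDS), factors the skb->priority-to-band lookup into prio2list(), and leans on the __qdisc_* helpers for the actual queue work. A toy version of the band selection and the strict-priority dequeue loop — the priomap values here are illustrative, not necessarily the kernel's prio2band table:

    #include <stdio.h>

    #define PFIFO_FAST_BANDS 3
    #define TC_PRIO_MAX      15

    /* Illustrative priority->band map; band 0 drains first. */
    static const int prio2band[TC_PRIO_MAX + 1] = {
            1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
    };

    int main(void)
    {
            int qlen[PFIFO_FAST_BANDS] = { 0 };
            int prios[] = { 6, 0, 3 };          /* three queued packets */

            /* enqueue: pick the band from the packet priority */
            for (int i = 0; i < 3; i++)
                    qlen[prio2band[prios[i] & TC_PRIO_MAX]]++;

            /* dequeue: scan bands in order, as pfifo_fast_dequeue() does */
            for (int band = 0; band < PFIFO_FAST_BANDS; band++)
                    while (qlen[band] > 0) {
                            printf("dequeue from band %d\n", band);
                            qlen[band]--;
                    }
            return 0;
    }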
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 663843d97a..7ae6aa772d 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -191,10 +191,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
191 asoc->last_cwr_tsn = asoc->ctsn_ack_point; 191 asoc->last_cwr_tsn = asoc->ctsn_ack_point;
192 asoc->unack_data = 0; 192 asoc->unack_data = 0;
193 193
194 SCTP_DEBUG_PRINTK("myctsnap for %s INIT as 0x%x.\n",
195 asoc->ep->debug_name,
196 asoc->ctsn_ack_point);
197
198 /* ADDIP Section 4.1 Asconf Chunk Procedures 194 /* ADDIP Section 4.1 Asconf Chunk Procedures
199 * 195 *
200 * When an endpoint has an ASCONF signaled change to be sent to the 196 * When an endpoint has an ASCONF signaled change to be sent to the
@@ -211,6 +207,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
211 207
212 /* Make an empty list of remote transport addresses. */ 208 /* Make an empty list of remote transport addresses. */
213 INIT_LIST_HEAD(&asoc->peer.transport_addr_list); 209 INIT_LIST_HEAD(&asoc->peer.transport_addr_list);
210 asoc->peer.transport_count = 0;
214 211
215 /* RFC 2960 5.1 Normal Establishment of an Association 212 /* RFC 2960 5.1 Normal Establishment of an Association
216 * 213 *
@@ -288,6 +285,7 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
288 285
289 asoc->base.malloced = 1; 286 asoc->base.malloced = 1;
290 SCTP_DBG_OBJCNT_INC(assoc); 287 SCTP_DBG_OBJCNT_INC(assoc);
288 SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc);
291 289
292 return asoc; 290 return asoc;
293 291
@@ -356,6 +354,8 @@ void sctp_association_free(struct sctp_association *asoc)
356 sctp_transport_free(transport); 354 sctp_transport_free(transport);
357 } 355 }
358 356
357 asoc->peer.transport_count = 0;
358
359 /* Free any cached ASCONF_ACK chunk. */ 359 /* Free any cached ASCONF_ACK chunk. */
360 if (asoc->addip_last_asconf_ack) 360 if (asoc->addip_last_asconf_ack)
361 sctp_chunk_free(asoc->addip_last_asconf_ack); 361 sctp_chunk_free(asoc->addip_last_asconf_ack);
@@ -400,7 +400,7 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
400 /* If the primary path is changing, assume that the 400 /* If the primary path is changing, assume that the
401 * user wants to use this new path. 401 * user wants to use this new path.
402 */ 402 */
403 if (transport->active) 403 if (transport->state != SCTP_INACTIVE)
404 asoc->peer.active_path = transport; 404 asoc->peer.active_path = transport;
405 405
406 /* 406 /*
@@ -428,10 +428,58 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
428 transport->cacc.next_tsn_at_change = asoc->next_tsn; 428 transport->cacc.next_tsn_at_change = asoc->next_tsn;
429} 429}
430 430
431/* Remove a transport from an association. */
432void sctp_assoc_rm_peer(struct sctp_association *asoc,
433 struct sctp_transport *peer)
434{
435 struct list_head *pos;
436 struct sctp_transport *transport;
437
438 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_rm_peer:association %p addr: ",
439 " port: %d\n",
440 asoc,
441 (&peer->ipaddr),
442 peer->ipaddr.v4.sin_port);
443
444 /* If we are to remove the current retran_path, update it
445 * to the next peer before removing this peer from the list.
446 */
447 if (asoc->peer.retran_path == peer)
448 sctp_assoc_update_retran_path(asoc);
449
450 /* Remove this peer from the list. */
451 list_del(&peer->transports);
452
453 /* Get the first transport of asoc. */
454 pos = asoc->peer.transport_addr_list.next;
455 transport = list_entry(pos, struct sctp_transport, transports);
456
457 /* Update any entries that match the peer to be deleted. */
458 if (asoc->peer.primary_path == peer)
459 sctp_assoc_set_primary(asoc, transport);
460 if (asoc->peer.active_path == peer)
461 asoc->peer.active_path = transport;
462 if (asoc->peer.last_data_from == peer)
463 asoc->peer.last_data_from = transport;
464
465 /* If we remove the transport an INIT was last sent to, set it to
466 * NULL. Combined with the update of the retran path above, this
467 * will cause the next INIT to be sent to the next available
468 * transport, maintaining the cycle.
469 */
470 if (asoc->init_last_sent_to == peer)
471 asoc->init_last_sent_to = NULL;
472
473 asoc->peer.transport_count--;
474
475 sctp_transport_free(peer);
476}
477
431/* Add a transport address to an association. */ 478/* Add a transport address to an association. */
432struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, 479struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
433 const union sctp_addr *addr, 480 const union sctp_addr *addr,
434 int gfp) 481 const int gfp,
482 const int peer_state)
435{ 483{
436 struct sctp_transport *peer; 484 struct sctp_transport *peer;
437 struct sctp_sock *sp; 485 struct sctp_sock *sp;
@@ -442,14 +490,25 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
442 /* AF_INET and AF_INET6 share common port field. */ 490 /* AF_INET and AF_INET6 share common port field. */
443 port = addr->v4.sin_port; 491 port = addr->v4.sin_port;
444 492
493 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ",
494 " port: %d state:%s\n",
495 asoc,
496 addr,
497 addr->v4.sin_port,
498 peer_state == SCTP_UNKNOWN?"UNKNOWN":"ACTIVE");
499
445 /* Set the port if it has not been set yet. */ 500 /* Set the port if it has not been set yet. */
446 if (0 == asoc->peer.port) 501 if (0 == asoc->peer.port)
447 asoc->peer.port = port; 502 asoc->peer.port = port;
448 503
449 /* Check to see if this is a duplicate. */ 504 /* Check to see if this is a duplicate. */
450 peer = sctp_assoc_lookup_paddr(asoc, addr); 505 peer = sctp_assoc_lookup_paddr(asoc, addr);
451 if (peer) 506 if (peer) {
507 if (peer_state == SCTP_ACTIVE &&
508 peer->state == SCTP_UNKNOWN)
509 peer->state = SCTP_ACTIVE;
452 return peer; 510 return peer;
511 }
453 512
454 peer = sctp_transport_new(addr, gfp); 513 peer = sctp_transport_new(addr, gfp);
455 if (!peer) 514 if (!peer)
@@ -516,8 +575,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
516 /* Set the transport's RTO.initial value */ 575 /* Set the transport's RTO.initial value */
517 peer->rto = asoc->rto_initial; 576 peer->rto = asoc->rto_initial;
518 577
578 /* Set the peer's active state. */
579 peer->state = peer_state;
580
519 /* Attach the remote transport to our asoc. */ 581 /* Attach the remote transport to our asoc. */
520 list_add_tail(&peer->transports, &asoc->peer.transport_addr_list); 582 list_add_tail(&peer->transports, &asoc->peer.transport_addr_list);
583 asoc->peer.transport_count++;
521 584
522 /* If we do not yet have a primary path, set one. */ 585 /* If we do not yet have a primary path, set one. */
523 if (!asoc->peer.primary_path) { 586 if (!asoc->peer.primary_path) {
@@ -525,8 +588,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
525 asoc->peer.retran_path = peer; 588 asoc->peer.retran_path = peer;
526 } 589 }
527 590
528 if (asoc->peer.active_path == asoc->peer.retran_path) 591 if (asoc->peer.active_path == asoc->peer.retran_path) {
529 asoc->peer.retran_path = peer; 592 asoc->peer.retran_path = peer;
593 }
530 594
531 return peer; 595 return peer;
532} 596}
@@ -537,37 +601,16 @@ void sctp_assoc_del_peer(struct sctp_association *asoc,
537{ 601{
538 struct list_head *pos; 602 struct list_head *pos;
539 struct list_head *temp; 603 struct list_head *temp;
540 struct sctp_transport *peer = NULL;
541 struct sctp_transport *transport; 604 struct sctp_transport *transport;
542 605
543 list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { 606 list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
544 transport = list_entry(pos, struct sctp_transport, transports); 607 transport = list_entry(pos, struct sctp_transport, transports);
545 if (sctp_cmp_addr_exact(addr, &transport->ipaddr)) { 608 if (sctp_cmp_addr_exact(addr, &transport->ipaddr)) {
546 peer = transport; 609 /* Do book keeping for removing the peer and free it. */
547 list_del(pos); 610 sctp_assoc_rm_peer(asoc, transport);
548 break; 611 break;
549 } 612 }
550 } 613 }
551
552 /* The address we want delete is not in the association. */
553 if (!peer)
554 return;
555
556 /* Get the first transport of asoc. */
557 pos = asoc->peer.transport_addr_list.next;
558 transport = list_entry(pos, struct sctp_transport, transports);
559
560 /* Update any entries that match the peer to be deleted. */
561 if (asoc->peer.primary_path == peer)
562 sctp_assoc_set_primary(asoc, transport);
563 if (asoc->peer.active_path == peer)
564 asoc->peer.active_path = transport;
565 if (asoc->peer.retran_path == peer)
566 asoc->peer.retran_path = transport;
567 if (asoc->peer.last_data_from == peer)
568 asoc->peer.last_data_from = transport;
569
570 sctp_transport_free(peer);
571} 614}
572 615
573/* Lookup a transport by address. */ 616/* Lookup a transport by address. */
@@ -608,12 +651,12 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
608 /* Record the transition on the transport. */ 651 /* Record the transition on the transport. */
609 switch (command) { 652 switch (command) {
610 case SCTP_TRANSPORT_UP: 653 case SCTP_TRANSPORT_UP:
611 transport->active = SCTP_ACTIVE; 654 transport->state = SCTP_ACTIVE;
612 spc_state = SCTP_ADDR_AVAILABLE; 655 spc_state = SCTP_ADDR_AVAILABLE;
613 break; 656 break;
614 657
615 case SCTP_TRANSPORT_DOWN: 658 case SCTP_TRANSPORT_DOWN:
616 transport->active = SCTP_INACTIVE; 659 transport->state = SCTP_INACTIVE;
617 spc_state = SCTP_ADDR_UNREACHABLE; 660 spc_state = SCTP_ADDR_UNREACHABLE;
618 break; 661 break;
619 662
@@ -643,7 +686,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
643 list_for_each(pos, &asoc->peer.transport_addr_list) { 686 list_for_each(pos, &asoc->peer.transport_addr_list) {
644 t = list_entry(pos, struct sctp_transport, transports); 687 t = list_entry(pos, struct sctp_transport, transports);
645 688
646 if (!t->active) 689 if (t->state == SCTP_INACTIVE)
647 continue; 690 continue;
648 if (!first || t->last_time_heard > first->last_time_heard) { 691 if (!first || t->last_time_heard > first->last_time_heard) {
649 second = first; 692 second = first;
@@ -663,7 +706,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
663 * [If the primary is active but not most recent, bump the most 706 * [If the primary is active but not most recent, bump the most
664 * recently used transport.] 707 * recently used transport.]
665 */ 708 */
666 if (asoc->peer.primary_path->active && 709 if (asoc->peer.primary_path->state != SCTP_INACTIVE &&
667 first != asoc->peer.primary_path) { 710 first != asoc->peer.primary_path) {
668 second = first; 711 second = first;
669 first = asoc->peer.primary_path; 712 first = asoc->peer.primary_path;
@@ -958,7 +1001,7 @@ void sctp_assoc_update(struct sctp_association *asoc,
958 transports); 1001 transports);
959 if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr)) 1002 if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr))
960 sctp_assoc_add_peer(asoc, &trans->ipaddr, 1003 sctp_assoc_add_peer(asoc, &trans->ipaddr,
961 GFP_ATOMIC); 1004 GFP_ATOMIC, SCTP_ACTIVE);
962 } 1005 }
963 1006
964 asoc->ctsn_ack_point = asoc->next_tsn - 1; 1007 asoc->ctsn_ack_point = asoc->next_tsn - 1;
@@ -998,7 +1041,7 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
998 1041
999 /* Try to find an active transport. */ 1042 /* Try to find an active transport. */
1000 1043
1001 if (t->active) { 1044 if (t->state != SCTP_INACTIVE) {
1002 break; 1045 break;
1003 } else { 1046 } else {
1004 /* Keep track of the next transport in case 1047 /* Keep track of the next transport in case
@@ -1019,6 +1062,40 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
1019 } 1062 }
1020 1063
1021 asoc->peer.retran_path = t; 1064 asoc->peer.retran_path = t;
1065
1066 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
1067 " %p addr: ",
1068 " port: %d\n",
1069 asoc,
1070 (&t->ipaddr),
1071 t->ipaddr.v4.sin_port);
1072}
1073
1074/* Choose the transport for sending a INIT packet. */
1075struct sctp_transport *sctp_assoc_choose_init_transport(
1076 struct sctp_association *asoc)
1077{
1078 struct sctp_transport *t;
1079
1080 /* Use the retran path. If the last INIT was sent over the
1081 * retran path, update the retran path and use it.
1082 */
1083 if (!asoc->init_last_sent_to) {
1084 t = asoc->peer.active_path;
1085 } else {
1086 if (asoc->init_last_sent_to == asoc->peer.retran_path)
1087 sctp_assoc_update_retran_path(asoc);
1088 t = asoc->peer.retran_path;
1089 }
1090
1091 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
1092 " %p addr: ",
1093 " port: %d\n",
1094 asoc,
1095 (&t->ipaddr),
1096 t->ipaddr.v4.sin_port);
1097
1098 return t;
1022} 1099}
1023 1100
1024/* Choose the transport for sending a SHUTDOWN packet. */ 1101/* Choose the transport for sending a SHUTDOWN packet. */
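The associola.c hunks above replace the boolean transport->active flag with a tri-state ->state field (SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKNOWN) and factor removal bookkeeping into sctp_assoc_rm_peer(), which repoints the association's cached path pointers before freeing the transport. A minimal userspace sketch of that bookkeeping follows; it assumes a simplified singly linked list, and the struct layout and assoc_rm_peer() helper are invented for illustration, not the kernel's (the kernel also recomputes the retransmit path via sctp_assoc_update_retran_path() before unlinking).

#include <stdlib.h>

enum tstate { T_INACTIVE, T_ACTIVE, T_UNKNOWN };	/* models the SCTP_* states */

struct transport {
	enum tstate state;
	struct transport *next;		/* stands in for list_head linkage */
};

struct assoc {
	struct transport *head;		/* peer.transport_addr_list */
	struct transport *primary;	/* peer.primary_path */
	struct transport *active;	/* peer.active_path */
	struct transport *retran;	/* peer.retran_path */
	int count;			/* peer.transport_count */
};

/* Unlink 'peer' and make every cached pointer that referenced it fall
 * back to the first remaining transport, then free it. */
static void assoc_rm_peer(struct assoc *a, struct transport *peer)
{
	struct transport **pp;

	for (pp = &a->head; *pp; pp = &(*pp)->next) {
		if (*pp == peer) {
			*pp = peer->next;
			break;
		}
	}
	if (a->primary == peer)
		a->primary = a->head;
	if (a->active == peer)
		a->active = a->head;
	if (a->retran == peer)
		a->retran = a->head;
	a->count--;
	free(peer);
}

int main(void)
{
	struct transport *t1 = calloc(1, sizeof(*t1));
	struct transport *t2 = calloc(1, sizeof(*t2));
	struct assoc a = { .head = t1, .primary = t1, .active = t1,
			   .retran = t2, .count = 2 };

	t1->next = t2;
	t1->state = T_UNKNOWN;
	t2->state = T_ACTIVE;
	assoc_rm_peer(&a, t1);	/* primary and active fall back to t2 */
	free(t2);
	return a.count == 1 ? 0 : 1;
}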
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 334f61773e..2ec0320fac 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -134,7 +134,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
134 ep->last_key = ep->current_key = 0; 134 ep->last_key = ep->current_key = 0;
135 ep->key_changed_at = jiffies; 135 ep->key_changed_at = jiffies;
136 136
137 ep->debug_name = "unnamedEndpoint";
138 return ep; 137 return ep;
139} 138}
140 139
diff --git a/net/sctp/input.c b/net/sctp/input.c
index b719a77d66..339f7acfdb 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -178,6 +178,37 @@ int sctp_rcv(struct sk_buff *skb)
178 178
179 asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport); 179 asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport);
180 180
181 if (!asoc)
182 ep = __sctp_rcv_lookup_endpoint(&dest);
183
184 /* Retrieve the common input handling substructure. */
185 rcvr = asoc ? &asoc->base : &ep->base;
186 sk = rcvr->sk;
187
188 /*
189 * If a frame arrives on an interface and the receiving socket is
190 * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB
191 */
192 if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb)))
193 {
194 sock_put(sk);
195 if (asoc) {
196 sctp_association_put(asoc);
197 asoc = NULL;
198 } else {
199 sctp_endpoint_put(ep);
200 ep = NULL;
201 }
202 sk = sctp_get_ctl_sock();
203 ep = sctp_sk(sk)->ep;
204 sctp_endpoint_hold(ep);
205 sock_hold(sk);
206 rcvr = &ep->base;
207 }
208
209 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
210 goto discard_release;
211
181 /* 212 /*
182 * RFC 2960, 8.4 - Handle "Out of the blue" Packets. 213 * RFC 2960, 8.4 - Handle "Out of the blue" Packets.
183 * An SCTP packet is called an "out of the blue" (OOTB) 214 * An SCTP packet is called an "out of the blue" (OOTB)
@@ -187,22 +218,12 @@ int sctp_rcv(struct sk_buff *skb)
187 * packet belongs. 218 * packet belongs.
188 */ 219 */
189 if (!asoc) { 220 if (!asoc) {
190 ep = __sctp_rcv_lookup_endpoint(&dest);
191 if (sctp_rcv_ootb(skb)) { 221 if (sctp_rcv_ootb(skb)) {
192 SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES); 222 SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES);
193 goto discard_release; 223 goto discard_release;
194 } 224 }
195 } 225 }
196 226
197 /* Retrieve the common input handling substructure. */
198 rcvr = asoc ? &asoc->base : &ep->base;
199 sk = rcvr->sk;
200
201 if ((sk) && (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)) {
202 goto discard_release;
203 }
204
205
206 /* SCTP seems to always need a timestamp right now (FIXME) */ 227 /* SCTP seems to always need a timestamp right now (FIXME) */
207 if (skb->stamp.tv_sec == 0) { 228 if (skb->stamp.tv_sec == 0) {
208 do_gettimeofday(&skb->stamp); 229 do_gettimeofday(&skb->stamp);
@@ -265,13 +286,11 @@ discard_it:
265 286
266discard_release: 287discard_release:
267 /* Release any structures we may be holding. */ 288 /* Release any structures we may be holding. */
268 if (asoc) { 289 sock_put(sk);
269 sock_put(asoc->base.sk); 290 if (asoc)
270 sctp_association_put(asoc); 291 sctp_association_put(asoc);
271 } else { 292 else
272 sock_put(ep->base.sk);
273 sctp_endpoint_put(ep); 293 sctp_endpoint_put(ep);
274 }
275 294
276 goto discard_it; 295 goto discard_it;
277} 296}
@@ -334,7 +353,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
334 353
335 sctp_do_sm(SCTP_EVENT_T_OTHER, 354 sctp_do_sm(SCTP_EVENT_T_OTHER,
336 SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), 355 SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
337 asoc->state, asoc->ep, asoc, NULL, 356 asoc->state, asoc->ep, asoc, t,
338 GFP_ATOMIC); 357 GFP_ATOMIC);
339 358
340} 359}
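The sctp_rcv() rework above looks up the receiver (association or endpoint) first, takes its socket reference once, and then treats a packet that arrived on the wrong interface for a SO_BINDTODEVICE-bound socket as out-of-the-blue by swapping in the control socket; the single discard_release path can then drop whichever reference is held. A compact sketch of just the selection rule, with invented names (the reference counting is omitted):

#include <stdio.h>

struct rx_sock {
	int bound_ifindex;	/* 0 = not bound to a device */
};

/* A device-bound socket only accepts traffic that arrived on its own
 * interface; anything else is rerouted to the control socket so it is
 * handled as an OOTB packet. */
static const struct rx_sock *pick_receiver(const struct rx_sock *match,
					   const struct rx_sock *ctl,
					   int skb_ifindex)
{
	if (match->bound_ifindex && match->bound_ifindex != skb_ifindex)
		return ctl;
	return match;
}

int main(void)
{
	struct rx_sock bound = { .bound_ifindex = 2 }, ctl = { 0 };

	printf("iif 2 -> %s\n",
	       pick_receiver(&bound, &ctl, 2) == &bound ? "socket" : "ctl");
	printf("iif 3 -> %s\n",
	       pick_receiver(&bound, &ctl, 3) == &bound ? "socket" : "ctl");
	return 0;
}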
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index c9d9ea0647..c7e42d125b 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -812,26 +812,23 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
812 if (addr->sa.sa_family != AF_INET6) 812 if (addr->sa.sa_family != AF_INET6)
813 af = sctp_get_af_specific(addr->sa.sa_family); 813 af = sctp_get_af_specific(addr->sa.sa_family);
814 else { 814 else {
815 struct sock *sk;
816 int type = ipv6_addr_type(&addr->v6.sin6_addr); 815 int type = ipv6_addr_type(&addr->v6.sin6_addr);
817 sk = sctp_opt2sk(opt); 816 struct net_device *dev;
817
818 if (type & IPV6_ADDR_LINKLOCAL) { 818 if (type & IPV6_ADDR_LINKLOCAL) {
819 /* Note: Behavior similar to af_inet6.c: 819 if (!addr->v6.sin6_scope_id)
820 * 1) Overrides previous bound_dev_if 820 return 0;
821 * 2) Destructive even if bind isn't successful. 821 dev = dev_get_by_index(addr->v6.sin6_scope_id);
822 */ 822 if (!dev)
823
824 if (addr->v6.sin6_scope_id)
825 sk->sk_bound_dev_if = addr->v6.sin6_scope_id;
826 if (!sk->sk_bound_dev_if)
827 return 0; 823 return 0;
824 dev_put(dev);
828 } 825 }
829 af = opt->pf->af; 826 af = opt->pf->af;
830 } 827 }
831 return af->available(addr, opt); 828 return af->available(addr, opt);
832} 829}
833 830
834/* Verify that the provided sockaddr looks bindable. Common verification, 831/* Verify that the provided sockaddr looks sendable. Common verification,
835 * has already been taken care of. 832 * has already been taken care of.
836 */ 833 */
837static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) 834static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
@@ -842,19 +839,16 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
842 if (addr->sa.sa_family != AF_INET6) 839 if (addr->sa.sa_family != AF_INET6)
843 af = sctp_get_af_specific(addr->sa.sa_family); 840 af = sctp_get_af_specific(addr->sa.sa_family);
844 else { 841 else {
845 struct sock *sk;
846 int type = ipv6_addr_type(&addr->v6.sin6_addr); 842 int type = ipv6_addr_type(&addr->v6.sin6_addr);
847 sk = sctp_opt2sk(opt); 843 struct net_device *dev;
844
848 if (type & IPV6_ADDR_LINKLOCAL) { 845 if (type & IPV6_ADDR_LINKLOCAL) {
849 /* Note: Behavior similar to af_inet6.c: 846 if (!addr->v6.sin6_scope_id)
850 * 1) Overrides previous bound_dev_if 847 return 0;
851 * 2) Destructive even if bind isn't successful. 848 dev = dev_get_by_index(addr->v6.sin6_scope_id);
852 */ 849 if (!dev)
853
854 if (addr->v6.sin6_scope_id)
855 sk->sk_bound_dev_if = addr->v6.sin6_scope_id;
856 if (!sk->sk_bound_dev_if)
857 return 0; 850 return 0;
851 dev_put(dev);
858 } 852 }
859 af = opt->pf->af; 853 af = opt->pf->af;
860 } 854 }
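The ipv6.c hunks stop overwriting sk->sk_bound_dev_if as a side effect of address verification; a link-local address is now acceptable only if its sin6_scope_id names an existing device (checked with dev_get_by_index()/dev_put()). In userspace the same predicate can be expressed with if_indextoname(); a small self-contained sketch, with the helper name invented:

#include <net/if.h>
#include <stdio.h>

/* Return 1 if scope_id refers to a real interface, 0 otherwise --
 * the rule the reworked bind_verify()/send_verify() apply to
 * link-local addresses. */
static int link_local_scope_ok(unsigned int scope_id)
{
	char name[IF_NAMESIZE];

	if (!scope_id)
		return 0;	/* link-local without a scope id is rejected */
	return if_indextoname(scope_id, name) != NULL;
}

int main(void)
{
	printf("scope 0 ok: %d\n", link_local_scope_ok(0));
	printf("scope 1 ok: %d\n", link_local_scope_ok(1));	/* often "lo" */
	return 0;
}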
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 1b2d4adc4d..4eb81a1407 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -682,9 +682,9 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
682 682
683 if (!new_transport) { 683 if (!new_transport) {
684 new_transport = asoc->peer.active_path; 684 new_transport = asoc->peer.active_path;
685 } else if (!new_transport->active) { 685 } else if (new_transport->state == SCTP_INACTIVE) {
686 /* If the chunk is Heartbeat or Heartbeat Ack, 686 /* If the chunk is Heartbeat or Heartbeat Ack,
687 * send it to chunk->transport, even if it's 687 * send it to chunk->transport, even if it's
688 * inactive. 688 * inactive.
689 * 689 *
690 * 3.3.6 Heartbeat Acknowledgement: 690 * 3.3.6 Heartbeat Acknowledgement:
@@ -840,7 +840,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
840 * Otherwise, we want to use the active path. 840 * Otherwise, we want to use the active path.
841 */ 841 */
842 new_transport = chunk->transport; 842 new_transport = chunk->transport;
843 if (!new_transport || !new_transport->active) 843 if (!new_transport ||
844 new_transport->state == SCTP_INACTIVE)
844 new_transport = asoc->peer.active_path; 845 new_transport = asoc->peer.active_path;
845 846
846 /* Change packets if necessary. */ 847 /* Change packets if necessary. */
@@ -1454,7 +1455,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1454 /* Mark the destination transport address as 1455 /* Mark the destination transport address as
1455 * active if it is not so marked. 1456 * active if it is not so marked.
1456 */ 1457 */
1457 if (!transport->active) { 1458 if (transport->state == SCTP_INACTIVE) {
1458 sctp_assoc_control_transport( 1459 sctp_assoc_control_transport(
1459 transport->asoc, 1460 transport->asoc,
1460 transport, 1461 transport,
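All three outqueue.c hunks are the same substitution: "is this transport usable" becomes a comparison against SCTP_INACTIVE rather than a read of the old boolean, so transports in the new UNKNOWN state still count as usable. The transport selection for a queued chunk reduces to the following sketch (type names invented; the enum mirrors the kernel's state names):

enum tstate { SCTP_INACTIVE, SCTP_ACTIVE, SCTP_UNKNOWN };

struct xport { enum tstate state; };

/* Keep the chunk on its last transport unless that transport is
 * known-inactive; heartbeats are exempt in the real code and stay on
 * chunk->transport regardless. */
static struct xport *pick_transport(struct xport *chunk_transport,
				    struct xport *active_path)
{
	if (!chunk_transport || chunk_transport->state == SCTP_INACTIVE)
		return active_path;
	return chunk_transport;
}

int main(void)
{
	struct xport down = { SCTP_INACTIVE }, up = { SCTP_ACTIVE };

	return pick_transport(&down, &up) == &up ? 0 : 1;
}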
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index e42fd8c291..98d49ec9b7 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -132,14 +132,25 @@ void sctp_snmp_proc_exit(void)
132static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb) 132static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
133{ 133{
134 struct list_head *pos; 134 struct list_head *pos;
135 struct sctp_association *asoc;
135 struct sctp_sockaddr_entry *laddr; 136 struct sctp_sockaddr_entry *laddr;
136 union sctp_addr *addr; 137 struct sctp_transport *peer;
138 union sctp_addr *addr, *primary = NULL;
137 struct sctp_af *af; 139 struct sctp_af *af;
138 140
141 if (epb->type == SCTP_EP_TYPE_ASSOCIATION) {
142 asoc = sctp_assoc(epb);
143 peer = asoc->peer.primary_path;
144 primary = &peer->saddr;
145 }
146
139 list_for_each(pos, &epb->bind_addr.address_list) { 147 list_for_each(pos, &epb->bind_addr.address_list) {
140 laddr = list_entry(pos, struct sctp_sockaddr_entry, list); 148 laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
141 addr = (union sctp_addr *)&laddr->a; 149 addr = (union sctp_addr *)&laddr->a;
142 af = sctp_get_af_specific(addr->sa.sa_family); 150 af = sctp_get_af_specific(addr->sa.sa_family);
151 if (primary && af->cmp_addr(addr, primary)) {
152 seq_printf(seq, "*");
153 }
143 af->seq_dump_addr(seq, addr); 154 af->seq_dump_addr(seq, addr);
144 } 155 }
145} 156}
@@ -149,17 +160,54 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
149{ 160{
150 struct list_head *pos; 161 struct list_head *pos;
151 struct sctp_transport *transport; 162 struct sctp_transport *transport;
152 union sctp_addr *addr; 163 union sctp_addr *addr, *primary;
153 struct sctp_af *af; 164 struct sctp_af *af;
154 165
166 primary = &(assoc->peer.primary_addr);
155 list_for_each(pos, &assoc->peer.transport_addr_list) { 167 list_for_each(pos, &assoc->peer.transport_addr_list) {
156 transport = list_entry(pos, struct sctp_transport, transports); 168 transport = list_entry(pos, struct sctp_transport, transports);
157 addr = (union sctp_addr *)&transport->ipaddr; 169 addr = (union sctp_addr *)&transport->ipaddr;
158 af = sctp_get_af_specific(addr->sa.sa_family); 170 af = sctp_get_af_specific(addr->sa.sa_family);
171 if (af->cmp_addr(addr, primary)) {
172 seq_printf(seq, "*");
173 }
159 af->seq_dump_addr(seq, addr); 174 af->seq_dump_addr(seq, addr);
160 } 175 }
161} 176}
162 177
178static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos)
179{
180 if (*pos > sctp_ep_hashsize)
181 return NULL;
182
183 if (*pos < 0)
184 *pos = 0;
185
186 if (*pos == 0)
187 seq_printf(seq, " ENDPT SOCK STY SST HBKT LPORT UID INODE LADDRS\n");
188
189 ++*pos;
190
191 return (void *)pos;
192}
193
194static void sctp_eps_seq_stop(struct seq_file *seq, void *v)
195{
196 return;
197}
198
199
200static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos)
201{
202 if (*pos > sctp_ep_hashsize)
203 return NULL;
204
205 ++*pos;
206
207 return pos;
208}
209
210
163/* Display sctp endpoints (/proc/net/sctp/eps). */ 211/* Display sctp endpoints (/proc/net/sctp/eps). */
164static int sctp_eps_seq_show(struct seq_file *seq, void *v) 212static int sctp_eps_seq_show(struct seq_file *seq, void *v)
165{ 213{
@@ -167,38 +215,50 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
167 struct sctp_ep_common *epb; 215 struct sctp_ep_common *epb;
168 struct sctp_endpoint *ep; 216 struct sctp_endpoint *ep;
169 struct sock *sk; 217 struct sock *sk;
170 int hash; 218 int hash = *(int *)v;
171 219
172 seq_printf(seq, " ENDPT SOCK STY SST HBKT LPORT LADDRS\n"); 220 if (hash > sctp_ep_hashsize)
173 for (hash = 0; hash < sctp_ep_hashsize; hash++) { 221 return -ENOMEM;
174 head = &sctp_ep_hashtable[hash]; 222
175 read_lock(&head->lock); 223 head = &sctp_ep_hashtable[hash-1];
176 for (epb = head->chain; epb; epb = epb->next) { 224 sctp_local_bh_disable();
177 ep = sctp_ep(epb); 225 read_lock(&head->lock);
178 sk = epb->sk; 226 for (epb = head->chain; epb; epb = epb->next) {
179 seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d ", ep, sk, 227 ep = sctp_ep(epb);
180 sctp_sk(sk)->type, sk->sk_state, hash, 228 sk = epb->sk;
181 epb->bind_addr.port); 229 seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk,
182 sctp_seq_dump_local_addrs(seq, epb); 230 sctp_sk(sk)->type, sk->sk_state, hash-1,
183 seq_printf(seq, "\n"); 231 epb->bind_addr.port,
184 } 232 sock_i_uid(sk), sock_i_ino(sk));
185 read_unlock(&head->lock); 233
234 sctp_seq_dump_local_addrs(seq, epb);
235 seq_printf(seq, "\n");
186 } 236 }
237 read_unlock(&head->lock);
238 sctp_local_bh_enable();
187 239
188 return 0; 240 return 0;
189} 241}
190 242
243static struct seq_operations sctp_eps_ops = {
244 .start = sctp_eps_seq_start,
245 .next = sctp_eps_seq_next,
246 .stop = sctp_eps_seq_stop,
247 .show = sctp_eps_seq_show,
248};
249
250
191/* Initialize the seq file operations for 'eps' object. */ 251/* Initialize the seq file operations for 'eps' object. */
192static int sctp_eps_seq_open(struct inode *inode, struct file *file) 252static int sctp_eps_seq_open(struct inode *inode, struct file *file)
193{ 253{
194 return single_open(file, sctp_eps_seq_show, NULL); 254 return seq_open(file, &sctp_eps_ops);
195} 255}
196 256
197static struct file_operations sctp_eps_seq_fops = { 257static struct file_operations sctp_eps_seq_fops = {
198 .open = sctp_eps_seq_open, 258 .open = sctp_eps_seq_open,
199 .read = seq_read, 259 .read = seq_read,
200 .llseek = seq_lseek, 260 .llseek = seq_lseek,
201 .release = single_release, 261 .release = seq_release,
202}; 262};
203 263
204/* Set up the proc fs entry for 'eps' object. */ 264/* Set up the proc fs entry for 'eps' object. */
@@ -221,6 +281,40 @@ void sctp_eps_proc_exit(void)
221 remove_proc_entry("eps", proc_net_sctp); 281 remove_proc_entry("eps", proc_net_sctp);
222} 282}
223 283
284
285static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
286{
287 if (*pos > sctp_assoc_hashsize)
288 return NULL;
289
290 if (*pos < 0)
291 *pos = 0;
292
293 if (*pos == 0)
294 seq_printf(seq, " ASSOC SOCK STY SST ST HBKT ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "
295 "RPORT LADDRS <-> RADDRS\n");
296
297 ++*pos;
298
299 return (void *)pos;
300}
301
302static void sctp_assocs_seq_stop(struct seq_file *seq, void *v)
303{
304 return;
305}
306
307
308static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos)
309{
310 if (*pos > sctp_assoc_hashsize)
311 return NULL;
312
313 ++*pos;
314
315 return pos;
316}
317
224/* Display sctp associations (/proc/net/sctp/assocs). */ 318/* Display sctp associations (/proc/net/sctp/assocs). */
225static int sctp_assocs_seq_show(struct seq_file *seq, void *v) 319static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
226{ 320{
@@ -228,43 +322,57 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
228 struct sctp_ep_common *epb; 322 struct sctp_ep_common *epb;
229 struct sctp_association *assoc; 323 struct sctp_association *assoc;
230 struct sock *sk; 324 struct sock *sk;
231 int hash; 325 int hash = *(int *)v;
232 326
233 seq_printf(seq, " ASSOC SOCK STY SST ST HBKT LPORT RPORT " 327 if (hash > sctp_assoc_hashsize)
234 "LADDRS <-> RADDRS\n"); 328 return -ENOMEM;
235 for (hash = 0; hash < sctp_assoc_hashsize; hash++) { 329
236 head = &sctp_assoc_hashtable[hash]; 330 head = &sctp_assoc_hashtable[hash-1];
237 read_lock(&head->lock); 331 sctp_local_bh_disable();
238 for (epb = head->chain; epb; epb = epb->next) { 332 read_lock(&head->lock);
239 assoc = sctp_assoc(epb); 333 for (epb = head->chain; epb; epb = epb->next) {
240 sk = epb->sk; 334 assoc = sctp_assoc(epb);
241 seq_printf(seq, 335 sk = epb->sk;
242 "%8p %8p %-3d %-3d %-2d %-4d %-5d %-5d ", 336 seq_printf(seq,
243 assoc, sk, sctp_sk(sk)->type, sk->sk_state, 337 "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ",
244 assoc->state, hash, epb->bind_addr.port, 338 assoc, sk, sctp_sk(sk)->type, sk->sk_state,
245 assoc->peer.port); 339 assoc->state, hash-1, assoc->assoc_id,
246 sctp_seq_dump_local_addrs(seq, epb); 340 (sk->sk_rcvbuf - assoc->rwnd),
247 seq_printf(seq, "<-> "); 341 assoc->sndbuf_used,
248 sctp_seq_dump_remote_addrs(seq, assoc); 342 sock_i_uid(sk), sock_i_ino(sk),
249 seq_printf(seq, "\n"); 343 epb->bind_addr.port,
250 } 344 assoc->peer.port);
251 read_unlock(&head->lock); 345
346 seq_printf(seq, " ");
347 sctp_seq_dump_local_addrs(seq, epb);
348 seq_printf(seq, "<-> ");
349 sctp_seq_dump_remote_addrs(seq, assoc);
350 seq_printf(seq, "\n");
252 } 351 }
352 read_unlock(&head->lock);
353 sctp_local_bh_enable();
253 354
254 return 0; 355 return 0;
255} 356}
256 357
358static struct seq_operations sctp_assoc_ops = {
359 .start = sctp_assocs_seq_start,
360 .next = sctp_assocs_seq_next,
361 .stop = sctp_assocs_seq_stop,
362 .show = sctp_assocs_seq_show,
363};
364
257/* Initialize the seq file operations for 'assocs' object. */ 365/* Initialize the seq file operations for 'assocs' object. */
258static int sctp_assocs_seq_open(struct inode *inode, struct file *file) 366static int sctp_assocs_seq_open(struct inode *inode, struct file *file)
259{ 367{
260 return single_open(file, sctp_assocs_seq_show, NULL); 368 return seq_open(file, &sctp_assoc_ops);
261} 369}
262 370
263static struct file_operations sctp_assocs_seq_fops = { 371static struct file_operations sctp_assocs_seq_fops = {
264 .open = sctp_assocs_seq_open, 372 .open = sctp_assocs_seq_open,
265 .read = seq_read, 373 .read = seq_read,
266 .llseek = seq_lseek, 374 .llseek = seq_lseek,
267 .release = single_release, 375 .release = seq_release,
268}; 376};
269 377
270/* Set up the proc fs entry for 'assocs' object. */ 378/* Set up the proc fs entry for 'assocs' object. */
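The proc.c rewrite converts /proc/net/sctp/eps and /proc/net/sctp/assocs from single_open(), which rendered every hash bucket in one ->show() call, to a real seq_file iterator: the loff_t position encodes "hash bucket + 1" (position 0 emits the header), ->show() dumps one bucket under its lock, and ->next() just advances the position. The contract is easiest to see in a standalone model; the hash table contents here are fabricated, and start/next are rolled into one driver function:

#include <stdio.h>

#define HASHSIZE 4
static const char *bucket[HASHSIZE] = { "epA", "-", "epB epC", "-" };

/* Position 0 prints the header once, then positions 1..HASHSIZE each
 * select a bucket (stored as hash+1, like the new sctp_eps_seq_start()). */
static int seq_advance(long *pos)
{
	if (*pos == 0)
		puts(" HBKT ENTRIES");
	++*pos;
	return *pos <= HASHSIZE;
}

/* ->show: dump one bucket; the kernel takes the bucket lock here. */
static void seq_show(long pos)
{
	printf("%5ld %s\n", pos - 1, bucket[pos - 1]);
}

int main(void)
{
	long pos = 0;	/* the seq_file core persists this across reads */

	while (seq_advance(&pos))
		seq_show(pos);
	return 0;
}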
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 2e1f9c3556..5135e1a25d 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -378,10 +378,13 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
378{ 378{
379 int ret = inet_addr_type(addr->v4.sin_addr.s_addr); 379 int ret = inet_addr_type(addr->v4.sin_addr.s_addr);
380 380
381 /* FIXME: ip_nonlocal_bind sysctl support. */
382 381
383 if (addr->v4.sin_addr.s_addr != INADDR_ANY && ret != RTN_LOCAL) 382 if (addr->v4.sin_addr.s_addr != INADDR_ANY &&
383 ret != RTN_LOCAL &&
384 !sp->inet.freebind &&
385 !sysctl_ip_nonlocal_bind)
384 return 0; 386 return 0;
387
385 return 1; 388 return 1;
386} 389}
387 390
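The protocol.c change implements the FIXME it deletes: a non-local IPv4 address may now be bound if either the socket's freebind option or the ip_nonlocal_bind sysctl is set. Reduced to its decision, sctp_v4_available() is one expression; a hypothetical standalone form:

/* Bindable unless the address is neither wildcard nor local and no
 * nonlocal-bind escape hatch (per-socket freebind or the global
 * sysctl) is enabled. */
static int v4_available(int is_any, int is_local, int freebind,
			int nonlocal_sysctl)
{
	if (!is_any && !is_local && !freebind && !nonlocal_sysctl)
		return 0;
	return 1;
}

int main(void)
{
	/* non-local address, freebind set: allowed */
	return v4_available(0, 0, 1, 0) ? 0 : 1;
}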
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 33ac8bf47b..5baed9bb7d 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1830,7 +1830,7 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
1830 * be a better choice than any of the embedded addresses. 1830 * be a better choice than any of the embedded addresses.
1831 */ 1831 */
1832 if (peer_addr) 1832 if (peer_addr)
1833 if(!sctp_assoc_add_peer(asoc, peer_addr, gfp)) 1833 if(!sctp_assoc_add_peer(asoc, peer_addr, gfp, SCTP_ACTIVE))
1834 goto nomem; 1834 goto nomem;
1835 1835
1836 /* Process the initialization parameters. */ 1836 /* Process the initialization parameters. */
@@ -1841,6 +1841,14 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
1841 goto clean_up; 1841 goto clean_up;
1842 } 1842 }
1843 1843
1844 /* Walk list of transports, removing transports in the UNKNOWN state. */
1845 list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
1846 transport = list_entry(pos, struct sctp_transport, transports);
1847 if (transport->state == SCTP_UNKNOWN) {
1848 sctp_assoc_rm_peer(asoc, transport);
1849 }
1850 }
1851
1844 /* The fixed INIT headers are always in network byte 1852 /* The fixed INIT headers are always in network byte
1845 * order. 1853 * order.
1846 */ 1854 */
@@ -1906,7 +1914,8 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
1906 * stream sequence number shall be set to 0. 1914 * stream sequence number shall be set to 0.
1907 */ 1915 */
1908 1916
1909 /* Allocate storage for the negotiated streams if it is not a temporary * association. 1917 /* Allocate storage for the negotiated streams if it is not a temporary
1918 * association.
1910 */ 1919 */
1911 if (!asoc->temp) { 1920 if (!asoc->temp) {
1912 int assoc_id; 1921 int assoc_id;
@@ -1952,6 +1961,9 @@ clean_up:
1952 list_del_init(pos); 1961 list_del_init(pos);
1953 sctp_transport_free(transport); 1962 sctp_transport_free(transport);
1954 } 1963 }
1964
1965 asoc->peer.transport_count = 0;
1966
1955nomem: 1967nomem:
1956 return 0; 1968 return 0;
1957} 1969}
@@ -1995,7 +2007,7 @@ static int sctp_process_param(struct sctp_association *asoc,
1995 af->from_addr_param(&addr, param.addr, asoc->peer.port, 0); 2007 af->from_addr_param(&addr, param.addr, asoc->peer.port, 0);
1996 scope = sctp_scope(peer_addr); 2008 scope = sctp_scope(peer_addr);
1997 if (sctp_in_scope(&addr, scope)) 2009 if (sctp_in_scope(&addr, scope))
1998 if (!sctp_assoc_add_peer(asoc, &addr, gfp)) 2010 if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_ACTIVE))
1999 return 0; 2011 return 0;
2000 break; 2012 break;
2001 2013
@@ -2396,7 +2408,7 @@ static __u16 sctp_process_asconf_param(struct sctp_association *asoc,
2396 * Due to Resource Shortage'. 2408 * Due to Resource Shortage'.
2397 */ 2409 */
2398 2410
2399 peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC); 2411 peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC, SCTP_ACTIVE);
2400 if (!peer) 2412 if (!peer)
2401 return SCTP_ERROR_RSRC_LOW; 2413 return SCTP_ERROR_RSRC_LOW;
2402 2414
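sctp_process_init() above now prunes transports left in the UNKNOWN state once the peer's INIT parameters have been processed: any address the local side added speculatively but the peer never confirmed is removed with sctp_assoc_rm_peer(). Deleting while walking requires the safe-iteration idiom; a minimal pointer-to-pointer version of the same pattern (the kernel uses list_for_each_safe() for the same reason -- the current node may be freed mid-walk):

#include <stdlib.h>

enum tstate { T_INACTIVE, T_ACTIVE, T_UNKNOWN };

struct transport {
	enum tstate state;
	struct transport *next;
};

/* Remove every UNKNOWN transport from a singly linked list. */
static void prune_unknown(struct transport **head)
{
	struct transport **pp = head;

	while (*pp) {
		if ((*pp)->state == T_UNKNOWN) {
			struct transport *dead = *pp;

			*pp = dead->next;	/* unlink before freeing */
			free(dead);
		} else {
			pp = &(*pp)->next;
		}
	}
}

int main(void)
{
	struct transport *head = calloc(1, sizeof(*head));
	struct transport *tail = calloc(1, sizeof(*tail));

	head->state = T_UNKNOWN;
	tail->state = T_ACTIVE;
	head->next = tail;
	prune_unknown(&head);	/* the UNKNOWN node is unlinked and freed */
	free(head);		/* head now points at the surviving node */
	return 0;
}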
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index f65fa44195..778639db12 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -414,11 +414,13 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
414 */ 414 */
415 asoc->overall_error_count++; 415 asoc->overall_error_count++;
416 416
417 if (transport->active && 417 if (transport->state != SCTP_INACTIVE &&
418 (transport->error_count++ >= transport->max_retrans)) { 418 (transport->error_count++ >= transport->max_retrans)) {
419 SCTP_DEBUG_PRINTK("transport_strike: transport " 419 SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p",
420 "IP:%d.%d.%d.%d failed.\n", 420 " transport IP: port:%d failed.\n",
421 NIPQUAD(transport->ipaddr.v4.sin_addr)); 421 asoc,
422 (&transport->ipaddr),
423 transport->ipaddr.v4.sin_port);
422 sctp_assoc_control_transport(asoc, transport, 424 sctp_assoc_control_transport(asoc, transport,
423 SCTP_TRANSPORT_DOWN, 425 SCTP_TRANSPORT_DOWN,
424 SCTP_FAILED_THRESHOLD); 426 SCTP_FAILED_THRESHOLD);
@@ -593,7 +595,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
593 /* Mark the destination transport address as active if it is not so 595 /* Mark the destination transport address as active if it is not so
594 * marked. 596 * marked.
595 */ 597 */
596 if (!t->active) 598 if (t->state == SCTP_INACTIVE)
597 sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, 599 sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP,
598 SCTP_HEARTBEAT_SUCCESS); 600 SCTP_HEARTBEAT_SUCCESS);
599 601
@@ -665,8 +667,11 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
665 667
666 asoc->state = state; 668 asoc->state = state;
667 669
670 SCTP_DEBUG_PRINTK("sctp_cmd_new_state: asoc %p[%s]\n",
671 asoc, sctp_state_tbl[state]);
672
668 if (sctp_style(sk, TCP)) { 673 if (sctp_style(sk, TCP)) {
669 /* Change the sk->sk_state of a TCP-style socket that has 674 /* Change the sk->sk_state of a TCP-style socket that has
670 * successfully completed a connect() call. 675 * successfully completed a connect() call.
671 */ 676 */
672 if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED)) 677 if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED))
@@ -678,6 +683,16 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
678 sk->sk_shutdown |= RCV_SHUTDOWN; 683 sk->sk_shutdown |= RCV_SHUTDOWN;
679 } 684 }
680 685
686 if (sctp_state(asoc, COOKIE_WAIT)) {
687 /* Reset init timeouts since they may have been
688 * increased due to timer expirations.
689 */
690 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] =
691 asoc->ep->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT];
692 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] =
693 asoc->ep->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE];
694 }
695
681 if (sctp_state(asoc, ESTABLISHED) || 696 if (sctp_state(asoc, ESTABLISHED) ||
682 sctp_state(asoc, CLOSED) || 697 sctp_state(asoc, CLOSED) ||
683 sctp_state(asoc, SHUTDOWN_RECEIVED)) { 698 sctp_state(asoc, SHUTDOWN_RECEIVED)) {
@@ -1120,10 +1135,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1120 * to be executed only during failed attempts of 1135 * to be executed only during failed attempts of
1121 * association establishment. 1136 * association establishment.
1122 */ 1137 */
1123 if ((asoc->peer.retran_path != 1138 if ((asoc->peer.retran_path !=
1124 asoc->peer.primary_path) && 1139 asoc->peer.primary_path) &&
1125 (asoc->counters[SCTP_COUNTER_INIT_ERROR] > 0)) { 1140 (asoc->init_err_counter > 0)) {
1126 sctp_add_cmd_sf(commands, 1141 sctp_add_cmd_sf(commands,
1127 SCTP_CMD_FORCE_PRIM_RETRAN, 1142 SCTP_CMD_FORCE_PRIM_RETRAN,
1128 SCTP_NULL()); 1143 SCTP_NULL());
1129 } 1144 }
@@ -1237,18 +1252,67 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1237 sctp_association_put(asoc); 1252 sctp_association_put(asoc);
1238 break; 1253 break;
1239 1254
1255 case SCTP_CMD_INIT_CHOOSE_TRANSPORT:
1256 chunk = cmd->obj.ptr;
1257 t = sctp_assoc_choose_init_transport(asoc);
1258 asoc->init_last_sent_to = t;
1259 chunk->transport = t;
1260 t->init_sent_count++;
1261 break;
1262
1240 case SCTP_CMD_INIT_RESTART: 1263 case SCTP_CMD_INIT_RESTART:
1241 /* Do the needed accounting and updates 1264 /* Do the needed accounting and updates
1242 * associated with restarting an initialization 1265 * associated with restarting an initialization
1243 * timer. 1266 * timer. Only multiply the timeout by two if
1267 * all transports have been tried at the current
1268 * timeout.
1269 */
1270 t = asoc->init_last_sent_to;
1271 asoc->init_err_counter++;
1272
1273 if (t->init_sent_count > (asoc->init_cycle + 1)) {
1274 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] *= 2;
1275 if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] >
1276 asoc->max_init_timeo) {
1277 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] =
1278 asoc->max_init_timeo;
1279 }
1280 asoc->init_cycle++;
1281 SCTP_DEBUG_PRINTK(
1282 "T1 INIT Timeout adjustment"
1283 " init_err_counter: %d"
1284 " cycle: %d"
1285 " timeout: %d\n",
1286 asoc->init_err_counter,
1287 asoc->init_cycle,
1288 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT]);
1289 }
1290
1291 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
1292 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
1293 break;
1294
1295 case SCTP_CMD_COOKIEECHO_RESTART:
1296 /* Do the needed accounting and updates
1297 * associated with restarting an initialization
1298 * timer. Only multiply the timeout by two if
1299 * all transports have been tried at the current
1300 * timeout.
1244 */ 1301 */
1245 asoc->counters[SCTP_COUNTER_INIT_ERROR]++; 1302 asoc->init_err_counter++;
1246 asoc->timeouts[cmd->obj.to] *= 2; 1303
1247 if (asoc->timeouts[cmd->obj.to] > 1304 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] *= 2;
1305 if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] >
1248 asoc->max_init_timeo) { 1306 asoc->max_init_timeo) {
1249 asoc->timeouts[cmd->obj.to] = 1307 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] =
1250 asoc->max_init_timeo; 1308 asoc->max_init_timeo;
1251 } 1309 }
1310 SCTP_DEBUG_PRINTK(
1311 "T1 COOKIE Timeout adjustment"
1312 " init_err_counter: %d"
1313 " timeout: %d\n",
1314 asoc->init_err_counter,
1315 asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE]);
1252 1316
1253 /* If we've sent any data bundled with 1317 /* If we've sent any data bundled with
1254 * COOKIE-ECHO we need to resend. 1318 * COOKIE-ECHO we need to resend.
@@ -1261,7 +1325,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1261 1325
1262 sctp_add_cmd_sf(commands, 1326 sctp_add_cmd_sf(commands,
1263 SCTP_CMD_TIMER_RESTART, 1327 SCTP_CMD_TIMER_RESTART,
1264 SCTP_TO(cmd->obj.to)); 1328 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
1265 break; 1329 break;
1266 1330
1267 case SCTP_CMD_INIT_FAILED: 1331 case SCTP_CMD_INIT_FAILED:
@@ -1273,12 +1337,13 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1273 subtype, chunk, cmd->obj.u32); 1337 subtype, chunk, cmd->obj.u32);
1274 break; 1338 break;
1275 1339
1276 case SCTP_CMD_COUNTER_INC: 1340 case SCTP_CMD_INIT_COUNTER_INC:
1277 asoc->counters[cmd->obj.counter]++; 1341 asoc->init_err_counter++;
1278 break; 1342 break;
1279 1343
1280 case SCTP_CMD_COUNTER_RESET: 1344 case SCTP_CMD_INIT_COUNTER_RESET:
1281 asoc->counters[cmd->obj.counter] = 0; 1345 asoc->init_err_counter = 0;
1346 asoc->init_cycle = 0;
1282 break; 1347 break;
1283 1348
1284 case SCTP_CMD_REPORT_DUP: 1349 case SCTP_CMD_REPORT_DUP:
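The reworked SCTP_CMD_INIT_RESTART only doubles the T1-INIT timeout once every transport has been tried at the current value (tracked by init_sent_count versus init_cycle), and clamps the result at max_init_timeo; SCTP_CMD_COOKIEECHO_RESTART keeps the old unconditional doubling for T1-COOKIE. The arithmetic in isolation, using milliseconds instead of jiffies and invented names:

#include <stdio.h>

struct init_timer {
	unsigned long timeout;		/* T1-INIT timeout */
	unsigned long max_init_timeo;	/* upper clamp */
	int err_counter;		/* failed attempts so far */
	int cycle;			/* completed passes over all paths */
};

/* One INIT retransmission expired: count the error, and back off only
 * when this transport has already been tried in the current cycle. */
static void init_restart(struct init_timer *t, int init_sent_count)
{
	t->err_counter++;
	if (init_sent_count > t->cycle + 1) {
		t->timeout *= 2;
		if (t->timeout > t->max_init_timeo)
			t->timeout = t->max_init_timeo;
		t->cycle++;
	}
}

int main(void)
{
	struct init_timer t = { .timeout = 1000, .max_init_timeo = 3000 };
	int sent;

	for (sent = 1; sent <= 4; sent++) {
		init_restart(&t, sent);
		printf("attempt %d: timeout %lums\n", sent, t.timeout);
	}
	return 0;	/* prints 1000, 2000, 3000 (clamped), 3000 */
}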
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 8e01b8f09a..058189684c 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -533,6 +533,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
533 sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT, 533 sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT,
534 SCTP_PEER_INIT(initchunk)); 534 SCTP_PEER_INIT(initchunk));
535 535
536 /* Reset init error count upon receipt of INIT-ACK. */
537 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
538
536 /* 5.1 C) "A" shall stop the T1-init timer and leave 539 /* 5.1 C) "A" shall stop the T1-init timer and leave
537 * COOKIE-WAIT state. "A" shall then ... start the T1-cookie 540 * COOKIE-WAIT state. "A" shall then ... start the T1-cookie
538 * timer, and enter the COOKIE-ECHOED state. 541 * timer, and enter the COOKIE-ECHOED state.
@@ -775,8 +778,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
775 * from the COOKIE-ECHOED state to the COOKIE-WAIT 778 * from the COOKIE-ECHOED state to the COOKIE-WAIT
776 * state is performed. 779 * state is performed.
777 */ 780 */
778 sctp_add_cmd_sf(commands, SCTP_CMD_COUNTER_RESET, 781 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
779 SCTP_COUNTER(SCTP_COUNTER_INIT_ERROR));
780 782
781 /* RFC 2960 5.1 Normal Establishment of an Association 783 /* RFC 2960 5.1 Normal Establishment of an Association
782 * 784 *
@@ -1019,10 +1021,22 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
1019 link = sctp_assoc_lookup_paddr(asoc, &from_addr); 1021 link = sctp_assoc_lookup_paddr(asoc, &from_addr);
1020 1022
1021 /* This should never happen, but lets log it if so. */ 1023 /* This should never happen, but lets log it if so. */
1022 if (!link) { 1024 if (unlikely(!link)) {
1023 printk(KERN_WARNING 1025 if (from_addr.sa.sa_family == AF_INET6) {
1024 "%s: Could not find address %d.%d.%d.%d\n", 1026 printk(KERN_WARNING
1025 __FUNCTION__, NIPQUAD(from_addr.v4.sin_addr)); 1027 "%s association %p could not find address "
1028 "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
1029 __FUNCTION__,
1030 asoc,
1031 NIP6(from_addr.v6.sin6_addr));
1032 } else {
1033 printk(KERN_WARNING
1034 "%s association %p could not find address "
1035 "%u.%u.%u.%u\n",
1036 __FUNCTION__,
1037 asoc,
1038 NIPQUAD(from_addr.v4.sin_addr.s_addr));
1039 }
1026 return SCTP_DISPOSITION_DISCARD; 1040 return SCTP_DISPOSITION_DISCARD;
1027 } 1041 }
1028 1042
@@ -2095,9 +2109,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
2095 sctp_errhdr_t *err; 2109 sctp_errhdr_t *err;
2096 struct sctp_chunk *reply; 2110 struct sctp_chunk *reply;
2097 struct sctp_bind_addr *bp; 2111 struct sctp_bind_addr *bp;
2098 int attempts; 2112 int attempts = asoc->init_err_counter + 1;
2099
2100 attempts = asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1;
2101 2113
2102 if (attempts >= asoc->max_init_attempts) { 2114 if (attempts >= asoc->max_init_attempts) {
2103 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, 2115 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
@@ -2157,8 +2169,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
2157 /* Cast away the const modifier, as we want to just 2169 /* Cast away the const modifier, as we want to just
2158 * rerun it through as a side effect. 2170 * rerun it through as a side effect.
2159 */ 2171 */
2160 sctp_add_cmd_sf(commands, SCTP_CMD_COUNTER_INC, 2172 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_INC, SCTP_NULL());
2161 SCTP_COUNTER(SCTP_COUNTER_INIT_ERROR));
2162 2173
2163 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, 2174 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
2164 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); 2175 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
@@ -2281,8 +2292,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
2281 if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) 2292 if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
2282 error = ((sctp_errhdr_t *)chunk->skb->data)->cause; 2293 error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
2283 2294
2284 sctp_stop_t1_and_abort(commands, error); 2295 return sctp_stop_t1_and_abort(commands, error, asoc, chunk->transport);
2285 return SCTP_DISPOSITION_ABORT;
2286} 2296}
2287 2297
2288/* 2298/*
@@ -2294,8 +2304,8 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep
2294 void *arg, 2304 void *arg,
2295 sctp_cmd_seq_t *commands) 2305 sctp_cmd_seq_t *commands)
2296{ 2306{
2297 sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR); 2307 return sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR, asoc,
2298 return SCTP_DISPOSITION_ABORT; 2308 (struct sctp_transport *)arg);
2299} 2309}
2300 2310
2301/* 2311/*
@@ -2318,8 +2328,12 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
2318 * 2328 *
2319 * This is common code called by several sctp_sf_*_abort() functions above. 2329 * This is common code called by several sctp_sf_*_abort() functions above.
2320 */ 2330 */
2321void sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, __u16 error) 2331sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
2332 __u16 error,
2333 const struct sctp_association *asoc,
2334 struct sctp_transport *transport)
2322{ 2335{
2336 SCTP_DEBUG_PRINTK("ABORT received (INIT).\n");
2323 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, 2337 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
2324 SCTP_STATE(SCTP_STATE_CLOSED)); 2338 SCTP_STATE(SCTP_STATE_CLOSED));
2325 SCTP_INC_STATS(SCTP_MIB_ABORTEDS); 2339 SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
@@ -2328,6 +2342,7 @@ void sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, __u16 error)
2328 /* CMD_INIT_FAILED will DELETE_TCB. */ 2342 /* CMD_INIT_FAILED will DELETE_TCB. */
2329 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, 2343 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
2330 SCTP_U32(error)); 2344 SCTP_U32(error));
2345 return SCTP_DISPOSITION_ABORT;
2331} 2346}
2332 2347
2333/* 2348/*
@@ -3805,6 +3820,10 @@ sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep,
3805 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, 3820 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC,
3806 SCTP_ASOC((struct sctp_association *) asoc)); 3821 SCTP_ASOC((struct sctp_association *) asoc));
3807 3822
3823 /* Choose transport for INIT. */
3824 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT,
3825 SCTP_CHUNK(repl));
3826
3808 /* After sending the INIT, "A" starts the T1-init timer and 3827 /* After sending the INIT, "A" starts the T1-init timer and
3809 * enters the COOKIE-WAIT state. 3828 * enters the COOKIE-WAIT state.
3810 */ 3829 */
@@ -4589,7 +4608,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
4589} 4608}
4590 4609
4591/* 4610/*
4592 * sctp_sf_t1_timer_expire 4611 * sctp_sf_t1_init_timer_expire
4593 * 4612 *
4594 * Section: 4 Note: 2 4613 * Section: 4 Note: 2
4595 * Verification Tag: 4614 * Verification Tag:
@@ -4603,7 +4622,59 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
4603 * endpoint MUST abort the initialization process and report the 4622 * endpoint MUST abort the initialization process and report the
4604 * error to SCTP user. 4623 * error to SCTP user.
4605 * 4624 *
4606 * 3) If the T1-cookie timer expires, the endpoint MUST retransmit 4625 * Outputs
4626 * (timers, events)
4627 *
4628 */
4629sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
4630 const struct sctp_association *asoc,
4631 const sctp_subtype_t type,
4632 void *arg,
4633 sctp_cmd_seq_t *commands)
4634{
4635 struct sctp_chunk *repl = NULL;
4636 struct sctp_bind_addr *bp;
4637 int attempts = asoc->init_err_counter + 1;
4638
4639 SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n");
4640
4641 if (attempts < asoc->max_init_attempts) {
4642 bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
4643 repl = sctp_make_init(asoc, bp, GFP_ATOMIC, 0);
4644 if (!repl)
4645 return SCTP_DISPOSITION_NOMEM;
4646
4647 /* Choose transport for INIT. */
4648 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT,
4649 SCTP_CHUNK(repl));
4650
4651 /* Issue a sideeffect to do the needed accounting. */
4652 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_RESTART,
4653 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
4654
4655 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
4656 } else {
4657 SCTP_DEBUG_PRINTK("Giving up on INIT, attempts: %d"
4658 " max_init_attempts: %d\n",
4659 attempts, asoc->max_init_attempts);
4660 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
4661 SCTP_U32(SCTP_ERROR_NO_ERROR));
4662 return SCTP_DISPOSITION_DELETE_TCB;
4663 }
4664
4665 return SCTP_DISPOSITION_CONSUME;
4666}
4667
4668/*
4669 * sctp_sf_t1_cookie_timer_expire
4670 *
4671 * Section: 4 Note: 2
4672 * Verification Tag:
4673 * Inputs
4674 * (endpoint, asoc)
4675 *
4676 * RFC 2960 Section 4 Notes
4677 * 3) If the T1-cookie timer expires, the endpoint MUST retransmit
4607 * COOKIE ECHO and re-start the T1-cookie timer without changing 4678 * COOKIE ECHO and re-start the T1-cookie timer without changing
4608 * state. This MUST be repeated up to 'Max.Init.Retransmits' times. 4679 * state. This MUST be repeated up to 'Max.Init.Retransmits' times.
4609 * After that, the endpoint MUST abort the initialization process and 4680 * After that, the endpoint MUST abort the initialization process and
@@ -4613,46 +4684,26 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
4613 * (timers, events) 4684 * (timers, events)
4614 * 4685 *
4615 */ 4686 */
4616sctp_disposition_t sctp_sf_t1_timer_expire(const struct sctp_endpoint *ep, 4687sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep,
4617 const struct sctp_association *asoc, 4688 const struct sctp_association *asoc,
4618 const sctp_subtype_t type, 4689 const sctp_subtype_t type,
4619 void *arg, 4690 void *arg,
4620 sctp_cmd_seq_t *commands) 4691 sctp_cmd_seq_t *commands)
4621{ 4692{
4622 struct sctp_chunk *repl; 4693 struct sctp_chunk *repl = NULL;
4623 struct sctp_bind_addr *bp; 4694 int attempts = asoc->init_err_counter + 1;
4624 sctp_event_timeout_t timer = (sctp_event_timeout_t) arg;
4625 int timeout;
4626 int attempts;
4627
4628 timeout = asoc->timeouts[timer];
4629 attempts = asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1;
4630 repl = NULL;
4631 4695
4632 SCTP_DEBUG_PRINTK("Timer T1 expired.\n"); 4696 SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n");
4633 4697
4634 if (attempts < asoc->max_init_attempts) { 4698 if (attempts < asoc->max_init_attempts) {
4635 switch (timer) { 4699 repl = sctp_make_cookie_echo(asoc, NULL);
4636 case SCTP_EVENT_TIMEOUT_T1_INIT:
4637 bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
4638 repl = sctp_make_init(asoc, bp, GFP_ATOMIC, 0);
4639 break;
4640
4641 case SCTP_EVENT_TIMEOUT_T1_COOKIE:
4642 repl = sctp_make_cookie_echo(asoc, NULL);
4643 break;
4644
4645 default:
4646 BUG();
4647 break;
4648 };
4649
4650 if (!repl) 4700 if (!repl)
4651 goto nomem; 4701 return SCTP_DISPOSITION_NOMEM;
4652 4702
4653 /* Issue a sideeffect to do the needed accounting. */ 4703 /* Issue a sideeffect to do the needed accounting. */
4654 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_RESTART, 4704 sctp_add_cmd_sf(commands, SCTP_CMD_COOKIEECHO_RESTART,
4655 SCTP_TO(timer)); 4705 SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
4706
4656 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); 4707 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
4657 } else { 4708 } else {
4658 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, 4709 sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED,
@@ -4661,9 +4712,6 @@ sctp_disposition_t sctp_sf_t1_timer_expire(const struct sctp_endpoint *ep,
4661 } 4712 }
4662 4713
4663 return SCTP_DISPOSITION_CONSUME; 4714 return SCTP_DISPOSITION_CONSUME;
4664
4665nomem:
4666 return SCTP_DISPOSITION_NOMEM;
4667} 4715}
4668 4716
4669/* RFC2960 9.2 If the timer expires, the endpoint must re-send the SHUTDOWN 4717/* RFC2960 9.2 If the timer expires, the endpoint must re-send the SHUTDOWN
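The single sctp_sf_t1_timer_expire() is split into per-timer handlers above, but both follow one shape: below max_init_attempts, rebuild the chunk (INIT or COOKIE ECHO), pick a transport, restart the timer, and resend; at the limit, fail the association. Schematically, with a reduced disposition enum and a placeholder for chunk construction:

enum disposition { DISP_CONSUME, DISP_DELETE_TCB, DISP_NOMEM };

/* Shared skeleton of the two new expiry handlers.  'chunk_ok' stands
 * in for sctp_make_init()/sctp_make_cookie_echo() succeeding. */
static enum disposition t1_expire(int init_err_counter,
				  int max_init_attempts, int chunk_ok)
{
	if (init_err_counter + 1 < max_init_attempts) {
		if (!chunk_ok)
			return DISP_NOMEM;
		/* ...queue CHOOSE_TRANSPORT / *_RESTART / REPLY commands... */
		return DISP_CONSUME;
	}
	/* CMD_INIT_FAILED reports SCTP_ERROR_NO_ERROR and deletes the TCB */
	return DISP_DELETE_TCB;
}

int main(void)
{
	return t1_expire(7, 8, 1) == DISP_DELETE_TCB ? 0 : 1;
}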
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 8967846f69..75ef104087 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -783,7 +783,8 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
783 /* SCTP_STATE_COOKIE_WAIT */ \ 783 /* SCTP_STATE_COOKIE_WAIT */ \
784 {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \ 784 {.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
785 /* SCTP_STATE_COOKIE_ECHOED */ \ 785 /* SCTP_STATE_COOKIE_ECHOED */ \
786 {.fn = sctp_sf_t1_timer_expire, .name = "sctp_sf_t1_timer_expire"}, \ 786 {.fn = sctp_sf_t1_cookie_timer_expire, \
787 .name = "sctp_sf_t1_cookie_timer_expire"}, \
787 /* SCTP_STATE_ESTABLISHED */ \ 788 /* SCTP_STATE_ESTABLISHED */ \
788 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ 789 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
789 /* SCTP_STATE_SHUTDOWN_PENDING */ \ 790 /* SCTP_STATE_SHUTDOWN_PENDING */ \
@@ -802,7 +803,8 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
802 /* SCTP_STATE_CLOSED */ \ 803 /* SCTP_STATE_CLOSED */ \
803 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ 804 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
804 /* SCTP_STATE_COOKIE_WAIT */ \ 805 /* SCTP_STATE_COOKIE_WAIT */ \
805 {.fn = sctp_sf_t1_timer_expire, .name = "sctp_sf_t1_timer_expire"}, \ 806 {.fn = sctp_sf_t1_init_timer_expire, \
807 .name = "sctp_sf_t1_init_timer_expire"}, \
806 /* SCTP_STATE_COOKIE_ECHOED */ \ 808 /* SCTP_STATE_COOKIE_ECHOED */ \
807 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \ 809 {.fn = sctp_sf_timer_ignore, .name = "sctp_sf_timer_ignore"}, \
808 /* SCTP_STATE_ESTABLISHED */ \ 810 /* SCTP_STATE_ESTABLISHED */ \
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0b338eca6d..aad55dc379 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -262,18 +262,18 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk,
262 * sockaddr_in6 [RFC 2553]), 262 * sockaddr_in6 [RFC 2553]),
263 * addr_len - the size of the address structure. 263 * addr_len - the size of the address structure.
264 */ 264 */
265SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) 265SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len)
266{ 266{
267 int retval = 0; 267 int retval = 0;
268 268
269 sctp_lock_sock(sk); 269 sctp_lock_sock(sk);
270 270
271 SCTP_DEBUG_PRINTK("sctp_bind(sk: %p, uaddr: %p, addr_len: %d)\n", 271 SCTP_DEBUG_PRINTK("sctp_bind(sk: %p, addr: %p, addr_len: %d)\n",
272 sk, uaddr, addr_len); 272 sk, addr, addr_len);
273 273
274 /* Disallow binding twice. */ 274 /* Disallow binding twice. */
275 if (!sctp_sk(sk)->ep->base.bind_addr.port) 275 if (!sctp_sk(sk)->ep->base.bind_addr.port)
276 retval = sctp_do_bind(sk, (union sctp_addr *)uaddr, 276 retval = sctp_do_bind(sk, (union sctp_addr *)addr,
277 addr_len); 277 addr_len);
278 else 278 else
279 retval = -EINVAL; 279 retval = -EINVAL;
@@ -318,23 +318,27 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
318 unsigned short snum; 318 unsigned short snum;
319 int ret = 0; 319 int ret = 0;
320 320
321 SCTP_DEBUG_PRINTK("sctp_do_bind(sk: %p, newaddr: %p, len: %d)\n",
322 sk, addr, len);
323
324 /* Common sockaddr verification. */ 321 /* Common sockaddr verification. */
325 af = sctp_sockaddr_af(sp, addr, len); 322 af = sctp_sockaddr_af(sp, addr, len);
326 if (!af) 323 if (!af) {
324 SCTP_DEBUG_PRINTK("sctp_do_bind(sk: %p, newaddr: %p, len: %d) EINVAL\n",
325 sk, addr, len);
327 return -EINVAL; 326 return -EINVAL;
327 }
328
329 snum = ntohs(addr->v4.sin_port);
330
331 SCTP_DEBUG_PRINTK_IPADDR("sctp_do_bind(sk: %p, new addr: ",
332 ", port: %d, new port: %d, len: %d)\n",
333 sk,
334 addr,
335 bp->port, snum,
336 len);
328 337
329 /* PF specific bind() address verification. */ 338 /* PF specific bind() address verification. */
330 if (!sp->pf->bind_verify(sp, addr)) 339 if (!sp->pf->bind_verify(sp, addr))
331 return -EADDRNOTAVAIL; 340 return -EADDRNOTAVAIL;
332 341
333 snum= ntohs(addr->v4.sin_port);
334
335 SCTP_DEBUG_PRINTK("sctp_do_bind: port: %d, new port: %d\n",
336 bp->port, snum);
337
338 /* We must either be unbound, or bind to the same port. */ 342 /* We must either be unbound, or bind to the same port. */
339 if (bp->port && (snum != bp->port)) { 343 if (bp->port && (snum != bp->port)) {
340 SCTP_DEBUG_PRINTK("sctp_do_bind:" 344 SCTP_DEBUG_PRINTK("sctp_do_bind:"
@@ -816,7 +820,8 @@ out:
816 * 820 *
817 * Basically do nothing but copying the addresses from user to kernel 821 * Basically do nothing but copying the addresses from user to kernel
818 * land and invoking either sctp_bindx_add() or sctp_bindx_rem() on the sk. 822 * land and invoking either sctp_bindx_add() or sctp_bindx_rem() on the sk.
819 * This is used for tunneling the sctp_bindx() request through sctp_setsockopt() * from userspace. 823 * This is used for tunneling the sctp_bindx() request through sctp_setsockopt()
824 * from userspace.
820 * 825 *
821 * We don't use copy_from_user() for optimization: we first do the 826 * We don't use copy_from_user() for optimization: we first do the
822 * sanity checks (buffer size -fast- and access check-healthy 827 * sanity checks (buffer size -fast- and access check-healthy
@@ -913,6 +918,243 @@ out:
913 return err; 918 return err;
914} 919}
915 920
921/* __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, int addrs_size)
922 *
923 * Common routine for handling connect() and sctp_connectx().
924 * Connect will come in with just a single address.
925 */
926static int __sctp_connect(struct sock* sk,
927 struct sockaddr *kaddrs,
928 int addrs_size)
929{
930 struct sctp_sock *sp;
931 struct sctp_endpoint *ep;
932 struct sctp_association *asoc = NULL;
933 struct sctp_association *asoc2;
934 struct sctp_transport *transport;
935 union sctp_addr to;
936 struct sctp_af *af;
937 sctp_scope_t scope;
938 long timeo;
939 int err = 0;
940 int addrcnt = 0;
941 int walk_size = 0;
942 struct sockaddr *sa_addr;
943 void *addr_buf;
944
945 sp = sctp_sk(sk);
946 ep = sp->ep;
947
948 /* connect() cannot be done on a socket that is already in ESTABLISHED
949 * state - UDP-style peeled off socket or a TCP-style socket that
950 * is already connected.
951 * It cannot be done even on a TCP-style listening socket.
952 */
953 if (sctp_sstate(sk, ESTABLISHED) ||
954 (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) {
955 err = -EISCONN;
956 goto out_free;
957 }
958
959 /* Walk through the addrs buffer and count the number of addresses. */
960 addr_buf = kaddrs;
961 while (walk_size < addrs_size) {
962 sa_addr = (struct sockaddr *)addr_buf;
963 af = sctp_get_af_specific(sa_addr->sa_family);
964
965 /* If the address family is not supported or if this address
 966 * causes the address buffer to overflow, return EINVAL.
967 */
968 if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
969 err = -EINVAL;
970 goto out_free;
971 }
972
973 err = sctp_verify_addr(sk, (union sctp_addr *)sa_addr,
974 af->sockaddr_len);
975 if (err)
976 goto out_free;
977
978 memcpy(&to, sa_addr, af->sockaddr_len);
979 to.v4.sin_port = ntohs(to.v4.sin_port);
980
981 /* Check if there already is a matching association on the
982 * endpoint (other than the one created here).
983 */
984 asoc2 = sctp_endpoint_lookup_assoc(ep, &to, &transport);
985 if (asoc2 && asoc2 != asoc) {
986 if (asoc2->state >= SCTP_STATE_ESTABLISHED)
987 err = -EISCONN;
988 else
989 err = -EALREADY;
990 goto out_free;
991 }
992
993 /* If we could not find a matching association on the endpoint,
994 * make sure that there is no peeled-off association matching
995 * the peer address even on another socket.
996 */
997 if (sctp_endpoint_is_peeled_off(ep, &to)) {
998 err = -EADDRNOTAVAIL;
999 goto out_free;
1000 }
1001
1002 if (!asoc) {
1003 /* If a bind() or sctp_bindx() is not called prior to
1004 * an sctp_connectx() call, the system picks an
1005 * ephemeral port and will choose an address set
1006 * equivalent to binding with a wildcard address.
1007 */
1008 if (!ep->base.bind_addr.port) {
1009 if (sctp_autobind(sk)) {
1010 err = -EAGAIN;
1011 goto out_free;
1012 }
1013 }
1014
1015 scope = sctp_scope(&to);
1016 asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
1017 if (!asoc) {
1018 err = -ENOMEM;
1019 goto out_free;
1020 }
1021 }
1022
1023 /* Prime the peer's transport structures. */
1024 transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL,
1025 SCTP_UNKNOWN);
1026 if (!transport) {
1027 err = -ENOMEM;
1028 goto out_free;
1029 }
1030
1031 addrcnt++;
1032 addr_buf += af->sockaddr_len;
1033 walk_size += af->sockaddr_len;
1034 }
1035
1036 err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL);
1037 if (err < 0) {
1038 goto out_free;
1039 }
1040
1041 err = sctp_primitive_ASSOCIATE(asoc, NULL);
1042 if (err < 0) {
1043 goto out_free;
1044 }
1045
1046 /* Initialize sk's dport and daddr for getpeername() */
1047 inet_sk(sk)->dport = htons(asoc->peer.port);
1048 af = sctp_get_af_specific(to.sa.sa_family);
1049 af->to_sk_daddr(&to, sk);
1050
1051 timeo = sock_sndtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK);
1052 err = sctp_wait_for_connect(asoc, &timeo);
1053
1054 /* Don't free association on exit. */
1055 asoc = NULL;
1056
1057out_free:
1058
1059 SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p"
1060 " kaddrs: %p err: %d\n",
1061 asoc, kaddrs, err);
1062 if (asoc)
1063 sctp_association_free(asoc);
1064 return err;
1065}
1066
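
The kaddrs buffer walked above is a "packed array": sockaddr structures of
possibly mixed families laid out back to back with no padding, with
addrs_size giving the total byte length. A minimal userspace sketch of
building such a buffer (IPv4 only; the helper name is hypothetical):

    #include <string.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>

    /* Pack two IPv4 peer addresses back to back; returns the byte count
     * to pass as addrs_size. Both entries must carry the same port, as
     * the connectx API requires. */
    static size_t pack_two_v4(char *buf, const char *ip1, const char *ip2,
                              unsigned short port)
    {
            struct sockaddr_in sa;
            size_t off = 0;

            memset(&sa, 0, sizeof(sa));
            sa.sin_family = AF_INET;
            sa.sin_port = htons(port);

            inet_pton(AF_INET, ip1, &sa.sin_addr);
            memcpy(buf + off, &sa, sizeof(sa));
            off += sizeof(sa);

            inet_pton(AF_INET, ip2, &sa.sin_addr);
            memcpy(buf + off, &sa, sizeof(sa));
            off += sizeof(sa);

            return off;
    }
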
1067/* Helper for tunneling sctp_connectx() requests through sctp_setsockopt()
1068 *
1069 * API 8.9
1070 * int sctp_connectx(int sd, struct sockaddr *addrs, int addrcnt);
1071 *
1072 * If sd is an IPv4 socket, the addresses passed must be IPv4 addresses.
1073 * If the sd is an IPv6 socket, the addresses passed can either be IPv4
1074 * or IPv6 addresses.
1075 *
 1076 * A single address may be specified as INADDR_ANY or IN6ADDR_ANY; see
1077 * Section 3.1.2 for this usage.
1078 *
1079 * addrs is a pointer to an array of one or more socket addresses. Each
1080 * address is contained in its appropriate structure (i.e. struct
 1081 * sockaddr_in or struct sockaddr_in6); the family of the address type
 1082 * must be used to distinguish the address length (note that this
1083 * representation is termed a "packed array" of addresses). The caller
1084 * specifies the number of addresses in the array with addrcnt.
1085 *
1086 * On success, sctp_connectx() returns 0. On failure, sctp_connectx() returns
1087 * -1, and sets errno to the appropriate error code.
1088 *
1089 * For SCTP, the port given in each socket address must be the same, or
1090 * sctp_connectx() will fail, setting errno to EINVAL.
1091 *
 1092 * An application can use sctp_connectx() to initiate an association with
1093 * an endpoint that is multi-homed. Much like sctp_bindx() this call
1094 * allows a caller to specify multiple addresses at which a peer can be
1095 * reached. The way the SCTP stack uses the list of addresses to set up
 1096 * the association is implementation dependent. This function only
1097 * specifies that the stack will try to make use of all the addresses in
1098 * the list when needed.
1099 *
1100 * Note that the list of addresses passed in is only used for setting up
1101 * the association. It does not necessarily equal the set of addresses
1102 * the peer uses for the resulting association. If the caller wants to
1103 * find out the set of peer addresses, it must use sctp_getpaddrs() to
1104 * retrieve them after the association has been set up.
1105 *
1106 * Basically do nothing but copying the addresses from user to kernel
 1107 * land and invoking sctp_connectx(). This is used for tunneling
1108 * the sctp_connectx() request through sctp_setsockopt() from userspace.
1109 *
 1110 * We don't use copy_from_user() straight away, as an optimization: we
 1111 * first do the sanity checks (the buffer size check is fast, and
 1112 * access_ok() verifies the pointer is healthy); if all of those
 1113 * succeed, we alloc the memory (the expensive operation) needed to copy
 1114 * the data into the kernel. Then we do the copying without checking the
 1115 * user space area again (__copy_from_user()).
1116 *
1117 * On exit there is no need to do sockfd_put(), sys_setsockopt() does
1118 * it.
1119 *
1120 * sk The sk of the socket
1121 * addrs The pointer to the addresses in user land
 1122 * addrs_size Size of the addrs buffer
1123 *
1124 * Returns 0 if ok, <0 errno code on error.
1125 */
1126SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk,
1127 struct sockaddr __user *addrs,
1128 int addrs_size)
1129{
1130 int err = 0;
1131 struct sockaddr *kaddrs;
1132
1133 SCTP_DEBUG_PRINTK("%s - sk %p addrs %p addrs_size %d\n",
1134 __FUNCTION__, sk, addrs, addrs_size);
1135
1136 if (unlikely(addrs_size <= 0))
1137 return -EINVAL;
1138
1139 /* Check the user passed a healthy pointer. */
1140 if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size)))
1141 return -EFAULT;
1142
1143 /* Alloc space for the address array in kernel memory. */
1144 kaddrs = (struct sockaddr *)kmalloc(addrs_size, GFP_KERNEL);
1145 if (unlikely(!kaddrs))
1146 return -ENOMEM;
1147
1148 if (__copy_from_user(kaddrs, addrs, addrs_size)) {
1149 err = -EFAULT;
1150 } else {
1151 err = __sctp_connect(sk, kaddrs, addrs_size);
1152 }
1153
1154 kfree(kaddrs);
1155 return err;
1156}
1157
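
From userspace, the function above is reached by tunneling through
setsockopt(). A hedged sketch, assuming the SCTP_SOCKOPT_CONNECTX constant
from the lksctp headers and a packed buffer like the one sketched earlier:

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>       /* SCTP_SOCKOPT_CONNECTX */

    /* optval carries the packed address array, optlen its byte size;
     * the kernel copies it in and hands it to __sctp_connect(). */
    static int my_connectx(int sd, const void *packed, int packed_len)
    {
            return setsockopt(sd, IPPROTO_SCTP, SCTP_SOCKOPT_CONNECTX,
                              packed, packed_len);
    }
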
916/* API 3.1.4 close() - UDP Style Syntax 1158/* API 3.1.4 close() - UDP Style Syntax
917 * Applications use close() to perform graceful shutdown (as described in 1159 * Applications use close() to perform graceful shutdown (as described in
918 * Section 10.1 of [SCTP]) on ALL the associations currently represented 1160 * Section 10.1 of [SCTP]) on ALL the associations currently represented
@@ -1095,7 +1337,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1095 sp = sctp_sk(sk); 1337 sp = sctp_sk(sk);
1096 ep = sp->ep; 1338 ep = sp->ep;
1097 1339
1098 SCTP_DEBUG_PRINTK("Using endpoint: %s.\n", ep->debug_name); 1340 SCTP_DEBUG_PRINTK("Using endpoint: %p.\n", ep);
1099 1341
1100 /* We cannot send a message over a TCP-style listening socket. */ 1342 /* We cannot send a message over a TCP-style listening socket. */
1101 if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) { 1343 if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
@@ -1306,7 +1548,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
1306 } 1548 }
1307 1549
1308 /* Prime the peer's transport structures. */ 1550 /* Prime the peer's transport structures. */
1309 transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL); 1551 transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN);
1310 if (!transport) { 1552 if (!transport) {
1311 err = -ENOMEM; 1553 err = -ENOMEM;
1312 goto out_free; 1554 goto out_free;
@@ -2208,6 +2450,12 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
2208 optlen, SCTP_BINDX_REM_ADDR); 2450 optlen, SCTP_BINDX_REM_ADDR);
2209 break; 2451 break;
2210 2452
2453 case SCTP_SOCKOPT_CONNECTX:
2454 /* 'optlen' is the size of the addresses buffer. */
2455 retval = sctp_setsockopt_connectx(sk, (struct sockaddr __user *)optval,
2456 optlen);
2457 break;
2458
2211 case SCTP_DISABLE_FRAGMENTS: 2459 case SCTP_DISABLE_FRAGMENTS:
2212 retval = sctp_setsockopt_disable_fragments(sk, optval, optlen); 2460 retval = sctp_setsockopt_disable_fragments(sk, optval, optlen);
2213 break; 2461 break;
@@ -2283,112 +2531,29 @@ out_nounlock:
2283 * 2531 *
2284 * len: the size of the address. 2532 * len: the size of the address.
2285 */ 2533 */
2286SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *uaddr, 2534SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr,
2287 int addr_len) 2535 int addr_len)
2288{ 2536{
2289 struct sctp_sock *sp;
2290 struct sctp_endpoint *ep;
2291 struct sctp_association *asoc;
2292 struct sctp_transport *transport;
2293 union sctp_addr to;
2294 struct sctp_af *af;
2295 sctp_scope_t scope;
2296 long timeo;
2297 int err = 0; 2537 int err = 0;
2538 struct sctp_af *af;
2298 2539
2299 sctp_lock_sock(sk); 2540 sctp_lock_sock(sk);
2300 2541
2301 SCTP_DEBUG_PRINTK("%s - sk: %p, sockaddr: %p, addr_len: %d)\n", 2542 SCTP_DEBUG_PRINTK("%s - sk: %p, sockaddr: %p, addr_len: %d\n",
2302 __FUNCTION__, sk, uaddr, addr_len); 2543 __FUNCTION__, sk, addr, addr_len);
2303
2304 sp = sctp_sk(sk);
2305 ep = sp->ep;
2306
2307 /* connect() cannot be done on a socket that is already in ESTABLISHED
2308 * state - UDP-style peeled off socket or a TCP-style socket that
2309 * is already connected.
2310 * It cannot be done even on a TCP-style listening socket.
2311 */
2312 if (sctp_sstate(sk, ESTABLISHED) ||
2313 (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) {
2314 err = -EISCONN;
2315 goto out_unlock;
2316 }
2317
2318 err = sctp_verify_addr(sk, (union sctp_addr *)uaddr, addr_len);
2319 if (err)
2320 goto out_unlock;
2321 2544
2322 if (addr_len > sizeof(to)) 2545 /* Validate addr_len before calling common connect/connectx routine. */
2323 addr_len = sizeof(to); 2546 af = sctp_get_af_specific(addr->sa_family);
2324 memcpy(&to, uaddr, addr_len); 2547 if (!af || addr_len < af->sockaddr_len) {
2325 to.v4.sin_port = ntohs(to.v4.sin_port); 2548 err = -EINVAL;
2326 2549 } else {
2327 asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport); 2550 /* Pass correct addr len to common routine (so it knows there
2328 if (asoc) { 2551 * is only one address being passed.
2329 if (asoc->state >= SCTP_STATE_ESTABLISHED) 2552 */
2330 err = -EISCONN; 2553 err = __sctp_connect(sk, addr, af->sockaddr_len);
2331 else
2332 err = -EALREADY;
2333 goto out_unlock;
2334 }
2335
2336 /* If we could not find a matching association on the endpoint,
2337 * make sure that there is no peeled-off association matching the
2338 * peer address even on another socket.
2339 */
2340 if (sctp_endpoint_is_peeled_off(ep, &to)) {
2341 err = -EADDRNOTAVAIL;
2342 goto out_unlock;
2343 }
2344
2345 /* If a bind() or sctp_bindx() is not called prior to a connect()
2346 * call, the system picks an ephemeral port and will choose an address
2347 * set equivalent to binding with a wildcard address.
2348 */
2349 if (!ep->base.bind_addr.port) {
2350 if (sctp_autobind(sk)) {
2351 err = -EAGAIN;
2352 goto out_unlock;
2353 }
2354 }
2355
2356 scope = sctp_scope(&to);
2357 asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
2358 if (!asoc) {
2359 err = -ENOMEM;
2360 goto out_unlock;
2361 }
2362
2363 /* Prime the peer's transport structures. */
2364 transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL);
2365 if (!transport) {
2366 sctp_association_free(asoc);
2367 goto out_unlock;
2368 }
2369 err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL);
2370 if (err < 0) {
2371 sctp_association_free(asoc);
2372 goto out_unlock;
2373 }
2374
2375 err = sctp_primitive_ASSOCIATE(asoc, NULL);
2376 if (err < 0) {
2377 sctp_association_free(asoc);
2378 goto out_unlock;
2379 } 2554 }
2380 2555
2381 /* Initialize sk's dport and daddr for getpeername() */
2382 inet_sk(sk)->dport = htons(asoc->peer.port);
2383 af = sctp_get_af_specific(to.sa.sa_family);
2384 af->to_sk_daddr(&to, sk);
2385
2386 timeo = sock_sndtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK);
2387 err = sctp_wait_for_connect(asoc, &timeo);
2388
2389out_unlock:
2390 sctp_release_sock(sk); 2556 sctp_release_sock(sk);
2391
2392 return err; 2557 return err;
2393} 2558}
2394 2559
@@ -2677,12 +2842,15 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
2677 /* Map ipv4 address into v4-mapped-on-v6 address. */ 2842 /* Map ipv4 address into v4-mapped-on-v6 address. */
2678 sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), 2843 sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
2679 (union sctp_addr *)&status.sstat_primary.spinfo_address); 2844 (union sctp_addr *)&status.sstat_primary.spinfo_address);
2680 status.sstat_primary.spinfo_state = transport->active; 2845 status.sstat_primary.spinfo_state = transport->state;
2681 status.sstat_primary.spinfo_cwnd = transport->cwnd; 2846 status.sstat_primary.spinfo_cwnd = transport->cwnd;
2682 status.sstat_primary.spinfo_srtt = transport->srtt; 2847 status.sstat_primary.spinfo_srtt = transport->srtt;
2683 status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto); 2848 status.sstat_primary.spinfo_rto = jiffies_to_msecs(transport->rto);
2684 status.sstat_primary.spinfo_mtu = transport->pmtu; 2849 status.sstat_primary.spinfo_mtu = transport->pmtu;
2685 2850
2851 if (status.sstat_primary.spinfo_state == SCTP_UNKNOWN)
2852 status.sstat_primary.spinfo_state = SCTP_ACTIVE;
2853
2686 if (put_user(len, optlen)) { 2854 if (put_user(len, optlen)) {
2687 retval = -EFAULT; 2855 retval = -EFAULT;
2688 goto out; 2856 goto out;
@@ -2733,12 +2901,15 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len,
2733 return -EINVAL; 2901 return -EINVAL;
2734 2902
2735 pinfo.spinfo_assoc_id = sctp_assoc2id(transport->asoc); 2903 pinfo.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
2736 pinfo.spinfo_state = transport->active; 2904 pinfo.spinfo_state = transport->state;
2737 pinfo.spinfo_cwnd = transport->cwnd; 2905 pinfo.spinfo_cwnd = transport->cwnd;
2738 pinfo.spinfo_srtt = transport->srtt; 2906 pinfo.spinfo_srtt = transport->srtt;
2739 pinfo.spinfo_rto = jiffies_to_msecs(transport->rto); 2907 pinfo.spinfo_rto = jiffies_to_msecs(transport->rto);
2740 pinfo.spinfo_mtu = transport->pmtu; 2908 pinfo.spinfo_mtu = transport->pmtu;
2741 2909
2910 if (pinfo.spinfo_state == SCTP_UNKNOWN)
2911 pinfo.spinfo_state = SCTP_ACTIVE;
2912
2742 if (put_user(len, optlen)) { 2913 if (put_user(len, optlen)) {
2743 retval = -EFAULT; 2914 retval = -EFAULT;
2744 goto out; 2915 goto out;
@@ -3591,7 +3762,8 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
3591 int retval = 0; 3762 int retval = 0;
3592 int len; 3763 int len;
3593 3764
3594 SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p, ...)\n", sk); 3765 SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p... optname: %d)\n",
3766 sk, optname);
3595 3767
3596 /* I can hardly begin to describe how wrong this is. This is 3768 /* I can hardly begin to describe how wrong this is. This is
3597 * so broken as to be worse than useless. The API draft 3769 * so broken as to be worse than useless. The API draft
@@ -4368,15 +4540,11 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
 4368 * However, this function was correct in any case. 8) 4540 * However, this function was correct in any case. 8)
4369 */ 4541 */
4370 if (flags & MSG_PEEK) { 4542 if (flags & MSG_PEEK) {
4371 unsigned long cpu_flags; 4543 spin_lock_bh(&sk->sk_receive_queue.lock);
4372
4373 sctp_spin_lock_irqsave(&sk->sk_receive_queue.lock,
4374 cpu_flags);
4375 skb = skb_peek(&sk->sk_receive_queue); 4544 skb = skb_peek(&sk->sk_receive_queue);
4376 if (skb) 4545 if (skb)
4377 atomic_inc(&skb->users); 4546 atomic_inc(&skb->users);
4378 sctp_spin_unlock_irqrestore(&sk->sk_receive_queue.lock, 4547 spin_unlock_bh(&sk->sk_receive_queue.lock);
4379 cpu_flags);
4380 } else { 4548 } else {
4381 skb = skb_dequeue(&sk->sk_receive_queue); 4549 skb = skb_dequeue(&sk->sk_receive_queue);
4382 } 4550 }
@@ -4600,8 +4768,7 @@ out:
4600 return err; 4768 return err;
4601 4769
4602do_error: 4770do_error:
4603 if (asoc->counters[SCTP_COUNTER_INIT_ERROR] + 1 >= 4771 if (asoc->init_err_counter + 1 >= asoc->max_init_attempts)
4604 asoc->max_init_attempts)
4605 err = -ETIMEDOUT; 4772 err = -ETIMEDOUT;
4606 else 4773 else
4607 err = -ECONNREFUSED; 4774 err = -ECONNREFUSED;
@@ -4686,6 +4853,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
4686 struct sctp_endpoint *newep = newsp->ep; 4853 struct sctp_endpoint *newep = newsp->ep;
4687 struct sk_buff *skb, *tmp; 4854 struct sk_buff *skb, *tmp;
4688 struct sctp_ulpevent *event; 4855 struct sctp_ulpevent *event;
4856 int flags = 0;
4689 4857
4690 /* Migrate socket buffer sizes and all the socket level options to the 4858 /* Migrate socket buffer sizes and all the socket level options to the
4691 * new socket. 4859 * new socket.
@@ -4707,6 +4875,17 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
4707 sctp_sk(newsk)->bind_hash = pp; 4875 sctp_sk(newsk)->bind_hash = pp;
4708 inet_sk(newsk)->num = inet_sk(oldsk)->num; 4876 inet_sk(newsk)->num = inet_sk(oldsk)->num;
4709 4877
4878 /* Copy the bind_addr list from the original endpoint to the new
4879 * endpoint so that we can handle restarts properly
4880 */
4881 if (assoc->peer.ipv4_address)
4882 flags |= SCTP_ADDR4_PEERSUPP;
4883 if (assoc->peer.ipv6_address)
4884 flags |= SCTP_ADDR6_PEERSUPP;
4885 sctp_bind_addr_copy(&newsp->ep->base.bind_addr,
4886 &oldsp->ep->base.bind_addr,
4887 SCTP_SCOPE_GLOBAL, GFP_KERNEL, flags);
4888
4710 /* Move any messages in the old socket's receive queue that are for the 4889 /* Move any messages in the old socket's receive queue that are for the
4711 * peeled off association to the new socket's receive queue. 4890 * peeled off association to the new socket's receive queue.
4712 */ 4891 */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index f30882e1e9..0ec0fde6e6 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -83,7 +83,9 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
83 peer->last_time_used = jiffies; 83 peer->last_time_used = jiffies;
84 peer->last_time_ecne_reduced = jiffies; 84 peer->last_time_ecne_reduced = jiffies;
85 85
86 peer->active = SCTP_ACTIVE; 86 peer->init_sent_count = 0;
87
88 peer->state = SCTP_ACTIVE;
87 peer->hb_allowed = 0; 89 peer->hb_allowed = 0;
88 90
89 /* Initialize the default path max_retrans. */ 91 /* Initialize the default path max_retrans. */
diff --git a/net/socket.c b/net/socket.c
index cec0cb38b9..6f2a178819 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -81,6 +81,7 @@
81#include <linux/syscalls.h> 81#include <linux/syscalls.h>
82#include <linux/compat.h> 82#include <linux/compat.h>
83#include <linux/kmod.h> 83#include <linux/kmod.h>
84#include <linux/audit.h>
84 85
85#ifdef CONFIG_NET_RADIO 86#ifdef CONFIG_NET_RADIO
86#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */ 87#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
@@ -226,7 +227,7 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
226 return 0; 227 return 0;
227 if(copy_from_user(kaddr,uaddr,ulen)) 228 if(copy_from_user(kaddr,uaddr,ulen))
228 return -EFAULT; 229 return -EFAULT;
229 return 0; 230 return audit_sockaddr(ulen, kaddr);
230} 231}
231 232
232/** 233/**
@@ -382,9 +383,8 @@ int sock_map_fd(struct socket *sock)
382 goto out; 383 goto out;
383 } 384 }
384 385
385 sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino); 386 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
386 this.name = name; 387 this.name = name;
387 this.len = strlen(name);
388 this.hash = SOCK_INODE(sock)->i_ino; 388 this.hash = SOCK_INODE(sock)->i_ino;
389 389
390 file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); 390 file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
@@ -1906,7 +1906,11 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1906 /* copy_from_user should be SMP safe. */ 1906 /* copy_from_user should be SMP safe. */
1907 if (copy_from_user(a, args, nargs[call])) 1907 if (copy_from_user(a, args, nargs[call]))
1908 return -EFAULT; 1908 return -EFAULT;
1909 1909
1910 err = audit_socketcall(nargs[call]/sizeof(unsigned long), a);
1911 if (err)
1912 return err;
1913
1910 a0=a[0]; 1914 a0=a[0];
1911 a1=a[1]; 1915 a1=a[1];
1912 1916
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 9bcec9b927..505e2d4b3d 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -66,10 +66,10 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
66 u32 flavor = pseudoflavor_to_flavor(pseudoflavor); 66 u32 flavor = pseudoflavor_to_flavor(pseudoflavor);
67 67
68 if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor])) 68 if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor]))
69 return NULL; 69 return ERR_PTR(-EINVAL);
70 auth = ops->create(clnt, pseudoflavor); 70 auth = ops->create(clnt, pseudoflavor);
71 if (!auth) 71 if (IS_ERR(auth))
72 return NULL; 72 return auth;
73 if (clnt->cl_auth) 73 if (clnt->cl_auth)
74 rpcauth_destroy(clnt->cl_auth); 74 rpcauth_destroy(clnt->cl_auth);
75 clnt->cl_auth = auth; 75 clnt->cl_auth = auth;
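
The hunk above switches rpcauth_create() from returning NULL on failure to
the ERR_PTR convention, so callers can tell -EINVAL (unknown flavor) apart
from other failures. The standard calling pattern, as a sketch:

    struct rpc_auth *auth;

    auth = rpcauth_create(flavor, clnt);
    if (IS_ERR(auth))
            return PTR_ERR(auth);   /* propagates -EINVAL, -ENOMEM, ... */
    /* auth is a valid pointer from here on */
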
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index a33b627cbe..2f7b867161 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -660,14 +660,16 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
660{ 660{
661 struct gss_auth *gss_auth; 661 struct gss_auth *gss_auth;
662 struct rpc_auth * auth; 662 struct rpc_auth * auth;
663 int err = -ENOMEM; /* XXX? */
663 664
664 dprintk("RPC: creating GSS authenticator for client %p\n",clnt); 665 dprintk("RPC: creating GSS authenticator for client %p\n",clnt);
665 666
666 if (!try_module_get(THIS_MODULE)) 667 if (!try_module_get(THIS_MODULE))
667 return NULL; 668 return ERR_PTR(err);
668 if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) 669 if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
669 goto out_dec; 670 goto out_dec;
670 gss_auth->client = clnt; 671 gss_auth->client = clnt;
672 err = -EINVAL;
671 gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); 673 gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
672 if (!gss_auth->mech) { 674 if (!gss_auth->mech) {
673 printk(KERN_WARNING "%s: Pseudoflavor %d not found!", 675 printk(KERN_WARNING "%s: Pseudoflavor %d not found!",
@@ -675,9 +677,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
675 goto err_free; 677 goto err_free;
676 } 678 }
677 gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); 679 gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
678 /* FIXME: Will go away once privacy support is merged in */ 680 if (gss_auth->service == 0)
679 if (gss_auth->service == RPC_GSS_SVC_PRIVACY) 681 goto err_put_mech;
680 gss_auth->service = RPC_GSS_SVC_INTEGRITY;
681 INIT_LIST_HEAD(&gss_auth->upcalls); 682 INIT_LIST_HEAD(&gss_auth->upcalls);
682 spin_lock_init(&gss_auth->lock); 683 spin_lock_init(&gss_auth->lock);
683 auth = &gss_auth->rpc_auth; 684 auth = &gss_auth->rpc_auth;
@@ -687,15 +688,18 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
687 auth->au_flavor = flavor; 688 auth->au_flavor = flavor;
688 atomic_set(&auth->au_count, 1); 689 atomic_set(&auth->au_count, 1);
689 690
690 if (rpcauth_init_credcache(auth, GSS_CRED_EXPIRE) < 0) 691 err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE);
692 if (err)
691 goto err_put_mech; 693 goto err_put_mech;
692 694
693 snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", 695 snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s",
694 clnt->cl_pathname, 696 clnt->cl_pathname,
695 gss_auth->mech->gm_name); 697 gss_auth->mech->gm_name);
696 gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); 698 gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
697 if (IS_ERR(gss_auth->dentry)) 699 if (IS_ERR(gss_auth->dentry)) {
700 err = PTR_ERR(gss_auth->dentry);
698 goto err_put_mech; 701 goto err_put_mech;
702 }
699 703
700 return auth; 704 return auth;
701err_put_mech: 705err_put_mech:
@@ -704,7 +708,7 @@ err_free:
704 kfree(gss_auth); 708 kfree(gss_auth);
705out_dec: 709out_dec:
706 module_put(THIS_MODULE); 710 module_put(THIS_MODULE);
707 return NULL; 711 return ERR_PTR(err);
708} 712}
709 713
710static void 714static void
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 02bc029d46..f17e6153b6 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -97,12 +97,13 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
97 * made to sleep too long. 97 * made to sleep too long.
98 */ 98 */
99struct rpc_clnt * 99struct rpc_clnt *
100rpc_create_client(struct rpc_xprt *xprt, char *servname, 100rpc_new_client(struct rpc_xprt *xprt, char *servname,
101 struct rpc_program *program, u32 vers, 101 struct rpc_program *program, u32 vers,
102 rpc_authflavor_t flavor) 102 rpc_authflavor_t flavor)
103{ 103{
104 struct rpc_version *version; 104 struct rpc_version *version;
105 struct rpc_clnt *clnt = NULL; 105 struct rpc_clnt *clnt = NULL;
106 struct rpc_auth *auth;
106 int err; 107 int err;
107 int len; 108 int len;
108 109
@@ -157,10 +158,11 @@ rpc_create_client(struct rpc_xprt *xprt, char *servname,
157 if (err < 0) 158 if (err < 0)
158 goto out_no_path; 159 goto out_no_path;
159 160
160 err = -ENOMEM; 161 auth = rpcauth_create(flavor, clnt);
161 if (!rpcauth_create(flavor, clnt)) { 162 if (IS_ERR(auth)) {
162 printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n", 163 printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
163 flavor); 164 flavor);
165 err = PTR_ERR(auth);
164 goto out_no_auth; 166 goto out_no_auth;
165 } 167 }
166 168
@@ -178,6 +180,37 @@ out_no_path:
178 kfree(clnt->cl_server); 180 kfree(clnt->cl_server);
179 kfree(clnt); 181 kfree(clnt);
180out_err: 182out_err:
183 xprt_destroy(xprt);
184 return ERR_PTR(err);
185}
186
187/**
 188 * rpc_create_client - create an RPC client
189 * @xprt - pointer to xprt struct
190 * @servname - name of server
191 * @info - rpc_program
192 * @version - rpc_program version
193 * @authflavor - rpc_auth flavour to use
194 *
195 * Creates an RPC client structure, then pings the server in order to
196 * determine if it is up, and if it supports this program and version.
197 *
198 * This function should never be called by asynchronous tasks such as
199 * the portmapper.
200 */
201struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
202 struct rpc_program *info, u32 version, rpc_authflavor_t authflavor)
203{
204 struct rpc_clnt *clnt;
205 int err;
206
207 clnt = rpc_new_client(xprt, servname, info, version, authflavor);
208 if (IS_ERR(clnt))
209 return clnt;
210 err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
211 if (err == 0)
212 return clnt;
213 rpc_shutdown_client(clnt);
181 return ERR_PTR(err); 214 return ERR_PTR(err);
182} 215}
183 216
@@ -208,6 +241,8 @@ rpc_clone_client(struct rpc_clnt *clnt)
208 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); 241 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
209 if (new->cl_auth) 242 if (new->cl_auth)
210 atomic_inc(&new->cl_auth->au_count); 243 atomic_inc(&new->cl_auth->au_count);
244 new->cl_pmap = &new->cl_pmap_default;
245 rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
211 return new; 246 return new;
212out_no_clnt: 247out_no_clnt:
213 printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); 248 printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
@@ -296,6 +331,44 @@ rpc_release_client(struct rpc_clnt *clnt)
296 rpc_destroy_client(clnt); 331 rpc_destroy_client(clnt);
297} 332}
298 333
334/**
335 * rpc_bind_new_program - bind a new RPC program to an existing client
336 * @old - old rpc_client
337 * @program - rpc program to set
338 * @vers - rpc program version
339 *
340 * Clones the rpc client and sets up a new RPC program. This is mainly
341 * of use for enabling different RPC programs to share the same transport.
 342 * The Sun NFSv2/v3 ACL protocol uses this, for example.
343 */
344struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
345 struct rpc_program *program,
346 int vers)
347{
348 struct rpc_clnt *clnt;
349 struct rpc_version *version;
350 int err;
351
352 BUG_ON(vers >= program->nrvers || !program->version[vers]);
353 version = program->version[vers];
354 clnt = rpc_clone_client(old);
355 if (IS_ERR(clnt))
356 goto out;
357 clnt->cl_procinfo = version->procs;
358 clnt->cl_maxproc = version->nrprocs;
359 clnt->cl_protname = program->name;
360 clnt->cl_prog = program->number;
361 clnt->cl_vers = version->number;
362 clnt->cl_stats = program->stats;
363 err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
364 if (err != 0) {
365 rpc_shutdown_client(clnt);
366 clnt = ERR_PTR(err);
367 }
368out:
369 return clnt;
370}
371
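
A hedged usage sketch for rpc_bind_new_program(); the ACL program structure
and version number here are illustrative placeholders, not taken from this
patch:

    struct rpc_clnt *acl_clnt;

    /* Reuse the NFS client's transport for a second RPC program;
     * rpc_bind_new_program() pings the server to verify support. */
    acl_clnt = rpc_bind_new_program(nfs_clnt, &nfsacl_program, 3);
    if (IS_ERR(acl_clnt))
            return PTR_ERR(acl_clnt);
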
299/* 372/*
300 * Default callback for async RPC calls 373 * Default callback for async RPC calls
301 */ 374 */
@@ -305,38 +378,41 @@ rpc_default_callback(struct rpc_task *task)
305} 378}
306 379
307/* 380/*
308 * Export the signal mask handling for aysnchronous code that 381 * Export the signal mask handling for synchronous code that
309 * sleeps on RPC calls 382 * sleeps on RPC calls
310 */ 383 */
384#define RPC_INTR_SIGNALS (sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGKILL))
311 385
386static void rpc_save_sigmask(sigset_t *oldset, int intr)
387{
388 unsigned long sigallow = 0;
389 sigset_t sigmask;
390
391 /* Block all signals except those listed in sigallow */
392 if (intr)
393 sigallow |= RPC_INTR_SIGNALS;
394 siginitsetinv(&sigmask, sigallow);
395 sigprocmask(SIG_BLOCK, &sigmask, oldset);
396}
397
398static inline void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset)
399{
400 rpc_save_sigmask(oldset, !RPC_TASK_UNINTERRUPTIBLE(task));
401}
402
403static inline void rpc_restore_sigmask(sigset_t *oldset)
404{
405 sigprocmask(SIG_SETMASK, oldset, NULL);
406}
407
312void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset) 408void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
313{ 409{
314 unsigned long sigallow = sigmask(SIGKILL); 410 rpc_save_sigmask(oldset, clnt->cl_intr);
315 unsigned long irqflags;
316
317 /* Turn off various signals */
318 if (clnt->cl_intr) {
319 struct k_sigaction *action = current->sighand->action;
320 if (action[SIGINT-1].sa.sa_handler == SIG_DFL)
321 sigallow |= sigmask(SIGINT);
322 if (action[SIGQUIT-1].sa.sa_handler == SIG_DFL)
323 sigallow |= sigmask(SIGQUIT);
324 }
325 spin_lock_irqsave(&current->sighand->siglock, irqflags);
326 *oldset = current->blocked;
327 siginitsetinv(&current->blocked, sigallow & ~oldset->sig[0]);
328 recalc_sigpending();
329 spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
330} 411}
331 412
332void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset) 413void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
333{ 414{
334 unsigned long irqflags; 415 rpc_restore_sigmask(oldset);
335
336 spin_lock_irqsave(&current->sighand->siglock, irqflags);
337 current->blocked = *oldset;
338 recalc_sigpending();
339 spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
340} 416}
341 417
342/* 418/*
@@ -354,26 +430,26 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
354 430
355 BUG_ON(flags & RPC_TASK_ASYNC); 431 BUG_ON(flags & RPC_TASK_ASYNC);
356 432
357 rpc_clnt_sigmask(clnt, &oldset);
358
359 status = -ENOMEM; 433 status = -ENOMEM;
360 task = rpc_new_task(clnt, NULL, flags); 434 task = rpc_new_task(clnt, NULL, flags);
361 if (task == NULL) 435 if (task == NULL)
362 goto out; 436 goto out;
363 437
438 /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */
439 rpc_task_sigmask(task, &oldset);
440
364 rpc_call_setup(task, msg, 0); 441 rpc_call_setup(task, msg, 0);
365 442
366 /* Set up the call info struct and execute the task */ 443 /* Set up the call info struct and execute the task */
367 if (task->tk_status == 0) 444 if (task->tk_status == 0) {
368 status = rpc_execute(task); 445 status = rpc_execute(task);
369 else { 446 } else {
370 status = task->tk_status; 447 status = task->tk_status;
371 rpc_release_task(task); 448 rpc_release_task(task);
372 } 449 }
373 450
451 rpc_restore_sigmask(&oldset);
374out: 452out:
375 rpc_clnt_sigunmask(clnt, &oldset);
376
377 return status; 453 return status;
378} 454}
379 455
@@ -394,8 +470,6 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
394 470
395 flags |= RPC_TASK_ASYNC; 471 flags |= RPC_TASK_ASYNC;
396 472
397 rpc_clnt_sigmask(clnt, &oldset);
398
399 /* Create/initialize a new RPC task */ 473 /* Create/initialize a new RPC task */
400 if (!callback) 474 if (!callback)
401 callback = rpc_default_callback; 475 callback = rpc_default_callback;
@@ -404,6 +478,9 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
404 goto out; 478 goto out;
405 task->tk_calldata = data; 479 task->tk_calldata = data;
406 480
481 /* Mask signals on GSS_AUTH upcalls */
482 rpc_task_sigmask(task, &oldset);
483
407 rpc_call_setup(task, msg, 0); 484 rpc_call_setup(task, msg, 0);
408 485
409 /* Set up the call info struct and execute the task */ 486 /* Set up the call info struct and execute the task */
@@ -413,9 +490,8 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
413 else 490 else
414 rpc_release_task(task); 491 rpc_release_task(task);
415 492
493 rpc_restore_sigmask(&oldset);
416out: 494out:
417 rpc_clnt_sigunmask(clnt, &oldset);
418
419 return status; 495 return status;
420} 496}
421 497
@@ -593,7 +669,7 @@ call_allocate(struct rpc_task *task)
593 return; 669 return;
594 printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); 670 printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task);
595 671
596 if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) { 672 if (RPC_IS_ASYNC(task) || !signalled()) {
597 xprt_release(task); 673 xprt_release(task);
598 task->tk_action = call_reserve; 674 task->tk_action = call_reserve;
599 rpc_delay(task, HZ>>4); 675 rpc_delay(task, HZ>>4);
@@ -957,7 +1033,9 @@ call_header(struct rpc_task *task)
957 *p++ = htonl(clnt->cl_prog); /* program number */ 1033 *p++ = htonl(clnt->cl_prog); /* program number */
958 *p++ = htonl(clnt->cl_vers); /* program version */ 1034 *p++ = htonl(clnt->cl_vers); /* program version */
959 *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */ 1035 *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */
960 return rpcauth_marshcred(task, p); 1036 p = rpcauth_marshcred(task, p);
1037 req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p);
1038 return p;
961} 1039}
962 1040
963/* 1041/*
@@ -986,10 +1064,11 @@ call_verify(struct rpc_task *task)
986 case RPC_AUTH_ERROR: 1064 case RPC_AUTH_ERROR:
987 break; 1065 break;
988 case RPC_MISMATCH: 1066 case RPC_MISMATCH:
989 printk(KERN_WARNING "%s: RPC call version mismatch!\n", __FUNCTION__); 1067 dprintk("%s: RPC call version mismatch!\n", __FUNCTION__);
990 goto out_eio; 1068 error = -EPROTONOSUPPORT;
1069 goto out_err;
991 default: 1070 default:
992 printk(KERN_WARNING "%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n); 1071 dprintk("%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n);
993 goto out_eio; 1072 goto out_eio;
994 } 1073 }
995 if (--len < 0) 1074 if (--len < 0)
@@ -1040,23 +1119,26 @@ call_verify(struct rpc_task *task)
1040 case RPC_SUCCESS: 1119 case RPC_SUCCESS:
1041 return p; 1120 return p;
1042 case RPC_PROG_UNAVAIL: 1121 case RPC_PROG_UNAVAIL:
1043 printk(KERN_WARNING "RPC: call_verify: program %u is unsupported by server %s\n", 1122 dprintk("RPC: call_verify: program %u is unsupported by server %s\n",
1044 (unsigned int)task->tk_client->cl_prog, 1123 (unsigned int)task->tk_client->cl_prog,
1045 task->tk_client->cl_server); 1124 task->tk_client->cl_server);
1046 goto out_eio; 1125 error = -EPFNOSUPPORT;
1126 goto out_err;
1047 case RPC_PROG_MISMATCH: 1127 case RPC_PROG_MISMATCH:
1048 printk(KERN_WARNING "RPC: call_verify: program %u, version %u unsupported by server %s\n", 1128 dprintk("RPC: call_verify: program %u, version %u unsupported by server %s\n",
1049 (unsigned int)task->tk_client->cl_prog, 1129 (unsigned int)task->tk_client->cl_prog,
1050 (unsigned int)task->tk_client->cl_vers, 1130 (unsigned int)task->tk_client->cl_vers,
1051 task->tk_client->cl_server); 1131 task->tk_client->cl_server);
1052 goto out_eio; 1132 error = -EPROTONOSUPPORT;
1133 goto out_err;
1053 case RPC_PROC_UNAVAIL: 1134 case RPC_PROC_UNAVAIL:
1054 printk(KERN_WARNING "RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n", 1135 dprintk("RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n",
1055 task->tk_msg.rpc_proc, 1136 task->tk_msg.rpc_proc,
1056 task->tk_client->cl_prog, 1137 task->tk_client->cl_prog,
1057 task->tk_client->cl_vers, 1138 task->tk_client->cl_vers,
1058 task->tk_client->cl_server); 1139 task->tk_client->cl_server);
1059 goto out_eio; 1140 error = -EOPNOTSUPP;
1141 goto out_err;
1060 case RPC_GARBAGE_ARGS: 1142 case RPC_GARBAGE_ARGS:
1061 dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__); 1143 dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__);
1062 break; /* retry */ 1144 break; /* retry */
@@ -1069,7 +1151,7 @@ out_retry:
1069 task->tk_client->cl_stats->rpcgarbage++; 1151 task->tk_client->cl_stats->rpcgarbage++;
1070 if (task->tk_garb_retry) { 1152 if (task->tk_garb_retry) {
1071 task->tk_garb_retry--; 1153 task->tk_garb_retry--;
1072 dprintk(KERN_WARNING "RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); 1154 dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
1073 task->tk_action = call_bind; 1155 task->tk_action = call_bind;
1074 return NULL; 1156 return NULL;
1075 } 1157 }
@@ -1083,3 +1165,30 @@ out_overflow:
1083 printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__); 1165 printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
1084 goto out_retry; 1166 goto out_retry;
1085} 1167}
1168
1169static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj)
1170{
1171 return 0;
1172}
1173
1174static int rpcproc_decode_null(void *rqstp, u32 *data, void *obj)
1175{
1176 return 0;
1177}
1178
1179static struct rpc_procinfo rpcproc_null = {
1180 .p_encode = rpcproc_encode_null,
1181 .p_decode = rpcproc_decode_null,
1182};
1183
1184int rpc_ping(struct rpc_clnt *clnt, int flags)
1185{
1186 struct rpc_message msg = {
1187 .rpc_proc = &rpcproc_null,
1188 };
1189 int err;
1190 msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0);
1191 err = rpc_call_sync(clnt, &msg, flags);
1192 put_rpccred(msg.rpc_cred);
1193 return err;
1194}
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index d0b1d2c34a..4e81f27669 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -53,6 +53,9 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
53 task->tk_pid, clnt->cl_server, 53 task->tk_pid, clnt->cl_server,
54 map->pm_prog, map->pm_vers, map->pm_prot); 54 map->pm_prog, map->pm_vers, map->pm_prot);
55 55
56 /* Autobind on cloned rpc clients is discouraged */
57 BUG_ON(clnt->cl_parent != clnt);
58
56 spin_lock(&pmap_lock); 59 spin_lock(&pmap_lock);
57 if (map->pm_binding) { 60 if (map->pm_binding) {
58 rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL); 61 rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL);
@@ -207,12 +210,10 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto)
207 xprt->addr.sin_port = htons(RPC_PMAP_PORT); 210 xprt->addr.sin_port = htons(RPC_PMAP_PORT);
208 211
209 /* printk("pmap: create clnt\n"); */ 212 /* printk("pmap: create clnt\n"); */
210 clnt = rpc_create_client(xprt, hostname, 213 clnt = rpc_new_client(xprt, hostname,
211 &pmap_program, RPC_PMAP_VERSION, 214 &pmap_program, RPC_PMAP_VERSION,
212 RPC_AUTH_UNIX); 215 RPC_AUTH_UNIX);
213 if (IS_ERR(clnt)) { 216 if (!IS_ERR(clnt)) {
214 xprt_destroy(xprt);
215 } else {
216 clnt->cl_softrtry = 1; 217 clnt->cl_softrtry = 1;
217 clnt->cl_chatty = 1; 218 clnt->cl_chatty = 1;
218 clnt->cl_oneshot = 1; 219 clnt->cl_oneshot = 1;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c06614d0e3..2d9eb7fbd5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -290,7 +290,7 @@ static void rpc_make_runnable(struct rpc_task *task)
290 return; 290 return;
291 } 291 }
292 } else 292 } else
293 wake_up(&task->u.tk_wait.waitq); 293 wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
294} 294}
295 295
296/* 296/*
@@ -555,6 +555,38 @@ __rpc_atrun(struct rpc_task *task)
555} 555}
556 556
557/* 557/*
558 * Helper that calls task->tk_exit if it exists and then returns
559 * true if we should exit __rpc_execute.
560 */
561static inline int __rpc_do_exit(struct rpc_task *task)
562{
563 if (task->tk_exit != NULL) {
564 lock_kernel();
565 task->tk_exit(task);
566 unlock_kernel();
567 /* If tk_action is non-null, we should restart the call */
568 if (task->tk_action != NULL) {
569 if (!RPC_ASSASSINATED(task)) {
570 /* Release RPC slot and buffer memory */
571 xprt_release(task);
572 rpc_free(task);
573 return 0;
574 }
575 printk(KERN_ERR "RPC: dead task tried to walk away.\n");
576 }
577 }
578 return 1;
579}
580
581static int rpc_wait_bit_interruptible(void *word)
582{
583 if (signal_pending(current))
584 return -ERESTARTSYS;
585 schedule();
586 return 0;
587}
588
589/*
558 * This is the RPC `scheduler' (or rather, the finite state machine). 590 * This is the RPC `scheduler' (or rather, the finite state machine).
559 */ 591 */
560static int __rpc_execute(struct rpc_task *task) 592static int __rpc_execute(struct rpc_task *task)
@@ -566,8 +598,7 @@ static int __rpc_execute(struct rpc_task *task)
566 598
567 BUG_ON(RPC_IS_QUEUED(task)); 599 BUG_ON(RPC_IS_QUEUED(task));
568 600
569 restarted: 601 for (;;) {
570 while (1) {
571 /* 602 /*
572 * Garbage collection of pending timers... 603 * Garbage collection of pending timers...
573 */ 604 */
@@ -600,11 +631,12 @@ static int __rpc_execute(struct rpc_task *task)
600 * by someone else. 631 * by someone else.
601 */ 632 */
602 if (!RPC_IS_QUEUED(task)) { 633 if (!RPC_IS_QUEUED(task)) {
603 if (!task->tk_action) 634 if (task->tk_action != NULL) {
635 lock_kernel();
636 task->tk_action(task);
637 unlock_kernel();
638 } else if (__rpc_do_exit(task))
604 break; 639 break;
605 lock_kernel();
606 task->tk_action(task);
607 unlock_kernel();
608 } 640 }
609 641
610 /* 642 /*
@@ -624,44 +656,26 @@ static int __rpc_execute(struct rpc_task *task)
624 656
625 /* sync task: sleep here */ 657 /* sync task: sleep here */
626 dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid); 658 dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
627 if (RPC_TASK_UNINTERRUPTIBLE(task)) { 659 /* Note: Caller should be using rpc_clnt_sigmask() */
628 __wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task)); 660 status = out_of_line_wait_on_bit(&task->tk_runstate,
629 } else { 661 RPC_TASK_QUEUED, rpc_wait_bit_interruptible,
630 __wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status); 662 TASK_INTERRUPTIBLE);
663 if (status == -ERESTARTSYS) {
631 /* 664 /*
632 * When a sync task receives a signal, it exits with 665 * When a sync task receives a signal, it exits with
633 * -ERESTARTSYS. In order to catch any callbacks that 666 * -ERESTARTSYS. In order to catch any callbacks that
634 * clean up after sleeping on some queue, we don't 667 * clean up after sleeping on some queue, we don't
635 * break the loop here, but go around once more. 668 * break the loop here, but go around once more.
636 */ 669 */
637 if (status == -ERESTARTSYS) { 670 dprintk("RPC: %4d got signal\n", task->tk_pid);
638 dprintk("RPC: %4d got signal\n", task->tk_pid); 671 task->tk_flags |= RPC_TASK_KILLED;
639 task->tk_flags |= RPC_TASK_KILLED; 672 rpc_exit(task, -ERESTARTSYS);
640 rpc_exit(task, -ERESTARTSYS); 673 rpc_wake_up_task(task);
641 rpc_wake_up_task(task);
642 }
643 } 674 }
644 rpc_set_running(task); 675 rpc_set_running(task);
645 dprintk("RPC: %4d sync task resuming\n", task->tk_pid); 676 dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
646 } 677 }
647 678
648 if (task->tk_exit) {
649 lock_kernel();
650 task->tk_exit(task);
651 unlock_kernel();
652 /* If tk_action is non-null, the user wants us to restart */
653 if (task->tk_action) {
654 if (!RPC_ASSASSINATED(task)) {
655 /* Release RPC slot and buffer memory */
656 if (task->tk_rqstp)
657 xprt_release(task);
658 rpc_free(task);
659 goto restarted;
660 }
661 printk(KERN_ERR "RPC: dead task tries to walk away.\n");
662 }
663 }
664
665 dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status); 679 dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
666 status = task->tk_status; 680 status = task->tk_status;
667 681
@@ -759,8 +773,6 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
759 773
760 /* Initialize workqueue for async tasks */ 774 /* Initialize workqueue for async tasks */
761 task->tk_workqueue = rpciod_workqueue; 775 task->tk_workqueue = rpciod_workqueue;
762 if (!RPC_IS_ASYNC(task))
763 init_waitqueue_head(&task->u.tk_wait.waitq);
764 776
765 if (clnt) { 777 if (clnt) {
766 atomic_inc(&clnt->cl_users); 778 atomic_inc(&clnt->cl_users);
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index d4f26bf9e7..32e8acbc60 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -42,6 +42,7 @@ EXPORT_SYMBOL(rpc_release_task);
42/* RPC client functions */ 42/* RPC client functions */
43EXPORT_SYMBOL(rpc_create_client); 43EXPORT_SYMBOL(rpc_create_client);
44EXPORT_SYMBOL(rpc_clone_client); 44EXPORT_SYMBOL(rpc_clone_client);
45EXPORT_SYMBOL(rpc_bind_new_program);
45EXPORT_SYMBOL(rpc_destroy_client); 46EXPORT_SYMBOL(rpc_destroy_client);
46EXPORT_SYMBOL(rpc_shutdown_client); 47EXPORT_SYMBOL(rpc_shutdown_client);
47EXPORT_SYMBOL(rpc_release_client); 48EXPORT_SYMBOL(rpc_release_client);
@@ -61,7 +62,6 @@ EXPORT_SYMBOL(rpc_mkpipe);
61 62
62/* Client transport */ 63/* Client transport */
63EXPORT_SYMBOL(xprt_create_proto); 64EXPORT_SYMBOL(xprt_create_proto);
64EXPORT_SYMBOL(xprt_destroy);
65EXPORT_SYMBOL(xprt_set_timeout); 65EXPORT_SYMBOL(xprt_set_timeout);
66EXPORT_SYMBOL(xprt_udp_slot_table_entries); 66EXPORT_SYMBOL(xprt_udp_slot_table_entries);
67EXPORT_SYMBOL(xprt_tcp_slot_table_entries); 67EXPORT_SYMBOL(xprt_tcp_slot_table_entries);
@@ -129,6 +129,10 @@ EXPORT_SYMBOL(xdr_encode_netobj);
129EXPORT_SYMBOL(xdr_encode_pages); 129EXPORT_SYMBOL(xdr_encode_pages);
130EXPORT_SYMBOL(xdr_inline_pages); 130EXPORT_SYMBOL(xdr_inline_pages);
131EXPORT_SYMBOL(xdr_shift_buf); 131EXPORT_SYMBOL(xdr_shift_buf);
132EXPORT_SYMBOL(xdr_encode_word);
133EXPORT_SYMBOL(xdr_decode_word);
134EXPORT_SYMBOL(xdr_encode_array2);
135EXPORT_SYMBOL(xdr_decode_array2);
132EXPORT_SYMBOL(xdr_buf_from_iov); 136EXPORT_SYMBOL(xdr_buf_from_iov);
133EXPORT_SYMBOL(xdr_buf_subsegment); 137EXPORT_SYMBOL(xdr_buf_subsegment);
134EXPORT_SYMBOL(xdr_buf_read_netobj); 138EXPORT_SYMBOL(xdr_buf_read_netobj);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bb2d99f333..e9bd91265f 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -35,20 +35,24 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
35 if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL))) 35 if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL)))
36 return NULL; 36 return NULL;
37 memset(serv, 0, sizeof(*serv)); 37 memset(serv, 0, sizeof(*serv));
38 serv->sv_name = prog->pg_name;
38 serv->sv_program = prog; 39 serv->sv_program = prog;
39 serv->sv_nrthreads = 1; 40 serv->sv_nrthreads = 1;
40 serv->sv_stats = prog->pg_stats; 41 serv->sv_stats = prog->pg_stats;
41 serv->sv_bufsz = bufsize? bufsize : 4096; 42 serv->sv_bufsz = bufsize? bufsize : 4096;
42 prog->pg_lovers = prog->pg_nvers-1;
43 xdrsize = 0; 43 xdrsize = 0;
44 for (vers=0; vers<prog->pg_nvers ; vers++) 44 while (prog) {
45 if (prog->pg_vers[vers]) { 45 prog->pg_lovers = prog->pg_nvers-1;
46 prog->pg_hivers = vers; 46 for (vers=0; vers<prog->pg_nvers ; vers++)
47 if (prog->pg_lovers > vers) 47 if (prog->pg_vers[vers]) {
48 prog->pg_lovers = vers; 48 prog->pg_hivers = vers;
49 if (prog->pg_vers[vers]->vs_xdrsize > xdrsize) 49 if (prog->pg_lovers > vers)
50 xdrsize = prog->pg_vers[vers]->vs_xdrsize; 50 prog->pg_lovers = vers;
51 } 51 if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
52 xdrsize = prog->pg_vers[vers]->vs_xdrsize;
53 }
54 prog = prog->pg_next;
55 }
52 serv->sv_xdrsize = xdrsize; 56 serv->sv_xdrsize = xdrsize;
53 INIT_LIST_HEAD(&serv->sv_threads); 57 INIT_LIST_HEAD(&serv->sv_threads);
54 INIT_LIST_HEAD(&serv->sv_sockets); 58 INIT_LIST_HEAD(&serv->sv_sockets);
@@ -56,8 +60,6 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
56 INIT_LIST_HEAD(&serv->sv_permsocks); 60 INIT_LIST_HEAD(&serv->sv_permsocks);
57 spin_lock_init(&serv->sv_lock); 61 spin_lock_init(&serv->sv_lock);
58 62
59 serv->sv_name = prog->pg_name;
60
61 /* Remove any stale portmap registrations */ 63 /* Remove any stale portmap registrations */
62 svc_register(serv, 0, 0); 64 svc_register(serv, 0, 0);
63 65
@@ -281,6 +283,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
281 rqstp->rq_res.len = 0; 283 rqstp->rq_res.len = 0;
282 rqstp->rq_res.page_base = 0; 284 rqstp->rq_res.page_base = 0;
283 rqstp->rq_res.page_len = 0; 285 rqstp->rq_res.page_len = 0;
286 rqstp->rq_res.buflen = PAGE_SIZE;
284 rqstp->rq_res.tail[0].iov_len = 0; 287 rqstp->rq_res.tail[0].iov_len = 0;
285 /* tcp needs a space for the record length... */ 288 /* tcp needs a space for the record length... */
286 if (rqstp->rq_prot == IPPROTO_TCP) 289 if (rqstp->rq_prot == IPPROTO_TCP)
@@ -338,7 +341,10 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
338 goto sendit; 341 goto sendit;
339 } 342 }
340 343
341 if (prog != progp->pg_prog) 344 for (progp = serv->sv_program; progp; progp = progp->pg_next)
345 if (prog == progp->pg_prog)
346 break;
347 if (progp == NULL)
342 goto err_bad_prog; 348 goto err_bad_prog;
343 349
344 if (vers >= progp->pg_nvers || 350 if (vers >= progp->pg_nvers ||
@@ -451,11 +457,7 @@ err_bad_auth:
451 goto sendit; 457 goto sendit;
452 458
453err_bad_prog: 459err_bad_prog:
454#ifdef RPC_PARANOIA 460 dprintk("svc: unknown program %d\n", prog);
455 if (prog != 100227 || progp->pg_prog != 100003)
456 printk("svc: unknown program %d (me %d)\n", prog, progp->pg_prog);
457 /* else it is just a Solaris client seeing if ACLs are supported */
458#endif
459 serv->sv_stats->rpcbadfmt++; 461 serv->sv_stats->rpcbadfmt++;
460 svc_putu32(resv, rpc_prog_unavail); 462 svc_putu32(resv, rpc_prog_unavail);
461 goto sendit; 463 goto sendit;
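
With the svc.c changes above, one service can host several RPC programs
chained through pg_next: svc_create() now sizes the xdr buffer across the
whole chain, and svc_process() walks it to dispatch by program number. A
hedged sketch of linking a second program in (struct names illustrative):

    /* Chain an ACL side-program onto the main program before creating
     * the service. */
    nfsd_program.pg_next = &nfsd_acl_program;
    serv = svc_create(&nfsd_program, NFSD_BUFSIZE);
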
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 67b9f035ba..8a4d9c106a 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -176,21 +176,23 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
176 xdr->buflen += len; 176 xdr->buflen += len;
177} 177}
178 178
179void 179ssize_t
180xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, 180xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
181 skb_reader_t *desc, 181 skb_reader_t *desc,
182 skb_read_actor_t copy_actor) 182 skb_read_actor_t copy_actor)
183{ 183{
184 struct page **ppage = xdr->pages; 184 struct page **ppage = xdr->pages;
185 unsigned int len, pglen = xdr->page_len; 185 unsigned int len, pglen = xdr->page_len;
186 ssize_t copied = 0;
186 int ret; 187 int ret;
187 188
188 len = xdr->head[0].iov_len; 189 len = xdr->head[0].iov_len;
189 if (base < len) { 190 if (base < len) {
190 len -= base; 191 len -= base;
191 ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); 192 ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
193 copied += ret;
192 if (ret != len || !desc->count) 194 if (ret != len || !desc->count)
193 return; 195 goto out;
194 base = 0; 196 base = 0;
195 } else 197 } else
196 base -= len; 198 base -= len;
@@ -210,6 +212,17 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
210 do { 212 do {
211 char *kaddr; 213 char *kaddr;
212 214
215 /* ACL likes to be lazy in allocating pages - ACLs
216 * are small by default but can get huge. */
217 if (unlikely(*ppage == NULL)) {
218 *ppage = alloc_page(GFP_ATOMIC);
219 if (unlikely(*ppage == NULL)) {
220 if (copied == 0)
221 copied = -ENOMEM;
222 goto out;
223 }
224 }
225
213 len = PAGE_CACHE_SIZE; 226 len = PAGE_CACHE_SIZE;
214 kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); 227 kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA);
215 if (base) { 228 if (base) {
@@ -225,14 +238,17 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base,
225 } 238 }
226 flush_dcache_page(*ppage); 239 flush_dcache_page(*ppage);
227 kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); 240 kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA);
241 copied += ret;
228 if (ret != len || !desc->count) 242 if (ret != len || !desc->count)
229 return; 243 goto out;
230 ppage++; 244 ppage++;
231 } while ((pglen -= len) != 0); 245 } while ((pglen -= len) != 0);
232copy_tail: 246copy_tail:
233 len = xdr->tail[0].iov_len; 247 len = xdr->tail[0].iov_len;
234 if (base < len) 248 if (base < len)
235 copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); 249 copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
250out:
251 return copied;
236} 252}
237 253
238 254
@@ -616,12 +632,24 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len)
616void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) 632void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
617{ 633{
618 struct kvec *iov = buf->head; 634 struct kvec *iov = buf->head;
635 int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
619 636
637 BUG_ON(scratch_len < 0);
620 xdr->buf = buf; 638 xdr->buf = buf;
621 xdr->iov = iov; 639 xdr->iov = iov;
622 xdr->end = (uint32_t *)((char *)iov->iov_base + iov->iov_len); 640 xdr->p = (uint32_t *)((char *)iov->iov_base + iov->iov_len);
623 buf->len = iov->iov_len = (char *)p - (char *)iov->iov_base; 641 xdr->end = (uint32_t *)((char *)iov->iov_base + scratch_len);
624 xdr->p = p; 642 BUG_ON(iov->iov_len > scratch_len);
643
644 if (p != xdr->p && p != NULL) {
645 size_t len;
646
647 BUG_ON(p < xdr->p || p > xdr->end);
648 len = (char *)p - (char *)xdr->p;
649 xdr->p = p;
650 buf->len += len;
651 iov->iov_len += len;
652 }
625} 653}
626EXPORT_SYMBOL(xdr_init_encode); 654EXPORT_SYMBOL(xdr_init_encode);
627 655
@@ -859,8 +887,34 @@ out:
859 return status; 887 return status;
860} 888}
861 889
862static int 890/* obj is assumed to point to allocated memory of size at least len: */
863read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) 891int
892write_bytes_to_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len)
893{
894 struct xdr_buf subbuf;
895 int this_len;
896 int status;
897
898 status = xdr_buf_subsegment(buf, &subbuf, base, len);
899 if (status)
900 goto out;
901 this_len = min(len, (int)subbuf.head[0].iov_len);
902 memcpy(subbuf.head[0].iov_base, obj, this_len);
903 len -= this_len;
904 obj += this_len;
905 this_len = min(len, (int)subbuf.page_len);
906 if (this_len)
907 _copy_to_pages(subbuf.pages, subbuf.page_base, obj, this_len);
908 len -= this_len;
909 obj += this_len;
910 this_len = min(len, (int)subbuf.tail[0].iov_len);
911 memcpy(subbuf.tail[0].iov_base, obj, this_len);
912out:
913 return status;
914}
915
916int
917xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj)
864{ 918{
865 u32 raw; 919 u32 raw;
866 int status; 920 int status;
@@ -872,6 +926,14 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
872 return 0; 926 return 0;
873} 927}
874 928
929int
930xdr_encode_word(struct xdr_buf *buf, int base, u32 obj)
931{
932 u32 raw = htonl(obj);
933
934 return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj));
935}
936
875/* If the netobj starting offset bytes from the start of xdr_buf is contained 937/* If the netobj starting offset bytes from the start of xdr_buf is contained
876 * entirely in the head or the tail, set object to point to it; otherwise 938 * entirely in the head or the tail, set object to point to it; otherwise
877 * try to find space for it at the end of the tail, copy it there, and 939 * try to find space for it at the end of the tail, copy it there, and
@@ -882,7 +944,7 @@ xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset)
882 u32 tail_offset = buf->head[0].iov_len + buf->page_len; 944 u32 tail_offset = buf->head[0].iov_len + buf->page_len;
883 u32 obj_end_offset; 945 u32 obj_end_offset;
884 946
885 if (read_u32_from_xdr_buf(buf, offset, &obj->len)) 947 if (xdr_decode_word(buf, offset, &obj->len))
886 goto out; 948 goto out;
887 obj_end_offset = offset + 4 + obj->len; 949 obj_end_offset = offset + 4 + obj->len;
888 950
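xdr_encode_word() is the write-side twin of the renamed xdr_decode_word(): one 32-bit value in network byte order at an arbitrary byte offset, routed through the subsegment helpers so it may straddle head, pages and tail. The byte-order round trip over a flat buffer, as a sketch:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            unsigned char buf[16] = { 0 };
            int base = 4;                      /* byte offset, as above */
            uint32_t raw = htonl(0xdeadbeef);  /* xdr_encode_word() path */
            uint32_t out;

            memcpy(buf + base, &raw, sizeof(raw));
            memcpy(&out, buf + base, sizeof(out)); /* xdr_decode_word() path */
            printf("0x%08x\n", ntohl(out));        /* prints 0xdeadbeef */
            return 0;
    }
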
@@ -915,3 +977,219 @@ xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset)
915out: 977out:
916 return -1; 978 return -1;
917} 979}
980
981/* Returns 0 on success, or else a negative error code. */
982static int
983xdr_xcode_array2(struct xdr_buf *buf, unsigned int base,
984 struct xdr_array2_desc *desc, int encode)
985{
986 char *elem = NULL, *c;
987 unsigned int copied = 0, todo, avail_here;
988 struct page **ppages = NULL;
989 int err;
990
991 if (encode) {
992 if (xdr_encode_word(buf, base, desc->array_len) != 0)
993 return -EINVAL;
994 } else {
995 if (xdr_decode_word(buf, base, &desc->array_len) != 0 ||
996 (unsigned long) base + 4 + desc->array_len *
997 desc->elem_size > buf->len)
998 return -EINVAL;
999 }
1000 base += 4;
1001
1002 if (!desc->xcode)
1003 return 0;
1004
1005 todo = desc->array_len * desc->elem_size;
1006
1007 /* process head */
1008 if (todo && base < buf->head->iov_len) {
1009 c = buf->head->iov_base + base;
1010 avail_here = min_t(unsigned int, todo,
1011 buf->head->iov_len - base);
1012 todo -= avail_here;
1013
1014 while (avail_here >= desc->elem_size) {
1015 err = desc->xcode(desc, c);
1016 if (err)
1017 goto out;
1018 c += desc->elem_size;
1019 avail_here -= desc->elem_size;
1020 }
1021 if (avail_here) {
1022 if (!elem) {
1023 elem = kmalloc(desc->elem_size, GFP_KERNEL);
1024 err = -ENOMEM;
1025 if (!elem)
1026 goto out;
1027 }
1028 if (encode) {
1029 err = desc->xcode(desc, elem);
1030 if (err)
1031 goto out;
1032 memcpy(c, elem, avail_here);
1033 } else
1034 memcpy(elem, c, avail_here);
1035 copied = avail_here;
1036 }
1037 base = buf->head->iov_len; /* align to start of pages */
1038 }
1039
1040 /* process pages array */
1041 base -= buf->head->iov_len;
1042 if (todo && base < buf->page_len) {
1043 unsigned int avail_page;
1044
1045 avail_here = min(todo, buf->page_len - base);
1046 todo -= avail_here;
1047
1048 base += buf->page_base;
1049 ppages = buf->pages + (base >> PAGE_CACHE_SHIFT);
1050 base &= ~PAGE_CACHE_MASK;
1051 avail_page = min_t(unsigned int, PAGE_CACHE_SIZE - base,
1052 avail_here);
1053 c = kmap(*ppages) + base;
1054
1055 while (avail_here) {
1056 avail_here -= avail_page;
1057 if (copied || avail_page < desc->elem_size) {
1058 unsigned int l = min(avail_page,
1059 desc->elem_size - copied);
1060 if (!elem) {
1061 elem = kmalloc(desc->elem_size,
1062 GFP_KERNEL);
1063 err = -ENOMEM;
1064 if (!elem)
1065 goto out;
1066 }
1067 if (encode) {
1068 if (!copied) {
1069 err = desc->xcode(desc, elem);
1070 if (err)
1071 goto out;
1072 }
1073 memcpy(c, elem + copied, l);
1074 copied += l;
1075 if (copied == desc->elem_size)
1076 copied = 0;
1077 } else {
1078 memcpy(elem + copied, c, l);
1079 copied += l;
1080 if (copied == desc->elem_size) {
1081 err = desc->xcode(desc, elem);
1082 if (err)
1083 goto out;
1084 copied = 0;
1085 }
1086 }
1087 avail_page -= l;
1088 c += l;
1089 }
1090 while (avail_page >= desc->elem_size) {
1091 err = desc->xcode(desc, c);
1092 if (err)
1093 goto out;
1094 c += desc->elem_size;
1095 avail_page -= desc->elem_size;
1096 }
1097 if (avail_page) {
1098 unsigned int l = min(avail_page,
1099 desc->elem_size - copied);
1100 if (!elem) {
1101 elem = kmalloc(desc->elem_size,
1102 GFP_KERNEL);
1103 err = -ENOMEM;
1104 if (!elem)
1105 goto out;
1106 }
1107 if (encode) {
1108 if (!copied) {
1109 err = desc->xcode(desc, elem);
1110 if (err)
1111 goto out;
1112 }
1113 memcpy(c, elem + copied, l);
1114 copied += l;
1115 if (copied == desc->elem_size)
1116 copied = 0;
1117 } else {
1118 memcpy(elem + copied, c, l);
1119 copied += l;
1120 if (copied == desc->elem_size) {
1121 err = desc->xcode(desc, elem);
1122 if (err)
1123 goto out;
1124 copied = 0;
1125 }
1126 }
1127 }
1128 if (avail_here) {
1129 kunmap(*ppages);
1130 ppages++;
1131 c = kmap(*ppages);
1132 }
1133
1134 avail_page = min(avail_here,
1135 (unsigned int) PAGE_CACHE_SIZE);
1136 }
1137 base = buf->page_len; /* align to start of tail */
1138 }
1139
1140 /* process tail */
1141 base -= buf->page_len;
1142 if (todo) {
1143 c = buf->tail->iov_base + base;
1144 if (copied) {
1145 unsigned int l = desc->elem_size - copied;
1146
1147 if (encode)
1148 memcpy(c, elem + copied, l);
1149 else {
1150 memcpy(elem + copied, c, l);
1151 err = desc->xcode(desc, elem);
1152 if (err)
1153 goto out;
1154 }
1155 todo -= l;
1156 c += l;
1157 }
1158 while (todo) {
1159 err = desc->xcode(desc, c);
1160 if (err)
1161 goto out;
1162 c += desc->elem_size;
1163 todo -= desc->elem_size;
1164 }
1165 }
1166 err = 0;
1167
1168out:
1169 if (elem)
1170 kfree(elem);
1171 if (ppages)
1172 kunmap(*ppages);
1173 return err;
1174}
1175
1176int
1177xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
1178 struct xdr_array2_desc *desc)
1179{
1180 if (base >= buf->len)
1181 return -EINVAL;
1182
1183 return xdr_xcode_array2(buf, base, desc, 0);
1184}
1185
1186int
1187xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
1188 struct xdr_array2_desc *desc)
1189{
1190 if ((unsigned long) base + 4 + desc->array_len * desc->elem_size >
1191 buf->head->iov_len + buf->page_len + buf->tail->iov_len)
1192 return -EINVAL;
1193
1194 return xdr_xcode_array2(buf, base, desc, 1);
1195}
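xdr_xcode_array2() walks fixed-size elements wherever they fall in head, pages or tail, bouncing any element that straddles a boundary through a kmalloc'd scratch copy. A usage sketch for the decode side; the fields of struct xdr_array2_desc (elem_size, array_len, xcode) are assumed from the accompanying header change, which is not part of this hunk, and the summing descriptor is purely illustrative:

    struct sum_desc {
            struct xdr_array2_desc desc;   /* must be first for container_of */
            u64 total;
    };

    static int sum_elem(struct xdr_array2_desc *desc, void *elem)
    {
            struct sum_desc *sd = container_of(desc, struct sum_desc, desc);

            sd->total += ntohl(*(__be32 *)elem);
            return 0;                      /* non-zero aborts the walk */
    }

    /* Decode a counted array of 4-byte big-endian words at offset base:
     *
     *      struct sum_desc sd = {
     *              .desc.elem_size = 4,
     *              .desc.xcode     = sum_elem,
     *      };
     *      err = xdr_decode_array2(buf, base, &sd.desc);
     */
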
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c74a6bb940..eca9240594 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -569,8 +569,11 @@ void xprt_connect(struct rpc_task *task)
569 if (xprt->sock != NULL) 569 if (xprt->sock != NULL)
570 schedule_delayed_work(&xprt->sock_connect, 570 schedule_delayed_work(&xprt->sock_connect,
571 RPC_REESTABLISH_TIMEOUT); 571 RPC_REESTABLISH_TIMEOUT);
572 else 572 else {
573 schedule_work(&xprt->sock_connect); 573 schedule_work(&xprt->sock_connect);
574 if (!RPC_IS_ASYNC(task))
575 flush_scheduled_work();
576 }
574 } 577 }
575 return; 578 return;
576 out_write: 579 out_write:
@@ -725,7 +728,8 @@ csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
725 goto no_checksum; 728 goto no_checksum;
726 729
727 desc.csum = csum_partial(skb->data, desc.offset, skb->csum); 730 desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
728 xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits); 731 if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0)
732 return -1;
729 if (desc.offset != skb->len) { 733 if (desc.offset != skb->len) {
730 unsigned int csum2; 734 unsigned int csum2;
731 csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); 735 csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
@@ -737,7 +741,8 @@ csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
737 return -1; 741 return -1;
738 return 0; 742 return 0;
739no_checksum: 743no_checksum:
740 xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits); 744 if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0)
745 return -1;
741 if (desc.count) 746 if (desc.count)
742 return -1; 747 return -1;
743 return 0; 748 return 0;
@@ -821,10 +826,15 @@ tcp_copy_data(skb_reader_t *desc, void *p, size_t len)
821{ 826{
822 if (len > desc->count) 827 if (len > desc->count)
823 len = desc->count; 828 len = desc->count;
824 if (skb_copy_bits(desc->skb, desc->offset, p, len)) 829 if (skb_copy_bits(desc->skb, desc->offset, p, len)) {
830 dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n",
831 len, desc->count);
825 return 0; 832 return 0;
833 }
826 desc->offset += len; 834 desc->offset += len;
827 desc->count -= len; 835 desc->count -= len;
836 dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n",
837 len, desc->count);
828 return len; 838 return len;
829} 839}
830 840
@@ -863,6 +873,8 @@ tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc)
863static void 873static void
864tcp_check_recm(struct rpc_xprt *xprt) 874tcp_check_recm(struct rpc_xprt *xprt)
865{ 875{
876 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n",
877 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags);
866 if (xprt->tcp_offset == xprt->tcp_reclen) { 878 if (xprt->tcp_offset == xprt->tcp_reclen) {
867 xprt->tcp_flags |= XPRT_COPY_RECM; 879 xprt->tcp_flags |= XPRT_COPY_RECM;
868 xprt->tcp_offset = 0; 880 xprt->tcp_offset = 0;
@@ -907,6 +919,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
907 struct rpc_rqst *req; 919 struct rpc_rqst *req;
908 struct xdr_buf *rcvbuf; 920 struct xdr_buf *rcvbuf;
909 size_t len; 921 size_t len;
922 ssize_t r;
910 923
911 /* Find and lock the request corresponding to this xid */ 924 /* Find and lock the request corresponding to this xid */
912 spin_lock(&xprt->sock_lock); 925 spin_lock(&xprt->sock_lock);
@@ -927,15 +940,40 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
927 len = xprt->tcp_reclen - xprt->tcp_offset; 940 len = xprt->tcp_reclen - xprt->tcp_offset;
928 memcpy(&my_desc, desc, sizeof(my_desc)); 941 memcpy(&my_desc, desc, sizeof(my_desc));
929 my_desc.count = len; 942 my_desc.count = len;
930 xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, 943 r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
931 &my_desc, tcp_copy_data); 944 &my_desc, tcp_copy_data);
932 desc->count -= len; 945 desc->count -= r;
933 desc->offset += len; 946 desc->offset += r;
934 } else 947 } else
935 xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, 948 r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
936 desc, tcp_copy_data); 949 desc, tcp_copy_data);
937 xprt->tcp_copied += len; 950
938 xprt->tcp_offset += len; 951 if (r > 0) {
952 xprt->tcp_copied += r;
953 xprt->tcp_offset += r;
954 }
955 if (r != len) {
956 /* Error when copying to the receive buffer,
957 * usually because we weren't able to allocate
958 * additional buffer pages. All we can do now
959 * is turn off XPRT_COPY_DATA, so the request
960 * will not receive any additional updates,
961 * and time out.
962 * Any remaining data from this record will
963 * be discarded.
964 */
965 xprt->tcp_flags &= ~XPRT_COPY_DATA;
966 dprintk("RPC: XID %08x truncated request\n",
967 ntohl(xprt->tcp_xid));
968 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
969 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
970 goto out;
971 }
972
 973 dprintk("RPC: XID %08x read %zd bytes\n",
974 ntohl(xprt->tcp_xid), r);
975 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
976 xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
939 977
940 if (xprt->tcp_copied == req->rq_private_buf.buflen) 978 if (xprt->tcp_copied == req->rq_private_buf.buflen)
941 xprt->tcp_flags &= ~XPRT_COPY_DATA; 979 xprt->tcp_flags &= ~XPRT_COPY_DATA;
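The bookkeeping above now advances tcp_copied/tcp_offset by r, the number of bytes the actor really moved, instead of by the requested len; a short copy flags the record as truncated and stops further updates so the request times out cleanly. The accounting in isolation, with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
            unsigned long tcp_copied = 0;
            unsigned int tcp_offset = 0;
            unsigned int len = 4096;   /* bytes left in this record */
            long r = 1024;             /* what the copy actually moved */

            if (r > 0) {
                    tcp_copied += r;
                    tcp_offset += r;
            }
            if (r != (long)len)
                    printf("truncated: %lu of %u bytes, stop copying\n",
                           tcp_copied, len);
            return 0;
    }
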
@@ -944,6 +982,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
944 xprt->tcp_flags &= ~XPRT_COPY_DATA; 982 xprt->tcp_flags &= ~XPRT_COPY_DATA;
945 } 983 }
946 984
985out:
947 if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { 986 if (!(xprt->tcp_flags & XPRT_COPY_DATA)) {
948 dprintk("RPC: %4d received reply complete\n", 987 dprintk("RPC: %4d received reply complete\n",
949 req->rq_task->tk_pid); 988 req->rq_task->tk_pid);
@@ -967,6 +1006,7 @@ tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc)
967 desc->count -= len; 1006 desc->count -= len;
968 desc->offset += len; 1007 desc->offset += len;
969 xprt->tcp_offset += len; 1008 xprt->tcp_offset += len;
1009 dprintk("RPC: discarded %u bytes\n", len);
970 tcp_check_recm(xprt); 1010 tcp_check_recm(xprt);
971} 1011}
972 1012
@@ -1064,8 +1104,7 @@ tcp_state_change(struct sock *sk)
1064 case TCP_SYN_RECV: 1104 case TCP_SYN_RECV:
1065 break; 1105 break;
1066 default: 1106 default:
1067 if (xprt_test_and_clear_connected(xprt)) 1107 xprt_disconnect(xprt);
1068 rpc_wake_up_status(&xprt->pending, -ENOTCONN);
1069 break; 1108 break;
1070 } 1109 }
1071 out: 1110 out:
@@ -1203,6 +1242,8 @@ xprt_transmit(struct rpc_task *task)
1203 list_add_tail(&req->rq_list, &xprt->recv); 1242 list_add_tail(&req->rq_list, &xprt->recv);
1204 spin_unlock_bh(&xprt->sock_lock); 1243 spin_unlock_bh(&xprt->sock_lock);
1205 xprt_reset_majortimeo(req); 1244 xprt_reset_majortimeo(req);
1245 /* Turn off autodisconnect */
1246 del_singleshot_timer_sync(&xprt->timer);
1206 } 1247 }
1207 } else if (!req->rq_bytes_sent) 1248 } else if (!req->rq_bytes_sent)
1208 return; 1249 return;
@@ -1333,8 +1374,6 @@ xprt_reserve(struct rpc_task *task)
1333 spin_lock(&xprt->xprt_lock); 1374 spin_lock(&xprt->xprt_lock);
1334 do_xprt_reserve(task); 1375 do_xprt_reserve(task);
1335 spin_unlock(&xprt->xprt_lock); 1376 spin_unlock(&xprt->xprt_lock);
1336 if (task->tk_rqstp)
1337 del_timer_sync(&xprt->timer);
1338 } 1377 }
1339} 1378}
1340 1379
@@ -1649,6 +1688,10 @@ xprt_shutdown(struct rpc_xprt *xprt)
1649 rpc_wake_up(&xprt->backlog); 1688 rpc_wake_up(&xprt->backlog);
1650 wake_up(&xprt->cong_wait); 1689 wake_up(&xprt->cong_wait);
1651 del_timer_sync(&xprt->timer); 1690 del_timer_sync(&xprt->timer);
1691
1692 /* synchronously wait for connect worker to finish */
1693 cancel_delayed_work(&xprt->sock_connect);
1694 flush_scheduled_work();
1652} 1695}
1653 1696
1654/* 1697/*
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 2a24b243b8..04bec047fa 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -29,6 +29,10 @@
29 * 2000-11-14 Henner Eisen Closing datalink from NETDEV_GOING_DOWN 29 * 2000-11-14 Henner Eisen Closing datalink from NETDEV_GOING_DOWN
30 * 2002-10-06 Arnaldo C. Melo Get rid of cli/sti, move proc stuff to 30 * 2002-10-06 Arnaldo C. Melo Get rid of cli/sti, move proc stuff to
31 * x25_proc.c, using seq_file 31 * x25_proc.c, using seq_file
32 * 2005-04-02 Shaun Pereira Selective sub address matching
33 * with call user data
34 * 2005-04-15 Shaun Pereira Fast select with no restriction on
35 * response
32 */ 36 */
33 37
34#include <linux/config.h> 38#include <linux/config.h>
@@ -219,7 +223,8 @@ static void x25_insert_socket(struct sock *sk)
219 * Note: if a listening socket has cud set it must only get calls 223 * Note: if a listening socket has cud set it must only get calls
220 * with matching cud. 224 * with matching cud.
221 */ 225 */
222static struct sock *x25_find_listener(struct x25_address *addr, struct x25_calluserdata *calluserdata) 226static struct sock *x25_find_listener(struct x25_address *addr,
227 struct sk_buff *skb)
223{ 228{
224 struct sock *s; 229 struct sock *s;
225 struct sock *next_best; 230 struct sock *next_best;
@@ -230,22 +235,23 @@ static struct sock *x25_find_listener(struct x25_address *addr, struct x25_callu
230 235
231 sk_for_each(s, node, &x25_list) 236 sk_for_each(s, node, &x25_list)
232 if ((!strcmp(addr->x25_addr, 237 if ((!strcmp(addr->x25_addr,
233 x25_sk(s)->source_addr.x25_addr) || 238 x25_sk(s)->source_addr.x25_addr) ||
234 !strcmp(addr->x25_addr, 239 !strcmp(addr->x25_addr,
235 null_x25_address.x25_addr)) && 240 null_x25_address.x25_addr)) &&
236 s->sk_state == TCP_LISTEN) { 241 s->sk_state == TCP_LISTEN) {
237
238 /* 242 /*
239 * Found a listening socket, now check the incoming 243 * Found a listening socket, now check the incoming
240 * call user data vs this sockets call user data 244 * call user data vs this sockets call user data
241 */ 245 */
242 if (x25_check_calluserdata(&x25_sk(s)->calluserdata, calluserdata)) { 246 if(skb->len > 0 && x25_sk(s)->cudmatchlength > 0) {
243 sock_hold(s); 247 if((memcmp(x25_sk(s)->calluserdata.cuddata,
244 goto found; 248 skb->data,
245 } 249 x25_sk(s)->cudmatchlength)) == 0) {
246 if (x25_sk(s)->calluserdata.cudlength == 0) { 250 sock_hold(s);
251 goto found;
252 }
253 } else
247 next_best = s; 254 next_best = s;
248 }
249 } 255 }
250 if (next_best) { 256 if (next_best) {
251 s = next_best; 257 s = next_best;
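x25_find_listener() now matches a configurable prefix of the raw call user data still in the skb instead of requiring a byte-for-byte x25_check_calluserdata() match (that helper is removed at the end of the x25_subr.c diff below). The prefix test as a standalone sketch; the explicit length guard is the sketch's own addition:

    #include <stdio.h>
    #include <string.h>

    static int cud_matches(const unsigned char *ours, size_t matchlen,
                           const unsigned char *theirs, size_t call_len)
    {
            if (matchlen == 0 || call_len < matchlen)
                    return 0;                    /* no selective match */
            return memcmp(ours, theirs, matchlen) == 0;
    }

    int main(void)
    {
            unsigned char prefix[] = { 0xc0, 0x01 };
            unsigned char call[]   = { 0xc0, 0x01, 0x7f };

            puts(cud_matches(prefix, sizeof(prefix), call, sizeof(call))
                 ? "deliver to this listener" : "fall back to next_best");
            return 0;
    }
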
@@ -497,6 +503,9 @@ static int x25_create(struct socket *sock, int protocol)
497 x25->t23 = sysctl_x25_clear_request_timeout; 503 x25->t23 = sysctl_x25_clear_request_timeout;
498 x25->t2 = sysctl_x25_ack_holdback_timeout; 504 x25->t2 = sysctl_x25_ack_holdback_timeout;
499 x25->state = X25_STATE_0; 505 x25->state = X25_STATE_0;
506 x25->cudmatchlength = 0;
507 x25->accptapprv = X25_DENY_ACCPT_APPRV; /* normally no cud */
508 /* on call accept */
500 509
501 x25->facilities.winsize_in = X25_DEFAULT_WINDOW_SIZE; 510 x25->facilities.winsize_in = X25_DEFAULT_WINDOW_SIZE;
502 x25->facilities.winsize_out = X25_DEFAULT_WINDOW_SIZE; 511 x25->facilities.winsize_out = X25_DEFAULT_WINDOW_SIZE;
@@ -545,6 +554,8 @@ static struct sock *x25_make_new(struct sock *osk)
545 x25->t2 = ox25->t2; 554 x25->t2 = ox25->t2;
546 x25->facilities = ox25->facilities; 555 x25->facilities = ox25->facilities;
547 x25->qbitincl = ox25->qbitincl; 556 x25->qbitincl = ox25->qbitincl;
557 x25->cudmatchlength = ox25->cudmatchlength;
558 x25->accptapprv = ox25->accptapprv;
548 559
549 x25_init_timers(sk); 560 x25_init_timers(sk);
550out: 561out:
@@ -822,7 +833,6 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
822 struct x25_sock *makex25; 833 struct x25_sock *makex25;
823 struct x25_address source_addr, dest_addr; 834 struct x25_address source_addr, dest_addr;
824 struct x25_facilities facilities; 835 struct x25_facilities facilities;
825 struct x25_calluserdata calluserdata;
826 int len, rc; 836 int len, rc;
827 837
828 /* 838 /*
@@ -845,19 +855,10 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
845 skb_pull(skb,len); 855 skb_pull(skb,len);
846 856
847 /* 857 /*
848 * Incoming Call User Data.
849 */
850 if (skb->len >= 0) {
851 memcpy(calluserdata.cuddata, skb->data, skb->len);
852 calluserdata.cudlength = skb->len;
853 }
854
855 skb_push(skb,len);
856
857 /*
858 * Find a listener for the particular address/cud pair. 858 * Find a listener for the particular address/cud pair.
859 */ 859 */
860 sk = x25_find_listener(&source_addr,&calluserdata); 860 sk = x25_find_listener(&source_addr,skb);
861 skb_push(skb,len);
861 862
862 /* 863 /*
863 * We can't accept the Call Request. 864 * We can't accept the Call Request.
@@ -900,11 +901,23 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
900 makex25->neighbour = nb; 901 makex25->neighbour = nb;
901 makex25->facilities = facilities; 902 makex25->facilities = facilities;
902 makex25->vc_facil_mask = x25_sk(sk)->vc_facil_mask; 903 makex25->vc_facil_mask = x25_sk(sk)->vc_facil_mask;
903 makex25->calluserdata = calluserdata; 904 /* ensure no reverse facil on accept */
904 905 makex25->vc_facil_mask &= ~X25_MASK_REVERSE;
905 x25_write_internal(make, X25_CALL_ACCEPTED); 906 makex25->cudmatchlength = x25_sk(sk)->cudmatchlength;
907
 908 /* Normally all calls are accepted immediately */
909 if(makex25->accptapprv & X25_DENY_ACCPT_APPRV) {
910 x25_write_internal(make, X25_CALL_ACCEPTED);
911 makex25->state = X25_STATE_3;
912 }
906 913
907 makex25->state = X25_STATE_3; 914 /*
915 * Incoming Call User Data.
916 */
917 if (skb->len >= 0) {
918 memcpy(makex25->calluserdata.cuddata, skb->data, skb->len);
919 makex25->calluserdata.cudlength = skb->len;
920 }
908 921
909 sk->sk_ack_backlog++; 922 sk->sk_ack_backlog++;
910 923
@@ -1288,7 +1301,8 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1288 if (facilities.throughput < 0x03 || 1301 if (facilities.throughput < 0x03 ||
1289 facilities.throughput > 0xDD) 1302 facilities.throughput > 0xDD)
1290 break; 1303 break;
1291 if (facilities.reverse && facilities.reverse != 1) 1304 if (facilities.reverse &&
1305 (facilities.reverse | 0x81) != 0x81)
1292 break; 1306 break;
1293 x25->facilities = facilities; 1307 x25->facilities = facilities;
1294 rc = 0; 1308 rc = 0;
@@ -1325,6 +1339,44 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1325 break; 1339 break;
1326 } 1340 }
1327 1341
1342 case SIOCX25SCUDMATCHLEN: {
1343 struct x25_subaddr sub_addr;
1344 rc = -EINVAL;
1345 if(sk->sk_state != TCP_CLOSE)
1346 break;
1347 rc = -EFAULT;
1348 if (copy_from_user(&sub_addr, argp,
1349 sizeof(sub_addr)))
1350 break;
1351 rc = -EINVAL;
1352 if(sub_addr.cudmatchlength > X25_MAX_CUD_LEN)
1353 break;
1354 x25->cudmatchlength = sub_addr.cudmatchlength;
1355 rc = 0;
1356 break;
1357 }
1358
1359 case SIOCX25CALLACCPTAPPRV: {
1360 rc = -EINVAL;
1361 if (sk->sk_state != TCP_CLOSE)
1362 break;
1363 x25->accptapprv = X25_ALLOW_ACCPT_APPRV;
1364 rc = 0;
1365 break;
1366 }
1367
1368 case SIOCX25SENDCALLACCPT: {
1369 rc = -EINVAL;
1370 if (sk->sk_state != TCP_ESTABLISHED)
1371 break;
1372 if (x25->accptapprv) /* must call accptapprv above */
1373 break;
1374 x25_write_internal(sk, X25_CALL_ACCEPTED);
1375 x25->state = X25_STATE_3;
1376 rc = 0;
1377 break;
1378 }
1379
1328 default: 1380 default:
1329 rc = dev_ioctl(cmd, argp); 1381 rc = dev_ioctl(cmd, argp);
1330 break; 1382 break;
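The three new ioctls combine into a deferred-accept flow: set the CUD match length and enable call-accept approval on the listener, then send the CALL ACCEPTED packet explicitly from userland. A hypothetical userspace sketch; it assumes the header side of this patch defines struct x25_subaddr (with a cudmatchlength field) and the SIOCX25* numbers used here:

    #include <sys/ioctl.h>
    #include <linux/x25.h>

    static int arm_listener(int lsk)
    {
            struct x25_subaddr sub = { .cudmatchlength = 4 };

            /* only deliver calls whose first 4 CUD bytes match ours */
            if (ioctl(lsk, SIOCX25SCUDMATCHLEN, &sub) < 0)
                    return -1;
            /* hold the CALL ACCEPTED packet until we ask for it */
            return ioctl(lsk, SIOCX25CALLACCPTAPPRV, 0);
    }

    /* later, on the accepted (established) socket: */
    static int accept_call(int ask)
    {
            return ioctl(ask, SIOCX25SENDCALLACCPT, 0);
    }
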
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index a21bdb95f9..54278b962f 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -17,6 +17,8 @@
17 * X.25 001 Split from x25_subr.c 17 * X.25 001 Split from x25_subr.c
18 * mar/20/00 Daniela Squassoni Disabling/enabling of facilities 18 * mar/20/00 Daniela Squassoni Disabling/enabling of facilities
19 * negotiation. 19 * negotiation.
20 * apr/14/05 Shaun Pereira - Allow fast select with no restriction
21 * on response.
20 */ 22 */
21 23
22#include <linux/kernel.h> 24#include <linux/kernel.h>
@@ -43,9 +45,31 @@ int x25_parse_facilities(struct sk_buff *skb,
43 case X25_FAC_CLASS_A: 45 case X25_FAC_CLASS_A:
44 switch (*p) { 46 switch (*p) {
45 case X25_FAC_REVERSE: 47 case X25_FAC_REVERSE:
46 facilities->reverse = p[1] & 0x01; 48 if((p[1] & 0x81) == 0x81) {
47 *vc_fac_mask |= X25_MASK_REVERSE; 49 facilities->reverse = p[1] & 0x81;
48 break; 50 *vc_fac_mask |= X25_MASK_REVERSE;
51 break;
52 }
53
54 if((p[1] & 0x01) == 0x01) {
55 facilities->reverse = p[1] & 0x01;
56 *vc_fac_mask |= X25_MASK_REVERSE;
57 break;
58 }
59
60 if((p[1] & 0x80) == 0x80) {
61 facilities->reverse = p[1] & 0x80;
62 *vc_fac_mask |= X25_MASK_REVERSE;
63 break;
64 }
65
66 if(p[1] == 0x00) {
67 facilities->reverse
68 = X25_DEFAULT_REVERSE;
69 *vc_fac_mask |= X25_MASK_REVERSE;
70 break;
71 }
72
49 case X25_FAC_THROUGHPUT: 73 case X25_FAC_THROUGHPUT:
50 facilities->throughput = p[1]; 74 facilities->throughput = p[1];
51 *vc_fac_mask |= X25_MASK_THROUGHPUT; 75 *vc_fac_mask |= X25_MASK_THROUGHPUT;
@@ -122,7 +146,7 @@ int x25_create_facilities(unsigned char *buffer,
122 146
123 if (facilities->reverse && (facil_mask & X25_MASK_REVERSE)) { 147 if (facilities->reverse && (facil_mask & X25_MASK_REVERSE)) {
124 *p++ = X25_FAC_REVERSE; 148 *p++ = X25_FAC_REVERSE;
125 *p++ = !!facilities->reverse; 149 *p++ = facilities->reverse;
126 } 150 }
127 151
128 if (facilities->throughput && (facil_mask & X25_MASK_THROUGHPUT)) { 152 if (facilities->throughput && (facil_mask & X25_MASK_THROUGHPUT)) {
@@ -171,7 +195,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
171 /* 195 /*
172 * They want reverse charging, we won't accept it. 196 * They want reverse charging, we won't accept it.
173 */ 197 */
 174 if (theirs.reverse && ours->reverse) { 198 if ((theirs.reverse & 0x01) && (ours->reverse & 0x01)) {
175 SOCK_DEBUG(sk, "X.25: rejecting reverse charging request"); 199 SOCK_DEBUG(sk, "X.25: rejecting reverse charging request");
176 return -1; 200 return -1;
177 } 201 }
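The facility byte for X25_FAC_REVERSE is now kept as a bit mask instead of being squashed to 0 or 1: bit 0 carries reverse charging, bit 7 fast select with no restriction on response, 0x81 both, and 0x00 falls back to the default. The parse order above restated as a small decoder sketch (0x00 stands in for X25_DEFAULT_REVERSE):

    #include <stdio.h>

    static unsigned char parse_reverse(unsigned char p1)
    {
            if ((p1 & 0x81) == 0x81)
                    return 0x81;   /* fast select + reverse charging */
            if ((p1 & 0x01) == 0x01)
                    return 0x01;   /* reverse charging */
            if ((p1 & 0x80) == 0x80)
                    return 0x80;   /* fast select, no restriction on response */
            return 0x00;           /* default */
    }

    int main(void)
    {
            printf("0x%02x 0x%02x\n", parse_reverse(0x80), parse_reverse(0x81));
            return 0;
    }
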
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 183fea3bba..7fd872ad0c 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -19,6 +19,8 @@
19 * mar/20/00 Daniela Squassoni Disabling/enabling of facilities 19 * mar/20/00 Daniela Squassoni Disabling/enabling of facilities
20 * negotiation. 20 * negotiation.
21 * jun/24/01 Arnaldo C. Melo use skb_queue_purge, cleanups 21 * jun/24/01 Arnaldo C. Melo use skb_queue_purge, cleanups
22 * apr/04/15 Shaun Pereira Fast select with no
23 * restriction on response.
22 */ 24 */
23 25
24#include <linux/kernel.h> 26#include <linux/kernel.h>
@@ -127,8 +129,12 @@ void x25_write_internal(struct sock *sk, int frametype)
127 len += 1 + X25_ADDR_LEN + X25_MAX_FAC_LEN + 129 len += 1 + X25_ADDR_LEN + X25_MAX_FAC_LEN +
128 X25_MAX_CUD_LEN; 130 X25_MAX_CUD_LEN;
129 break; 131 break;
130 case X25_CALL_ACCEPTED: 132 case X25_CALL_ACCEPTED: /* fast sel with no restr on resp */
131 len += 1 + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; 133 if(x25->facilities.reverse & 0x80) {
134 len += 1 + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN;
135 } else {
136 len += 1 + X25_MAX_FAC_LEN;
137 }
132 break; 138 break;
133 case X25_CLEAR_REQUEST: 139 case X25_CLEAR_REQUEST:
134 case X25_RESET_REQUEST: 140 case X25_RESET_REQUEST:
@@ -203,9 +209,16 @@ void x25_write_internal(struct sock *sk, int frametype)
203 x25->vc_facil_mask); 209 x25->vc_facil_mask);
204 dptr = skb_put(skb, len); 210 dptr = skb_put(skb, len);
205 memcpy(dptr, facilities, len); 211 memcpy(dptr, facilities, len);
206 dptr = skb_put(skb, x25->calluserdata.cudlength); 212
207 memcpy(dptr, x25->calluserdata.cuddata, 213 /* fast select with no restriction on response
208 x25->calluserdata.cudlength); 214 allows call user data. Userland must
215 ensure it is ours and not theirs */
216 if(x25->facilities.reverse & 0x80) {
217 dptr = skb_put(skb,
218 x25->calluserdata.cudlength);
219 memcpy(dptr, x25->calluserdata.cuddata,
220 x25->calluserdata.cudlength);
221 }
209 x25->calluserdata.cudlength = 0; 222 x25->calluserdata.cudlength = 0;
210 break; 223 break;
211 224
@@ -354,21 +367,3 @@ void x25_check_rbuf(struct sock *sk)
354 } 367 }
355} 368}
356 369
357/*
358 * Compare 2 calluserdata structures, used to find correct listening sockets
359 * when call user data is used.
360 */
361int x25_check_calluserdata(struct x25_calluserdata *ours, struct x25_calluserdata *theirs)
362{
363 int i;
364 if (ours->cudlength != theirs->cudlength)
365 return 0;
366
367 for (i=0;i<ours->cudlength;i++) {
368 if (ours->cuddata[i] != theirs->cuddata[i]) {
369 return 0;
370 }
371 }
372 return 1;
373}
374
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d07f5ce318..d65ed8684f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -118,7 +118,6 @@ retry:
118 xfrm_policy_put_afinfo(afinfo); 118 xfrm_policy_put_afinfo(afinfo);
119 return type; 119 return type;
120} 120}
121EXPORT_SYMBOL(xfrm_get_type);
122 121
123int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 122int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl,
124 unsigned short family) 123 unsigned short family)
@@ -216,8 +215,8 @@ out:
216 215
217expired: 216expired:
218 read_unlock(&xp->lock); 217 read_unlock(&xp->lock);
219 km_policy_expired(xp, dir, 1); 218 if (!xfrm_policy_delete(xp, dir))
220 xfrm_policy_delete(xp, dir); 219 km_policy_expired(xp, dir, 1);
221 xfrm_pol_put(xp); 220 xfrm_pol_put(xp);
222} 221}
223 222
@@ -555,7 +554,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
555 return NULL; 554 return NULL;
556} 555}
557 556
558void xfrm_policy_delete(struct xfrm_policy *pol, int dir) 557int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
559{ 558{
560 write_lock_bh(&xfrm_policy_lock); 559 write_lock_bh(&xfrm_policy_lock);
561 pol = __xfrm_policy_unlink(pol, dir); 560 pol = __xfrm_policy_unlink(pol, dir);
@@ -564,7 +563,9 @@ void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
564 if (dir < XFRM_POLICY_MAX) 563 if (dir < XFRM_POLICY_MAX)
565 atomic_inc(&flow_cache_genid); 564 atomic_inc(&flow_cache_genid);
566 xfrm_policy_kill(pol); 565 xfrm_policy_kill(pol);
566 return 0;
567 } 567 }
568 return -ENOENT;
568} 569}
569 570
570int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) 571int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
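xfrm_policy_delete() (and xfrm_state_delete() in the next file) now reports whether this caller actually unlinked the object, which is what lets the expiry paths above notify exactly once even when deletion races with another context. The delete-once pattern in isolation; the kernel serializes the real check under xfrm_policy_lock, which this sketch omits:

    #include <stdio.h>

    static int deleted;

    static int delete_once(void)   /* 0: we unlinked it; -ENOENT: raced */
    {
            if (!deleted) {
                    deleted = 1;
                    return 0;
            }
            return -2;
    }

    int main(void)
    {
            if (!delete_once())
                    puts("notify expire (first deleter)");
            if (!delete_once())
                    puts("never printed (second deleter)");
            return 0;
    }
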
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d11747c2a7..9d206c282c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -50,7 +50,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
50 50
51static int xfrm_state_gc_flush_bundles; 51static int xfrm_state_gc_flush_bundles;
52 52
53static void __xfrm_state_delete(struct xfrm_state *x); 53static int __xfrm_state_delete(struct xfrm_state *x);
54 54
55static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); 55static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
56static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); 56static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -154,6 +154,7 @@ static void xfrm_timer_handler(unsigned long data)
154 next = tmo; 154 next = tmo;
155 } 155 }
156 156
157 x->km.dying = warn;
157 if (warn) 158 if (warn)
158 km_state_expired(x, 0); 159 km_state_expired(x, 0);
159resched: 160resched:
@@ -169,9 +170,8 @@ expired:
169 next = 2; 170 next = 2;
170 goto resched; 171 goto resched;
171 } 172 }
172 if (x->id.spi != 0) 173 if (!__xfrm_state_delete(x) && x->id.spi)
173 km_state_expired(x, 1); 174 km_state_expired(x, 1);
174 __xfrm_state_delete(x);
175 175
176out: 176out:
177 spin_unlock(&x->lock); 177 spin_unlock(&x->lock);
@@ -215,8 +215,10 @@ void __xfrm_state_destroy(struct xfrm_state *x)
215} 215}
216EXPORT_SYMBOL(__xfrm_state_destroy); 216EXPORT_SYMBOL(__xfrm_state_destroy);
217 217
218static void __xfrm_state_delete(struct xfrm_state *x) 218static int __xfrm_state_delete(struct xfrm_state *x)
219{ 219{
220 int err = -ESRCH;
221
220 if (x->km.state != XFRM_STATE_DEAD) { 222 if (x->km.state != XFRM_STATE_DEAD) {
221 x->km.state = XFRM_STATE_DEAD; 223 x->km.state = XFRM_STATE_DEAD;
222 spin_lock(&xfrm_state_lock); 224 spin_lock(&xfrm_state_lock);
@@ -245,14 +247,21 @@ static void __xfrm_state_delete(struct xfrm_state *x)
245 * is what we are dropping here. 247 * is what we are dropping here.
246 */ 248 */
247 atomic_dec(&x->refcnt); 249 atomic_dec(&x->refcnt);
250 err = 0;
248 } 251 }
252
253 return err;
249} 254}
250 255
251void xfrm_state_delete(struct xfrm_state *x) 256int xfrm_state_delete(struct xfrm_state *x)
252{ 257{
258 int err;
259
253 spin_lock_bh(&x->lock); 260 spin_lock_bh(&x->lock);
254 __xfrm_state_delete(x); 261 err = __xfrm_state_delete(x);
255 spin_unlock_bh(&x->lock); 262 spin_unlock_bh(&x->lock);
263
264 return err;
256} 265}
257EXPORT_SYMBOL(xfrm_state_delete); 266EXPORT_SYMBOL(xfrm_state_delete);
258 267
@@ -557,16 +566,18 @@ int xfrm_state_check_expire(struct xfrm_state *x)
557 566
558 if (x->curlft.bytes >= x->lft.hard_byte_limit || 567 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
559 x->curlft.packets >= x->lft.hard_packet_limit) { 568 x->curlft.packets >= x->lft.hard_packet_limit) {
560 km_state_expired(x, 1); 569 x->km.state = XFRM_STATE_EXPIRED;
561 if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ)) 570 if (!mod_timer(&x->timer, jiffies))
562 xfrm_state_hold(x); 571 xfrm_state_hold(x);
563 return -EINVAL; 572 return -EINVAL;
564 } 573 }
565 574
566 if (!x->km.dying && 575 if (!x->km.dying &&
567 (x->curlft.bytes >= x->lft.soft_byte_limit || 576 (x->curlft.bytes >= x->lft.soft_byte_limit ||
568 x->curlft.packets >= x->lft.soft_packet_limit)) 577 x->curlft.packets >= x->lft.soft_packet_limit)) {
578 x->km.dying = 1;
569 km_state_expired(x, 0); 579 km_state_expired(x, 0);
580 }
570 return 0; 581 return 0;
571} 582}
572EXPORT_SYMBOL(xfrm_state_check_expire); 583EXPORT_SYMBOL(xfrm_state_check_expire);
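xfrm_state_check_expire() now hands the hard-limit case to the timer (mark the state expired, fire mod_timer() immediately) and latches the soft warning with km.dying so it is raised only once. The limit checks by themselves, with made-up counters:

    #include <stdio.h>

    struct lft { unsigned long bytes, packets; };

    int main(void)
    {
            struct lft cur  = { 900,   10 };
            struct lft soft = { 800,  100 };
            struct lft hard = { 1000, 200 };
            int dying = 0;

            if (cur.bytes >= hard.bytes || cur.packets >= hard.packets) {
                    puts("hard: mark EXPIRED, fire timer now");
            } else if (!dying && (cur.bytes >= soft.bytes ||
                                  cur.packets >= soft.packets)) {
                    dying = 1;             /* warn exactly once */
                    puts("soft: km_state_expired(x, 0)");
            }
            return 0;
    }
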
@@ -796,34 +807,56 @@ EXPORT_SYMBOL(xfrm_replay_advance);
796static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list); 807static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
797static DEFINE_RWLOCK(xfrm_km_lock); 808static DEFINE_RWLOCK(xfrm_km_lock);
798 809
799static void km_state_expired(struct xfrm_state *x, int hard) 810void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
800{ 811{
801 struct xfrm_mgr *km; 812 struct xfrm_mgr *km;
802 813
803 if (hard) 814 read_lock(&xfrm_km_lock);
804 x->km.state = XFRM_STATE_EXPIRED; 815 list_for_each_entry(km, &xfrm_km_list, list)
805 else 816 if (km->notify_policy)
806 x->km.dying = 1; 817 km->notify_policy(xp, dir, c);
818 read_unlock(&xfrm_km_lock);
819}
807 820
821void km_state_notify(struct xfrm_state *x, struct km_event *c)
822{
823 struct xfrm_mgr *km;
808 read_lock(&xfrm_km_lock); 824 read_lock(&xfrm_km_lock);
809 list_for_each_entry(km, &xfrm_km_list, list) 825 list_for_each_entry(km, &xfrm_km_list, list)
810 km->notify(x, hard); 826 if (km->notify)
827 km->notify(x, c);
811 read_unlock(&xfrm_km_lock); 828 read_unlock(&xfrm_km_lock);
829}
830
831EXPORT_SYMBOL(km_policy_notify);
832EXPORT_SYMBOL(km_state_notify);
833
834static void km_state_expired(struct xfrm_state *x, int hard)
835{
836 struct km_event c;
837
838 c.data.hard = hard;
839 c.event = XFRM_MSG_EXPIRE;
840 km_state_notify(x, &c);
812 841
813 if (hard) 842 if (hard)
814 wake_up(&km_waitq); 843 wake_up(&km_waitq);
815} 844}
816 845
846/*
 847 * We send to all registered managers regardless of failure;
 848 * we are happy with one success.
 849 */
817static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) 850static int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
818{ 851{
819 int err = -EINVAL; 852 int err = -EINVAL, acqret;
820 struct xfrm_mgr *km; 853 struct xfrm_mgr *km;
821 854
822 read_lock(&xfrm_km_lock); 855 read_lock(&xfrm_km_lock);
823 list_for_each_entry(km, &xfrm_km_list, list) { 856 list_for_each_entry(km, &xfrm_km_list, list) {
824 err = km->acquire(x, t, pol, XFRM_POLICY_OUT); 857 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
825 if (!err) 858 if (!acqret)
826 break; 859 err = acqret;
827 } 860 }
828 read_unlock(&xfrm_km_lock); 861 read_unlock(&xfrm_km_lock);
829 return err; 862 return err;
@@ -848,13 +881,11 @@ EXPORT_SYMBOL(km_new_mapping);
848 881
849void km_policy_expired(struct xfrm_policy *pol, int dir, int hard) 882void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
850{ 883{
851 struct xfrm_mgr *km; 884 struct km_event c;
852 885
853 read_lock(&xfrm_km_lock); 886 c.data.hard = hard;
854 list_for_each_entry(km, &xfrm_km_list, list) 887 c.event = XFRM_MSG_POLEXPIRE;
855 if (km->notify_policy) 888 km_policy_notify(pol, dir, &c);
856 km->notify_policy(pol, dir, hard);
857 read_unlock(&xfrm_km_lock);
858 889
859 if (hard) 890 if (hard)
860 wake_up(&km_waitq); 891 wake_up(&km_waitq);
@@ -1024,6 +1055,43 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1024} 1055}
1025 1056
1026EXPORT_SYMBOL(xfrm_state_mtu); 1057EXPORT_SYMBOL(xfrm_state_mtu);
1058
1059int xfrm_init_state(struct xfrm_state *x)
1060{
1061 struct xfrm_state_afinfo *afinfo;
1062 int family = x->props.family;
1063 int err;
1064
1065 err = -EAFNOSUPPORT;
1066 afinfo = xfrm_state_get_afinfo(family);
1067 if (!afinfo)
1068 goto error;
1069
1070 err = 0;
1071 if (afinfo->init_flags)
1072 err = afinfo->init_flags(x);
1073
1074 xfrm_state_put_afinfo(afinfo);
1075
1076 if (err)
1077 goto error;
1078
1079 err = -EPROTONOSUPPORT;
1080 x->type = xfrm_get_type(x->id.proto, family);
1081 if (x->type == NULL)
1082 goto error;
1083
1084 err = x->type->init_state(x);
1085 if (err)
1086 goto error;
1087
1088 x->km.state = XFRM_STATE_VALID;
1089
1090error:
1091 return err;
1092}
1093
1094EXPORT_SYMBOL(xfrm_init_state);
1027 1095
1028void __init xfrm_state_init(void) 1096void __init xfrm_state_init(void)
1029{ 1097{
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 97509011c2..ecade4893a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -249,17 +249,10 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
249 if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) 249 if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1])))
250 goto error; 250 goto error;
251 251
252 err = -ENOENT; 252 err = xfrm_init_state(x);
253 x->type = xfrm_get_type(x->id.proto, x->props.family);
254 if (x->type == NULL)
255 goto error;
256
257 err = x->type->init_state(x, NULL);
258 if (err) 253 if (err)
259 goto error; 254 goto error;
260 255
261 x->curlft.add_time = (unsigned long) xtime.tv_sec;
262 x->km.state = XFRM_STATE_VALID;
263 x->km.seq = p->seq; 256 x->km.seq = p->seq;
264 257
265 return x; 258 return x;
@@ -277,6 +270,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
277 struct xfrm_usersa_info *p = NLMSG_DATA(nlh); 270 struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
278 struct xfrm_state *x; 271 struct xfrm_state *x;
279 int err; 272 int err;
273 struct km_event c;
280 274
281 err = verify_newsa_info(p, (struct rtattr **) xfrma); 275 err = verify_newsa_info(p, (struct rtattr **) xfrma);
282 if (err) 276 if (err)
@@ -286,6 +280,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
286 if (!x) 280 if (!x)
287 return err; 281 return err;
288 282
283 xfrm_state_hold(x);
289 if (nlh->nlmsg_type == XFRM_MSG_NEWSA) 284 if (nlh->nlmsg_type == XFRM_MSG_NEWSA)
290 err = xfrm_state_add(x); 285 err = xfrm_state_add(x);
291 else 286 else
@@ -294,14 +289,24 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
294 if (err < 0) { 289 if (err < 0) {
295 x->km.state = XFRM_STATE_DEAD; 290 x->km.state = XFRM_STATE_DEAD;
296 xfrm_state_put(x); 291 xfrm_state_put(x);
292 goto out;
297 } 293 }
298 294
295 c.seq = nlh->nlmsg_seq;
296 c.pid = nlh->nlmsg_pid;
297 c.event = nlh->nlmsg_type;
298
299 km_state_notify(x, &c);
300out:
301 xfrm_state_put(x);
299 return err; 302 return err;
300} 303}
301 304
302static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 305static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
303{ 306{
304 struct xfrm_state *x; 307 struct xfrm_state *x;
308 int err;
309 struct km_event c;
305 struct xfrm_usersa_id *p = NLMSG_DATA(nlh); 310 struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
306 311
307 x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); 312 x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
@@ -313,10 +318,19 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
313 return -EPERM; 318 return -EPERM;
314 } 319 }
315 320
316 xfrm_state_delete(x); 321 err = xfrm_state_delete(x);
322 if (err < 0) {
323 xfrm_state_put(x);
324 return err;
325 }
326
327 c.seq = nlh->nlmsg_seq;
328 c.pid = nlh->nlmsg_pid;
329 c.event = nlh->nlmsg_type;
330 km_state_notify(x, &c);
317 xfrm_state_put(x); 331 xfrm_state_put(x);
318 332
319 return 0; 333 return err;
320} 334}
321 335
322static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) 336static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
@@ -681,6 +695,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
681{ 695{
682 struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); 696 struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh);
683 struct xfrm_policy *xp; 697 struct xfrm_policy *xp;
698 struct km_event c;
684 int err; 699 int err;
685 int excl; 700 int excl;
686 701
@@ -692,6 +707,10 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
692 if (!xp) 707 if (!xp)
693 return err; 708 return err;
694 709
 710 /* shouldn't excl be based on nlh flags??
 711 * Aha! this is anti-netlink really, i.e. more pfkey-derived;
 712 * in netlink excl is a flag and you wouldn't need
 713 * a type XFRM_MSG_UPDPOLICY - JHS */
695 excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; 714 excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
696 err = xfrm_policy_insert(p->dir, xp, excl); 715 err = xfrm_policy_insert(p->dir, xp, excl);
697 if (err) { 716 if (err) {
@@ -699,6 +718,11 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
699 return err; 718 return err;
700 } 719 }
701 720
721 c.event = nlh->nlmsg_type;
722 c.seq = nlh->nlmsg_seq;
723 c.pid = nlh->nlmsg_pid;
724 km_policy_notify(xp, p->dir, &c);
725
702 xfrm_pol_put(xp); 726 xfrm_pol_put(xp);
703 727
704 return 0; 728 return 0;
@@ -816,6 +840,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
816 struct xfrm_policy *xp; 840 struct xfrm_policy *xp;
817 struct xfrm_userpolicy_id *p; 841 struct xfrm_userpolicy_id *p;
818 int err; 842 int err;
843 struct km_event c;
819 int delete; 844 int delete;
820 845
821 p = NLMSG_DATA(nlh); 846 p = NLMSG_DATA(nlh);
@@ -843,6 +868,12 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
843 NETLINK_CB(skb).pid, 868 NETLINK_CB(skb).pid,
844 MSG_DONTWAIT); 869 MSG_DONTWAIT);
845 } 870 }
871 } else {
872 c.data.byid = p->index;
873 c.event = nlh->nlmsg_type;
874 c.seq = nlh->nlmsg_seq;
875 c.pid = nlh->nlmsg_pid;
876 km_policy_notify(xp, p->dir, &c);
846 } 877 }
847 878
848 xfrm_pol_put(xp); 879 xfrm_pol_put(xp);
@@ -852,15 +883,28 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
852 883
853static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 884static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
854{ 885{
886 struct km_event c;
855 struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); 887 struct xfrm_usersa_flush *p = NLMSG_DATA(nlh);
856 888
857 xfrm_state_flush(p->proto); 889 xfrm_state_flush(p->proto);
890 c.data.proto = p->proto;
891 c.event = nlh->nlmsg_type;
892 c.seq = nlh->nlmsg_seq;
893 c.pid = nlh->nlmsg_pid;
894 km_state_notify(NULL, &c);
895
858 return 0; 896 return 0;
859} 897}
860 898
861static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) 899static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
862{ 900{
901 struct km_event c;
902
863 xfrm_policy_flush(); 903 xfrm_policy_flush();
904 c.event = nlh->nlmsg_type;
905 c.seq = nlh->nlmsg_seq;
906 c.pid = nlh->nlmsg_pid;
907 km_policy_notify(NULL, 0, &c);
864 return 0; 908 return 0;
865} 909}
866 910
@@ -1069,15 +1113,16 @@ nlmsg_failure:
1069 return -1; 1113 return -1;
1070} 1114}
1071 1115
1072static int xfrm_send_state_notify(struct xfrm_state *x, int hard) 1116static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c)
1073{ 1117{
1074 struct sk_buff *skb; 1118 struct sk_buff *skb;
1119 int len = NLMSG_LENGTH(sizeof(struct xfrm_user_expire));
1075 1120
1076 skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16, GFP_ATOMIC); 1121 skb = alloc_skb(len, GFP_ATOMIC);
1077 if (skb == NULL) 1122 if (skb == NULL)
1078 return -ENOMEM; 1123 return -ENOMEM;
1079 1124
1080 if (build_expire(skb, x, hard) < 0) 1125 if (build_expire(skb, x, c->data.hard) < 0)
1081 BUG(); 1126 BUG();
1082 1127
1083 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; 1128 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
@@ -1085,6 +1130,131 @@ static int xfrm_send_state_notify(struct xfrm_state *x, int hard)
1085 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); 1130 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
1086} 1131}
1087 1132
1133static int xfrm_notify_sa_flush(struct km_event *c)
1134{
1135 struct xfrm_usersa_flush *p;
1136 struct nlmsghdr *nlh;
1137 struct sk_buff *skb;
1138 unsigned char *b;
1139 int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
1140
1141 skb = alloc_skb(len, GFP_ATOMIC);
1142 if (skb == NULL)
1143 return -ENOMEM;
1144 b = skb->tail;
1145
1146 nlh = NLMSG_PUT(skb, c->pid, c->seq,
1147 XFRM_MSG_FLUSHSA, sizeof(*p));
1148 nlh->nlmsg_flags = 0;
1149
1150 p = NLMSG_DATA(nlh);
1151 p->proto = c->data.proto;
1152
1153 nlh->nlmsg_len = skb->tail - b;
1154
1155 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
1156
1157nlmsg_failure:
1158 kfree_skb(skb);
1159 return -1;
1160}
1161
1162static int inline xfrm_sa_len(struct xfrm_state *x)
1163{
1164 int l = 0;
1165 if (x->aalg)
1166 l += RTA_SPACE(sizeof(*x->aalg) + (x->aalg->alg_key_len+7)/8);
1167 if (x->ealg)
1168 l += RTA_SPACE(sizeof(*x->ealg) + (x->ealg->alg_key_len+7)/8);
1169 if (x->calg)
1170 l += RTA_SPACE(sizeof(*x->calg));
1171 if (x->encap)
1172 l += RTA_SPACE(sizeof(*x->encap));
1173
1174 return l;
1175}
1176
1177static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
1178{
1179 struct xfrm_usersa_info *p;
1180 struct xfrm_usersa_id *id;
1181 struct nlmsghdr *nlh;
1182 struct sk_buff *skb;
1183 unsigned char *b;
1184 int len = xfrm_sa_len(x);
1185 int headlen;
1186
1187 headlen = sizeof(*p);
1188 if (c->event == XFRM_MSG_DELSA) {
1189 len += RTA_SPACE(headlen);
1190 headlen = sizeof(*id);
1191 }
1192 len += NLMSG_SPACE(headlen);
1193
1194 skb = alloc_skb(len, GFP_ATOMIC);
1195 if (skb == NULL)
1196 return -ENOMEM;
1197 b = skb->tail;
1198
1199 nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen);
1200 nlh->nlmsg_flags = 0;
1201
1202 p = NLMSG_DATA(nlh);
1203 if (c->event == XFRM_MSG_DELSA) {
1204 id = NLMSG_DATA(nlh);
1205 memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr));
1206 id->spi = x->id.spi;
1207 id->family = x->props.family;
1208 id->proto = x->id.proto;
1209
1210 p = RTA_DATA(__RTA_PUT(skb, XFRMA_SA, sizeof(*p)));
1211 }
1212
1213 copy_to_user_state(x, p);
1214
1215 if (x->aalg)
1216 RTA_PUT(skb, XFRMA_ALG_AUTH,
1217 sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg);
1218 if (x->ealg)
1219 RTA_PUT(skb, XFRMA_ALG_CRYPT,
1220 sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg);
1221 if (x->calg)
1222 RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
1223
1224 if (x->encap)
1225 RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
1226
1227 nlh->nlmsg_len = skb->tail - b;
1228
1229 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC);
1230
1231nlmsg_failure:
1232rtattr_failure:
1233 kfree_skb(skb);
1234 return -1;
1235}
1236
1237static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c)
1238{
1239
1240 switch (c->event) {
1241 case XFRM_MSG_EXPIRE:
1242 return xfrm_exp_state_notify(x, c);
1243 case XFRM_MSG_DELSA:
1244 case XFRM_MSG_UPDSA:
1245 case XFRM_MSG_NEWSA:
1246 return xfrm_notify_sa(x, c);
1247 case XFRM_MSG_FLUSHSA:
1248 return xfrm_notify_sa_flush(c);
1249 default:
1250 printk("xfrm_user: Unknown SA event %d\n", c->event);
1251 break;
1252 }
1253
1254 return 0;
1255
1256}
1257
1088static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, 1258static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
1089 struct xfrm_tmpl *xt, struct xfrm_policy *xp, 1259 struct xfrm_tmpl *xt, struct xfrm_policy *xp,
1090 int dir) 1260 int dir)
@@ -1218,7 +1388,7 @@ nlmsg_failure:
1218 return -1; 1388 return -1;
1219} 1389}
1220 1390
1221static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard) 1391static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1222{ 1392{
1223 struct sk_buff *skb; 1393 struct sk_buff *skb;
1224 size_t len; 1394 size_t len;
@@ -1229,7 +1399,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
1229 if (skb == NULL) 1399 if (skb == NULL)
1230 return -ENOMEM; 1400 return -ENOMEM;
1231 1401
1232 if (build_polexpire(skb, xp, dir, hard) < 0) 1402 if (build_polexpire(skb, xp, dir, c->data.hard) < 0)
1233 BUG(); 1403 BUG();
1234 1404
1235 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; 1405 NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
@@ -1237,6 +1407,103 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
1237 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); 1407 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
1238} 1408}
1239 1409
1410static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
1411{
1412 struct xfrm_userpolicy_info *p;
1413 struct xfrm_userpolicy_id *id;
1414 struct nlmsghdr *nlh;
1415 struct sk_buff *skb;
1416 unsigned char *b;
1417 int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
1418 int headlen;
1419
1420 headlen = sizeof(*p);
1421 if (c->event == XFRM_MSG_DELPOLICY) {
1422 len += RTA_SPACE(headlen);
1423 headlen = sizeof(*id);
1424 }
1425 len += NLMSG_SPACE(headlen);
1426
1427 skb = alloc_skb(len, GFP_ATOMIC);
1428 if (skb == NULL)
1429 return -ENOMEM;
1430 b = skb->tail;
1431
1432 nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen);
1433
1434 p = NLMSG_DATA(nlh);
1435 if (c->event == XFRM_MSG_DELPOLICY) {
1436 id = NLMSG_DATA(nlh);
1437 memset(id, 0, sizeof(*id));
1438 id->dir = dir;
1439 if (c->data.byid)
1440 id->index = xp->index;
1441 else
1442 memcpy(&id->sel, &xp->selector, sizeof(id->sel));
1443
1444 p = RTA_DATA(__RTA_PUT(skb, XFRMA_POLICY, sizeof(*p)));
1445 }
1446
1447 nlh->nlmsg_flags = 0;
1448
1449 copy_to_user_policy(xp, p, dir);
1450 if (copy_to_user_tmpl(xp, skb) < 0)
1451 goto nlmsg_failure;
1452
1453 nlh->nlmsg_len = skb->tail - b;
1454
1455 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
1456
1457nlmsg_failure:
1458rtattr_failure:
1459 kfree_skb(skb);
1460 return -1;
1461}
1462
1463static int xfrm_notify_policy_flush(struct km_event *c)
1464{
1465 struct nlmsghdr *nlh;
1466 struct sk_buff *skb;
1467 unsigned char *b;
1468 int len = NLMSG_LENGTH(0);
1469
1470 skb = alloc_skb(len, GFP_ATOMIC);
1471 if (skb == NULL)
1472 return -ENOMEM;
1473 b = skb->tail;
1474
1475
1476 nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0);
1477
1478 nlh->nlmsg_len = skb->tail - b;
1479
1480 return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC);
1481
1482nlmsg_failure:
1483 kfree_skb(skb);
1484 return -1;
1485}
1486
1487static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1488{
1489
1490 switch (c->event) {
1491 case XFRM_MSG_NEWPOLICY:
1492 case XFRM_MSG_UPDPOLICY:
1493 case XFRM_MSG_DELPOLICY:
1494 return xfrm_notify_policy(xp, dir, c);
1495 case XFRM_MSG_FLUSHPOLICY:
1496 return xfrm_notify_policy_flush(c);
1497 case XFRM_MSG_POLEXPIRE:
1498 return xfrm_exp_policy_notify(xp, dir, c);
1499 default:
1500 printk("xfrm_user: Unknown Policy event %d\n", c->event);
1501 }
1502
1503 return 0;
1504
1505}
1506
1240static struct xfrm_mgr netlink_mgr = { 1507static struct xfrm_mgr netlink_mgr = {
1241 .id = "netlink", 1508 .id = "netlink",
1242 .notify = xfrm_send_state_notify, 1509 .notify = xfrm_send_state_notify,
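Taken together, these xfrm_user.c hunks replace the old (object, hard) notifier pair with a single struct km_event that carries the netlink message type plus per-event data, and the netlink manager dispatches on c->event as shown above. A sketch of another key manager written against the new signature; the km_event fields (event, seq, pid, data.hard, data.proto) are as introduced by this patch, while log_expire() and log_flush() are placeholders:

    static int example_state_notify(struct xfrm_state *x, struct km_event *c)
    {
            switch (c->event) {
            case XFRM_MSG_EXPIRE:
                    return log_expire(x, c->data.hard);   /* hypothetical */
            case XFRM_MSG_FLUSHSA:
                    return log_flush(c->data.proto);      /* hypothetical */
            default:
                    return 0;
            }
    }

    static struct xfrm_mgr example_mgr = {
            .id     = "example",
            .notify = example_state_notify,
    };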