aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorJohn W. Linville <linville@tuxdriver.com>2010-07-27 11:59:19 -0400
committerJohn W. Linville <linville@tuxdriver.com>2010-07-27 11:59:19 -0400
commit800f65bba8d2030b3fef62850e203f9f176625a8 (patch)
tree6507c4fe7a0826c253b4afb29375ab306a0fd9c8 /net
parent06b3cda0c12986f5bba578b918b188d731c4e191 (diff)
parentb3190df628617c7a4f188a9465aeabe1f5761933 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/holtmann/bluetooth-next-2.6
Conflicts: drivers/net/wireless/iwlwifi/iwl-commands.h
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c13
-rw-r--r--net/8021q/vlan.h13
-rw-r--r--net/8021q/vlan_core.c30
-rw-r--r--net/8021q/vlan_dev.c164
-rw-r--r--net/8021q/vlanproc.c16
-rw-r--r--net/9p/client.c70
-rw-r--r--net/9p/protocol.c8
-rw-r--r--net/9p/trans_fd.c2
-rw-r--r--net/9p/trans_virtio.c6
-rw-r--r--net/Kconfig10
-rw-r--r--net/Makefile4
-rw-r--r--net/atm/br2684.c66
-rw-r--r--net/atm/clip.c2
-rw-r--r--net/atm/common.c30
-rw-r--r--net/bluetooth/Kconfig13
-rw-r--r--net/bluetooth/bnep/bnep.h8
-rw-r--r--net/bluetooth/bnep/netdev.c2
-rw-r--r--net/bluetooth/hci_conn.c7
-rw-r--r--net/bluetooth/hci_core.c204
-rw-r--r--net/bluetooth/hci_event.c10
-rw-r--r--net/bluetooth/hci_sock.c90
-rw-r--r--net/bluetooth/hci_sysfs.c38
-rw-r--r--net/bluetooth/l2cap.c677
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_device.c135
-rw-r--r--net/bridge/br_fdb.c10
-rw-r--r--net/bridge/br_forward.c38
-rw-r--r--net/bridge/br_if.c33
-rw-r--r--net/bridge/br_input.c21
-rw-r--r--net/bridge/br_multicast.c23
-rw-r--r--net/bridge/br_netfilter.c63
-rw-r--r--net/bridge/br_netlink.c9
-rw-r--r--net/bridge/br_notify.c5
-rw-r--r--net/bridge/br_private.h67
-rw-r--r--net/bridge/br_stp_bpdu.c5
-rw-r--r--net/bridge/br_sysfs_br.c74
-rw-r--r--net/bridge/netfilter/ebt_redirect.c3
-rw-r--r--net/bridge/netfilter/ebt_ulog.c8
-rw-r--r--net/bridge/netfilter/ebtables.c11
-rw-r--r--net/caif/Kconfig12
-rw-r--r--net/caif/Makefile14
-rw-r--r--net/caif/caif_config_util.c5
-rw-r--r--net/caif/caif_dev.c12
-rw-r--r--net/caif/caif_socket.c152
-rw-r--r--net/caif/cfcnfg.c54
-rw-r--r--net/caif/cfctrl.c100
-rw-r--r--net/caif/cfdbgl.c2
-rw-r--r--net/caif/cfdgml.c7
-rw-r--r--net/caif/cfmuxl.c3
-rw-r--r--net/caif/cfpkt_skbuff.c30
-rw-r--r--net/caif/cfrfml.c318
-rw-r--r--net/caif/cfserl.c14
-rw-r--r--net/caif/cfsrvl.c32
-rw-r--r--net/caif/cfutill.c8
-rw-r--r--net/caif/cfveil.c7
-rw-r--r--net/caif/cfvidl.c2
-rw-r--r--net/caif/chnl_net.c67
-rw-r--r--net/can/raw.c11
-rw-r--r--net/compat.c53
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/datagram.c14
-rw-r--r--net/core/dev.c500
-rw-r--r--net/core/drop_monitor.c22
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/ethtool.c149
-rw-r--r--net/core/filter.c212
-rw-r--r--net/core/flow.c9
-rw-r--r--net/core/gen_estimator.c16
-rw-r--r--net/core/gen_stats.c14
-rw-r--r--net/core/iovec.c9
-rw-r--r--net/core/link_watch.c1
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/net-sysfs.c80
-rw-r--r--net/core/net-sysfs.h1
-rw-r--r--net/core/netevent.c5
-rw-r--r--net/core/netpoll.c213
-rw-r--r--net/core/pktgen.c178
-rw-r--r--net/core/rtnetlink.c41
-rw-r--r--net/core/scm.c33
-rw-r--r--net/core/skbuff.c83
-rw-r--r--net/core/sock.c101
-rw-r--r--net/core/stream.c6
-rw-r--r--net/core/timestamping.c126
-rw-r--r--net/core/utils.c3
-rw-r--r--net/dccp/ackvec.c4
-rw-r--r--net/dccp/ccids/ccid3.c4
-rw-r--r--net/dccp/dccp.h12
-rw-r--r--net/dccp/input.c19
-rw-r--r--net/dccp/ipv4.c4
-rw-r--r--net/dccp/ipv6.c30
-rw-r--r--net/dccp/options.c22
-rw-r--r--net/dccp/proto.c14
-rw-r--r--net/decnet/dn_route.c158
-rw-r--r--net/dsa/Kconfig2
-rw-r--r--net/dsa/slave.c3
-rw-r--r--net/econet/af_econet.c27
-rw-r--r--net/ethernet/eth.c5
-rw-r--r--net/ethernet/pe2.c3
-rw-r--r--net/ieee802154/wpan-class.c7
-rw-r--r--net/ipv4/Kconfig10
-rw-r--r--net/ipv4/af_inet.c84
-rw-r--r--net/ipv4/arp.c51
-rw-r--r--net/ipv4/datagram.c4
-rw-r--r--net/ipv4/devinet.c1
-rw-r--r--net/ipv4/fib_frontend.c13
-rw-r--r--net/ipv4/icmp.c37
-rw-r--r--net/ipv4/igmp.c32
-rw-r--r--net/ipv4/inet_connection_sock.c21
-rw-r--r--net/ipv4/inet_fragment.c1
-rw-r--r--net/ipv4/inet_hashtables.c4
-rw-r--r--net/ipv4/inetpeer.c244
-rw-r--r--net/ipv4/ip_forward.c10
-rw-r--r--net/ipv4/ip_fragment.c27
-rw-r--r--net/ipv4/ip_gre.c16
-rw-r--r--net/ipv4/ip_input.c26
-rw-r--r--net/ipv4/ip_output.c83
-rw-r--r--net/ipv4/ip_sockglue.c45
-rw-r--r--net/ipv4/ipconfig.c7
-rw-r--r--net/ipv4/ipip.c8
-rw-r--r--net/ipv4/ipmr.c22
-rw-r--r--net/ipv4/netfilter.c12
-rw-r--r--net/ipv4/netfilter/arp_tables.c7
-rw-r--r--net/ipv4/netfilter/ip_queue.c57
-rw-r--r--net/ipv4/netfilter/ip_tables.c6
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c48
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c54
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c6
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c2
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c10
-rw-r--r--net/ipv4/proc.c16
-rw-r--r--net/ipv4/protocol.c3
-rw-r--r--net/ipv4/raw.c22
-rw-r--r--net/ipv4/route.c511
-rw-r--r--net/ipv4/syncookies.c107
-rw-r--r--net/ipv4/tcp.c66
-rw-r--r--net/ipv4/tcp_hybla.c4
-rw-r--r--net/ipv4/tcp_input.c24
-rw-r--r--net/ipv4/tcp_ipv4.c182
-rw-r--r--net/ipv4/tcp_minisocks.c9
-rw-r--r--net/ipv4/tcp_output.c80
-rw-r--r--net/ipv4/tcp_timer.c1
-rw-r--r--net/ipv4/tunnel4.c2
-rw-r--r--net/ipv4/udp.c30
-rw-r--r--net/ipv4/udplite.c3
-rw-r--r--net/ipv4/xfrm4_input.c1
-rw-r--r--net/ipv4/xfrm4_policy.c4
-rw-r--r--net/ipv6/addrconf.c53
-rw-r--r--net/ipv6/addrlabel.c6
-rw-r--r--net/ipv6/af_inet6.c32
-rw-r--r--net/ipv6/anycast.c96
-rw-r--r--net/ipv6/datagram.c18
-rw-r--r--net/ipv6/exthdrs.c34
-rw-r--r--net/ipv6/fib6_rules.c10
-rw-r--r--net/ipv6/icmp.c4
-rw-r--r--net/ipv6/inet6_connection_sock.c9
-rw-r--r--net/ipv6/ip6_fib.c30
-rw-r--r--net/ipv6/ip6_output.c40
-rw-r--r--net/ipv6/ip6_tunnel.c8
-rw-r--r--net/ipv6/ip6mr.c8
-rw-r--r--net/ipv6/ipv6_sockglue.c2
-rw-r--r--net/ipv6/mcast.c195
-rw-r--r--net/ipv6/mip6.c3
-rw-r--r--net/ipv6/ndisc.c10
-rw-r--r--net/ipv6/netfilter.c4
-rw-r--r--net/ipv6/netfilter/ip6_queue.c57
-rw-r--r--net/ipv6/netfilter/ip6_tables.c9
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c81
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c6
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c9
-rw-r--r--net/ipv6/proc.c17
-rw-r--r--net/ipv6/raw.c32
-rw-r--r--net/ipv6/reassembly.c21
-rw-r--r--net/ipv6/route.c321
-rw-r--r--net/ipv6/sit.c16
-rw-r--r--net/ipv6/syncookies.c58
-rw-r--r--net/ipv6/tcp_ipv6.c47
-rw-r--r--net/ipv6/udp.c16
-rw-r--r--net/ipv6/xfrm6_policy.c2
-rw-r--r--net/irda/irnet/irnet_ppp.c10
-rw-r--r--net/irda/irttp.c14
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/iucv/iucv.c23
-rw-r--r--net/l2tp/l2tp_ip.c6
-rw-r--r--net/mac80211/cfg.c2
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/mac80211/sta_info.c2
-rw-r--r--net/netfilter/Kconfig36
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c14
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c20
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c86
-rw-r--r--net/netfilter/nf_conntrack_acct.c14
-rw-r--r--net/netfilter/nf_conntrack_core.c46
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c12
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c32
-rw-r--r--net/netfilter/nfnetlink_log.c73
-rw-r--r--net/netfilter/nfnetlink_queue.c39
-rw-r--r--net/netfilter/x_tables.c17
-rw-r--r--net/netfilter/xt_CT.c4
-rw-r--r--net/netfilter/xt_IDLETIMER.c315
-rw-r--r--net/netfilter/xt_NOTRACK.c2
-rw-r--r--net/netfilter/xt_RATEEST.c12
-rw-r--r--net/netfilter/xt_TCPMSS.c8
-rw-r--r--net/netfilter/xt_TEE.c12
-rw-r--r--net/netfilter/xt_cluster.c2
-rw-r--r--net/netfilter/xt_connbytes.c10
-rw-r--r--net/netfilter/xt_conntrack.c11
-rw-r--r--net/netfilter/xt_sctp.c3
-rw-r--r--net/netfilter/xt_socket.c2
-rw-r--r--net/netfilter/xt_state.c14
-rw-r--r--net/netfilter/xt_statistic.c19
-rw-r--r--net/netlink/af_netlink.c41
-rw-r--r--net/packet/af_packet.c37
-rw-r--r--net/phonet/pep.c9
-rw-r--r--net/phonet/pn_dev.c15
-rw-r--r--net/rds/ib_cm.c1
-rw-r--r--net/rds/iw_cm.c1
-rw-r--r--net/rxrpc/ar-peer.c4
-rw-r--r--net/sched/act_api.c11
-rw-r--r--net/sched/act_mirred.c12
-rw-r--r--net/sched/act_nat.c40
-rw-r--r--net/sched/act_pedit.c27
-rw-r--r--net/sched/act_police.c12
-rw-r--r--net/sched/act_simple.c4
-rw-r--r--net/sched/cls_cgroup.c50
-rw-r--r--net/sched/cls_u32.c49
-rw-r--r--net/sched/sch_api.c14
-rw-r--r--net/sched/sch_atm.c98
-rw-r--r--net/sched/sch_generic.c25
-rw-r--r--net/sched/sch_htb.c2
-rw-r--r--net/sched/sch_teql.c1
-rw-r--r--net/sctp/protocol.c7
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/socket.c186
-rw-r--r--net/sunrpc/cache.c13
-rw-r--r--net/sunrpc/rpc_pipe.c18
-rw-r--r--net/sunrpc/rpcb_clnt.c2
-rw-r--r--net/sunrpc/xprt.c5
-rw-r--r--net/sunrpc/xprtsock.c29
-rw-r--r--net/unix/af_unix.c99
-rw-r--r--net/wanrouter/wanmain.c7
-rw-r--r--net/wanrouter/wanproc.c7
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/util.c4
-rw-r--r--net/xfrm/xfrm_output.c4
-rw-r--r--net/xfrm/xfrm_policy.c22
254 files changed, 6238 insertions, 3822 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 3c1c8c14e929..a2ad15250575 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -155,9 +155,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
155 BUG_ON(!grp); 155 BUG_ON(!grp);
156 156
157 /* Take it out of our own structures, but be sure to interlock with 157 /* Take it out of our own structures, but be sure to interlock with
158 * HW accelerating devices or SW vlan input packet processing. 158 * HW accelerating devices or SW vlan input packet processing if
159 * VLAN is not 0 (leave it there for 802.1p).
159 */ 160 */
160 if (real_dev->features & NETIF_F_HW_VLAN_FILTER) 161 if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
161 ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id); 162 ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
162 163
163 grp->nr_vlans--; 164 grp->nr_vlans--;
@@ -419,6 +420,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
419 if (is_vlan_dev(dev)) 420 if (is_vlan_dev(dev))
420 __vlan_device_event(dev, event); 421 __vlan_device_event(dev, event);
421 422
423 if ((event == NETDEV_UP) &&
424 (dev->features & NETIF_F_HW_VLAN_FILTER) &&
425 dev->netdev_ops->ndo_vlan_rx_add_vid) {
426 pr_info("8021q: adding VLAN 0 to HW filter on device %s\n",
427 dev->name);
428 dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
429 }
430
422 grp = __vlan_find_group(dev); 431 grp = __vlan_find_group(dev);
423 if (!grp) 432 if (!grp)
424 goto out; 433 goto out;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 6abdcac1b2e8..8d9503ad01da 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -2,6 +2,7 @@
2#define __BEN_VLAN_802_1Q_INC__ 2#define __BEN_VLAN_802_1Q_INC__
3 3
4#include <linux/if_vlan.h> 4#include <linux/if_vlan.h>
5#include <linux/u64_stats_sync.h>
5 6
6 7
7/** 8/**
@@ -21,14 +22,16 @@ struct vlan_priority_tci_mapping {
21 * struct vlan_rx_stats - VLAN percpu rx stats 22 * struct vlan_rx_stats - VLAN percpu rx stats
22 * @rx_packets: number of received packets 23 * @rx_packets: number of received packets
23 * @rx_bytes: number of received bytes 24 * @rx_bytes: number of received bytes
24 * @multicast: number of received multicast packets 25 * @rx_multicast: number of received multicast packets
26 * @syncp: synchronization point for 64bit counters
25 * @rx_errors: number of errors 27 * @rx_errors: number of errors
26 */ 28 */
27struct vlan_rx_stats { 29struct vlan_rx_stats {
28 unsigned long rx_packets; 30 u64 rx_packets;
29 unsigned long rx_bytes; 31 u64 rx_bytes;
30 unsigned long multicast; 32 u64 rx_multicast;
31 unsigned long rx_errors; 33 struct u64_stats_sync syncp;
34 unsigned long rx_errors;
32}; 35};
33 36
34/** 37/**
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index bd537fc10254..01ddb0472f86 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -8,17 +8,23 @@
8int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, 8int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
9 u16 vlan_tci, int polling) 9 u16 vlan_tci, int polling)
10{ 10{
11 struct net_device *vlan_dev;
12 u16 vlan_id;
13
11 if (netpoll_rx(skb)) 14 if (netpoll_rx(skb))
12 return NET_RX_DROP; 15 return NET_RX_DROP;
13 16
14 if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) 17 if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
15 goto drop; 18 skb->deliver_no_wcard = 1;
16 19
17 skb->skb_iif = skb->dev->ifindex; 20 skb->skb_iif = skb->dev->ifindex;
18 __vlan_hwaccel_put_tag(skb, vlan_tci); 21 __vlan_hwaccel_put_tag(skb, vlan_tci);
19 skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); 22 vlan_id = vlan_tci & VLAN_VID_MASK;
23 vlan_dev = vlan_group_get_device(grp, vlan_id);
20 24
21 if (!skb->dev) 25 if (vlan_dev)
26 skb->dev = vlan_dev;
27 else if (vlan_id)
22 goto drop; 28 goto drop;
23 29
24 return (polling ? netif_receive_skb(skb) : netif_rx(skb)); 30 return (polling ? netif_receive_skb(skb) : netif_rx(skb));
@@ -41,9 +47,9 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
41 skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); 47 skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci);
42 skb->vlan_tci = 0; 48 skb->vlan_tci = 0;
43 49
44 rx_stats = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, 50 rx_stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats);
45 smp_processor_id());
46 51
52 u64_stats_update_begin(&rx_stats->syncp);
47 rx_stats->rx_packets++; 53 rx_stats->rx_packets++;
48 rx_stats->rx_bytes += skb->len; 54 rx_stats->rx_bytes += skb->len;
49 55
@@ -51,7 +57,7 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
51 case PACKET_BROADCAST: 57 case PACKET_BROADCAST:
52 break; 58 break;
53 case PACKET_MULTICAST: 59 case PACKET_MULTICAST:
54 rx_stats->multicast++; 60 rx_stats->rx_multicast++;
55 break; 61 break;
56 case PACKET_OTHERHOST: 62 case PACKET_OTHERHOST:
57 /* Our lower layer thinks this is not local, let's make sure. 63 /* Our lower layer thinks this is not local, let's make sure.
@@ -62,6 +68,7 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
62 skb->pkt_type = PACKET_HOST; 68 skb->pkt_type = PACKET_HOST;
63 break; 69 break;
64 } 70 }
71 u64_stats_update_end(&rx_stats->syncp);
65 return 0; 72 return 0;
66} 73}
67 74
@@ -82,15 +89,20 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
82 unsigned int vlan_tci, struct sk_buff *skb) 89 unsigned int vlan_tci, struct sk_buff *skb)
83{ 90{
84 struct sk_buff *p; 91 struct sk_buff *p;
92 struct net_device *vlan_dev;
93 u16 vlan_id;
85 94
86 if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) 95 if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
87 goto drop; 96 skb->deliver_no_wcard = 1;
88 97
89 skb->skb_iif = skb->dev->ifindex; 98 skb->skb_iif = skb->dev->ifindex;
90 __vlan_hwaccel_put_tag(skb, vlan_tci); 99 __vlan_hwaccel_put_tag(skb, vlan_tci);
91 skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); 100 vlan_id = vlan_tci & VLAN_VID_MASK;
101 vlan_dev = vlan_group_get_device(grp, vlan_id);
92 102
93 if (!skb->dev) 103 if (vlan_dev)
104 skb->dev = vlan_dev;
105 else if (vlan_id)
94 goto drop; 106 goto drop;
95 107
96 for (p = napi->gro_list; p; p = p->next) { 108 for (p = napi->gro_list; p; p = p->next) {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 55be90826f5f..3d59c9bf8feb 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -142,6 +142,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
142{ 142{
143 struct vlan_hdr *vhdr; 143 struct vlan_hdr *vhdr;
144 struct vlan_rx_stats *rx_stats; 144 struct vlan_rx_stats *rx_stats;
145 struct net_device *vlan_dev;
145 u16 vlan_id; 146 u16 vlan_id;
146 u16 vlan_tci; 147 u16 vlan_tci;
147 148
@@ -157,53 +158,71 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
157 vlan_id = vlan_tci & VLAN_VID_MASK; 158 vlan_id = vlan_tci & VLAN_VID_MASK;
158 159
159 rcu_read_lock(); 160 rcu_read_lock();
160 skb->dev = __find_vlan_dev(dev, vlan_id); 161 vlan_dev = __find_vlan_dev(dev, vlan_id);
161 if (!skb->dev) {
162 pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
163 __func__, vlan_id, dev->name);
164 goto err_unlock;
165 }
166
167 rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
168 smp_processor_id());
169 rx_stats->rx_packets++;
170 rx_stats->rx_bytes += skb->len;
171
172 skb_pull_rcsum(skb, VLAN_HLEN);
173
174 skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
175
176 pr_debug("%s: priority: %u for TCI: %hu\n",
177 __func__, skb->priority, vlan_tci);
178
179 switch (skb->pkt_type) {
180 case PACKET_BROADCAST: /* Yeah, stats collect these together.. */
181 /* stats->broadcast ++; // no such counter :-( */
182 break;
183 162
184 case PACKET_MULTICAST: 163 /* If the VLAN device is defined, we use it.
185 rx_stats->multicast++; 164 * If not, and the VID is 0, it is a 802.1p packet (not
186 break; 165 * really a VLAN), so we will just netif_rx it later to the
166 * original interface, but with the skb->proto set to the
167 * wrapped proto: we do nothing here.
168 */
187 169
188 case PACKET_OTHERHOST: 170 if (!vlan_dev) {
189 /* Our lower layer thinks this is not local, let's make sure. 171 if (vlan_id) {
190 * This allows the VLAN to have a different MAC than the 172 pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
191 * underlying device, and still route correctly. 173 __func__, vlan_id, dev->name);
192 */ 174 goto err_unlock;
193 if (!compare_ether_addr(eth_hdr(skb)->h_dest, 175 }
194 skb->dev->dev_addr)) 176 rx_stats = NULL;
195 skb->pkt_type = PACKET_HOST; 177 } else {
196 break; 178 skb->dev = vlan_dev;
197 default: 179
198 break; 180 rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
181 smp_processor_id());
182 u64_stats_update_begin(&rx_stats->syncp);
183 rx_stats->rx_packets++;
184 rx_stats->rx_bytes += skb->len;
185
186 skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
187
188 pr_debug("%s: priority: %u for TCI: %hu\n",
189 __func__, skb->priority, vlan_tci);
190
191 switch (skb->pkt_type) {
192 case PACKET_BROADCAST:
193 /* Yeah, stats collect these together.. */
194 /* stats->broadcast ++; // no such counter :-( */
195 break;
196
197 case PACKET_MULTICAST:
198 rx_stats->rx_multicast++;
199 break;
200
201 case PACKET_OTHERHOST:
202 /* Our lower layer thinks this is not local, let's make
203 * sure.
204 * This allows the VLAN to have a different MAC than the
205 * underlying device, and still route correctly.
206 */
207 if (!compare_ether_addr(eth_hdr(skb)->h_dest,
208 skb->dev->dev_addr))
209 skb->pkt_type = PACKET_HOST;
210 break;
211 default:
212 break;
213 }
214 u64_stats_update_end(&rx_stats->syncp);
199 } 215 }
200 216
217 skb_pull_rcsum(skb, VLAN_HLEN);
201 vlan_set_encap_proto(skb, vhdr); 218 vlan_set_encap_proto(skb, vhdr);
202 219
203 skb = vlan_check_reorder_header(skb); 220 if (vlan_dev) {
204 if (!skb) { 221 skb = vlan_check_reorder_header(skb);
205 rx_stats->rx_errors++; 222 if (!skb) {
206 goto err_unlock; 223 rx_stats->rx_errors++;
224 goto err_unlock;
225 }
207 } 226 }
208 227
209 netif_rx(skb); 228 netif_rx(skb);
@@ -708,7 +727,8 @@ static int vlan_dev_init(struct net_device *dev)
708 netif_carrier_off(dev); 727 netif_carrier_off(dev);
709 728
710 /* IFF_BROADCAST|IFF_MULTICAST; ??? */ 729 /* IFF_BROADCAST|IFF_MULTICAST; ??? */
711 dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); 730 dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
731 IFF_MASTER | IFF_SLAVE);
712 dev->iflink = real_dev->ifindex; 732 dev->iflink = real_dev->ifindex;
713 dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | 733 dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
714 (1<<__LINK_STATE_DORMANT))) | 734 (1<<__LINK_STATE_DORMANT))) |
@@ -800,37 +820,65 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev)
800 return dev_ethtool_get_flags(vlan->real_dev); 820 return dev_ethtool_get_flags(vlan->real_dev);
801} 821}
802 822
803static struct net_device_stats *vlan_dev_get_stats(struct net_device *dev) 823static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
804{ 824{
805 struct net_device_stats *stats = &dev->stats;
806
807 dev_txq_stats_fold(dev, stats); 825 dev_txq_stats_fold(dev, stats);
808 826
809 if (vlan_dev_info(dev)->vlan_rx_stats) { 827 if (vlan_dev_info(dev)->vlan_rx_stats) {
810 struct vlan_rx_stats *p, rx = {0}; 828 struct vlan_rx_stats *p, accum = {0};
811 int i; 829 int i;
812 830
813 for_each_possible_cpu(i) { 831 for_each_possible_cpu(i) {
832 u64 rxpackets, rxbytes, rxmulticast;
833 unsigned int start;
834
814 p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i); 835 p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i);
815 rx.rx_packets += p->rx_packets; 836 do {
816 rx.rx_bytes += p->rx_bytes; 837 start = u64_stats_fetch_begin_bh(&p->syncp);
817 rx.rx_errors += p->rx_errors; 838 rxpackets = p->rx_packets;
818 rx.multicast += p->multicast; 839 rxbytes = p->rx_bytes;
840 rxmulticast = p->rx_multicast;
841 } while (u64_stats_fetch_retry_bh(&p->syncp, start));
842 accum.rx_packets += rxpackets;
843 accum.rx_bytes += rxbytes;
844 accum.rx_multicast += rxmulticast;
845 /* rx_errors is an ulong, not protected by syncp */
846 accum.rx_errors += p->rx_errors;
819 } 847 }
820 stats->rx_packets = rx.rx_packets; 848 stats->rx_packets = accum.rx_packets;
821 stats->rx_bytes = rx.rx_bytes; 849 stats->rx_bytes = accum.rx_bytes;
822 stats->rx_errors = rx.rx_errors; 850 stats->rx_errors = accum.rx_errors;
823 stats->multicast = rx.multicast; 851 stats->multicast = accum.rx_multicast;
824 } 852 }
825 return stats; 853 return stats;
826} 854}
827 855
856static int vlan_ethtool_set_tso(struct net_device *dev, u32 data)
857{
858 if (data) {
859 struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
860
861 /* Underlying device must support TSO for VLAN-tagged packets
862 * and must have TSO enabled now.
863 */
864 if (!(real_dev->vlan_features & NETIF_F_TSO))
865 return -EOPNOTSUPP;
866 if (!(real_dev->features & NETIF_F_TSO))
867 return -EINVAL;
868 dev->features |= NETIF_F_TSO;
869 } else {
870 dev->features &= ~NETIF_F_TSO;
871 }
872 return 0;
873}
874
828static const struct ethtool_ops vlan_ethtool_ops = { 875static const struct ethtool_ops vlan_ethtool_ops = {
829 .get_settings = vlan_ethtool_get_settings, 876 .get_settings = vlan_ethtool_get_settings,
830 .get_drvinfo = vlan_ethtool_get_drvinfo, 877 .get_drvinfo = vlan_ethtool_get_drvinfo,
831 .get_link = ethtool_op_get_link, 878 .get_link = ethtool_op_get_link,
832 .get_rx_csum = vlan_ethtool_get_rx_csum, 879 .get_rx_csum = vlan_ethtool_get_rx_csum,
833 .get_flags = vlan_ethtool_get_flags, 880 .get_flags = vlan_ethtool_get_flags,
881 .set_tso = vlan_ethtool_set_tso,
834}; 882};
835 883
836static const struct net_device_ops vlan_netdev_ops = { 884static const struct net_device_ops vlan_netdev_ops = {
@@ -847,7 +895,7 @@ static const struct net_device_ops vlan_netdev_ops = {
847 .ndo_change_rx_flags = vlan_dev_change_rx_flags, 895 .ndo_change_rx_flags = vlan_dev_change_rx_flags,
848 .ndo_do_ioctl = vlan_dev_ioctl, 896 .ndo_do_ioctl = vlan_dev_ioctl,
849 .ndo_neigh_setup = vlan_dev_neigh_setup, 897 .ndo_neigh_setup = vlan_dev_neigh_setup,
850 .ndo_get_stats = vlan_dev_get_stats, 898 .ndo_get_stats64 = vlan_dev_get_stats64,
851#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) 899#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
852 .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, 900 .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup,
853 .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, 901 .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done,
@@ -871,7 +919,7 @@ static const struct net_device_ops vlan_netdev_accel_ops = {
871 .ndo_change_rx_flags = vlan_dev_change_rx_flags, 919 .ndo_change_rx_flags = vlan_dev_change_rx_flags,
872 .ndo_do_ioctl = vlan_dev_ioctl, 920 .ndo_do_ioctl = vlan_dev_ioctl,
873 .ndo_neigh_setup = vlan_dev_neigh_setup, 921 .ndo_neigh_setup = vlan_dev_neigh_setup,
874 .ndo_get_stats = vlan_dev_get_stats, 922 .ndo_get_stats64 = vlan_dev_get_stats64,
875#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) 923#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
876 .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, 924 .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup,
877 .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, 925 .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done,
@@ -896,7 +944,7 @@ static const struct net_device_ops vlan_netdev_ops_sq = {
896 .ndo_change_rx_flags = vlan_dev_change_rx_flags, 944 .ndo_change_rx_flags = vlan_dev_change_rx_flags,
897 .ndo_do_ioctl = vlan_dev_ioctl, 945 .ndo_do_ioctl = vlan_dev_ioctl,
898 .ndo_neigh_setup = vlan_dev_neigh_setup, 946 .ndo_neigh_setup = vlan_dev_neigh_setup,
899 .ndo_get_stats = vlan_dev_get_stats, 947 .ndo_get_stats64 = vlan_dev_get_stats64,
900#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) 948#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
901 .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, 949 .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup,
902 .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, 950 .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done,
@@ -921,7 +969,7 @@ static const struct net_device_ops vlan_netdev_accel_ops_sq = {
921 .ndo_change_rx_flags = vlan_dev_change_rx_flags, 969 .ndo_change_rx_flags = vlan_dev_change_rx_flags,
922 .ndo_do_ioctl = vlan_dev_ioctl, 970 .ndo_do_ioctl = vlan_dev_ioctl,
923 .ndo_neigh_setup = vlan_dev_neigh_setup, 971 .ndo_neigh_setup = vlan_dev_neigh_setup,
924 .ndo_get_stats = vlan_dev_get_stats, 972 .ndo_get_stats64 = vlan_dev_get_stats64,
925#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) 973#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
926 .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, 974 .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup,
927 .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, 975 .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done,
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index afead353e215..80e280f56686 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -278,25 +278,27 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
278{ 278{
279 struct net_device *vlandev = (struct net_device *) seq->private; 279 struct net_device *vlandev = (struct net_device *) seq->private;
280 const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev); 280 const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
281 const struct net_device_stats *stats; 281 struct rtnl_link_stats64 temp;
282 const struct rtnl_link_stats64 *stats;
282 static const char fmt[] = "%30s %12lu\n"; 283 static const char fmt[] = "%30s %12lu\n";
284 static const char fmt64[] = "%30s %12llu\n";
283 int i; 285 int i;
284 286
285 if (!is_vlan_dev(vlandev)) 287 if (!is_vlan_dev(vlandev))
286 return 0; 288 return 0;
287 289
288 stats = dev_get_stats(vlandev); 290 stats = dev_get_stats(vlandev, &temp);
289 seq_printf(seq, 291 seq_printf(seq,
290 "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n", 292 "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n",
291 vlandev->name, dev_info->vlan_id, 293 vlandev->name, dev_info->vlan_id,
292 (int)(dev_info->flags & 1), vlandev->priv_flags); 294 (int)(dev_info->flags & 1), vlandev->priv_flags);
293 295
294 seq_printf(seq, fmt, "total frames received", stats->rx_packets); 296 seq_printf(seq, fmt64, "total frames received", stats->rx_packets);
295 seq_printf(seq, fmt, "total bytes received", stats->rx_bytes); 297 seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes);
296 seq_printf(seq, fmt, "Broadcast/Multicast Rcvd", stats->multicast); 298 seq_printf(seq, fmt64, "Broadcast/Multicast Rcvd", stats->multicast);
297 seq_puts(seq, "\n"); 299 seq_puts(seq, "\n");
298 seq_printf(seq, fmt, "total frames transmitted", stats->tx_packets); 300 seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
299 seq_printf(seq, fmt, "total bytes transmitted", stats->tx_bytes); 301 seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
300 seq_printf(seq, fmt, "total headroom inc", 302 seq_printf(seq, fmt, "total headroom inc",
301 dev_info->cnt_inc_headroom_on_tx); 303 dev_info->cnt_inc_headroom_on_tx);
302 seq_printf(seq, fmt, "total encap on xmit", 304 seq_printf(seq, fmt, "total encap on xmit",
diff --git a/net/9p/client.c b/net/9p/client.c
index 0aa79faa9850..37c8da07a80b 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1321,7 +1321,8 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
1321 if (wst->muid) 1321 if (wst->muid)
1322 ret += strlen(wst->muid); 1322 ret += strlen(wst->muid);
1323 1323
1324 if (proto_version == p9_proto_2000u) { 1324 if ((proto_version == p9_proto_2000u) ||
1325 (proto_version == p9_proto_2000L)) {
1325 ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ 1326 ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */
1326 if (wst->extension) 1327 if (wst->extension)
1327 ret += strlen(wst->extension); 1328 ret += strlen(wst->extension);
@@ -1364,3 +1365,70 @@ error:
1364 return err; 1365 return err;
1365} 1366}
1366EXPORT_SYMBOL(p9_client_wstat); 1367EXPORT_SYMBOL(p9_client_wstat);
1368
1369int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
1370{
1371 int err;
1372 struct p9_req_t *req;
1373 struct p9_client *clnt;
1374
1375 err = 0;
1376 clnt = fid->clnt;
1377
1378 P9_DPRINTK(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid);
1379
1380 req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid);
1381 if (IS_ERR(req)) {
1382 err = PTR_ERR(req);
1383 goto error;
1384 }
1385
1386 err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
1387 &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
1388 &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
1389 if (err) {
1390 p9pdu_dump(1, req->rc);
1391 p9_free_req(clnt, req);
1392 goto error;
1393 }
1394
1395 P9_DPRINTK(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld "
1396 "blocks %llu bfree %llu bavail %llu files %llu ffree %llu "
1397 "fsid %llu namelen %ld\n",
1398 fid->fid, (long unsigned int)sb->type, (long int)sb->bsize,
1399 sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree,
1400 sb->fsid, (long int)sb->namelen);
1401
1402 p9_free_req(clnt, req);
1403error:
1404 return err;
1405}
1406EXPORT_SYMBOL(p9_client_statfs);
1407
1408int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name)
1409{
1410 int err;
1411 struct p9_req_t *req;
1412 struct p9_client *clnt;
1413
1414 err = 0;
1415 clnt = fid->clnt;
1416
1417 P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
1418 fid->fid, newdirfid->fid, name);
1419
1420 req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid,
1421 newdirfid->fid, name);
1422 if (IS_ERR(req)) {
1423 err = PTR_ERR(req);
1424 goto error;
1425 }
1426
1427 P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
1428
1429 p9_free_req(clnt, req);
1430error:
1431 return err;
1432}
1433EXPORT_SYMBOL(p9_client_rename);
1434
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index e7541d5b0118..149f82160130 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -341,7 +341,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
341 } 341 }
342 break; 342 break;
343 case '?': 343 case '?':
344 if (proto_version != p9_proto_2000u) 344 if ((proto_version != p9_proto_2000u) &&
345 (proto_version != p9_proto_2000L))
345 return 0; 346 return 0;
346 break; 347 break;
347 default: 348 default:
@@ -393,7 +394,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
393 const char *sptr = va_arg(ap, const char *); 394 const char *sptr = va_arg(ap, const char *);
394 int16_t len = 0; 395 int16_t len = 0;
395 if (sptr) 396 if (sptr)
396 len = MIN(strlen(sptr), USHORT_MAX); 397 len = MIN(strlen(sptr), USHRT_MAX);
397 398
398 errcode = p9pdu_writef(pdu, proto_version, 399 errcode = p9pdu_writef(pdu, proto_version,
399 "w", len); 400 "w", len);
@@ -488,7 +489,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
488 } 489 }
489 break; 490 break;
490 case '?': 491 case '?':
491 if (proto_version != p9_proto_2000u) 492 if ((proto_version != p9_proto_2000u) &&
493 (proto_version != p9_proto_2000L))
492 return 0; 494 return 0;
493 break; 495 break;
494 default: 496 default:
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 98ce9bcb0e15..c85109d809ca 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -948,7 +948,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
948 948
949 csocket = NULL; 949 csocket = NULL;
950 950
951 if (strlen(addr) > UNIX_PATH_MAX) { 951 if (strlen(addr) >= UNIX_PATH_MAX) {
952 P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", 952 P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n",
953 addr); 953 addr);
954 return -ENAMETOOLONG; 954 return -ENAMETOOLONG;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 7eb78ecc1618..dcfbe99ff81c 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -137,7 +137,7 @@ static void req_done(struct virtqueue *vq)
137 137
138 P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); 138 P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
139 139
140 while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) { 140 while ((rc = virtqueue_get_buf(chan->vq, &len)) != NULL) {
141 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); 141 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
142 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); 142 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
143 req = p9_tag_lookup(chan->client, rc->tag); 143 req = p9_tag_lookup(chan->client, rc->tag);
@@ -209,13 +209,13 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
209 209
210 req->status = REQ_STATUS_SENT; 210 req->status = REQ_STATUS_SENT;
211 211
212 if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) { 212 if (virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) {
213 P9_DPRINTK(P9_DEBUG_TRANS, 213 P9_DPRINTK(P9_DEBUG_TRANS,
214 "9p debug: virtio rpc add_buf returned failure"); 214 "9p debug: virtio rpc add_buf returned failure");
215 return -EIO; 215 return -EIO;
216 } 216 }
217 217
218 chan->vq->vq_ops->kick(chan->vq); 218 virtqueue_kick(chan->vq);
219 219
220 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); 220 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
221 return 0; 221 return 0;
diff --git a/net/Kconfig b/net/Kconfig
index 0d68b40fc0e6..b3250944cde9 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -86,6 +86,16 @@ config NETWORK_SECMARK
86 to nfmark, but designated for security purposes. 86 to nfmark, but designated for security purposes.
87 If you are unsure how to answer this question, answer N. 87 If you are unsure how to answer this question, answer N.
88 88
89config NETWORK_PHY_TIMESTAMPING
90 bool "Timestamping in PHY devices"
91 depends on EXPERIMENTAL
92 help
93 This allows timestamping of network packets by PHYs with
94 hardware timestamping capabilities. This option adds some
95 overhead in the transmit and receive paths.
96
97 If you are unsure how to answer this question, answer N.
98
89menuconfig NETFILTER 99menuconfig NETFILTER
90 bool "Network packet filtering framework (Netfilter)" 100 bool "Network packet filtering framework (Netfilter)"
91 ---help--- 101 ---help---
diff --git a/net/Makefile b/net/Makefile
index cb7bdc1210cb..41d420070a38 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -50,7 +50,7 @@ endif
50obj-$(CONFIG_IP_DCCP) += dccp/ 50obj-$(CONFIG_IP_DCCP) += dccp/
51obj-$(CONFIG_IP_SCTP) += sctp/ 51obj-$(CONFIG_IP_SCTP) += sctp/
52obj-$(CONFIG_RDS) += rds/ 52obj-$(CONFIG_RDS) += rds/
53obj-y += wireless/ 53obj-$(CONFIG_WIRELESS) += wireless/
54obj-$(CONFIG_MAC80211) += mac80211/ 54obj-$(CONFIG_MAC80211) += mac80211/
55obj-$(CONFIG_TIPC) += tipc/ 55obj-$(CONFIG_TIPC) += tipc/
56obj-$(CONFIG_NETLABEL) += netlabel/ 56obj-$(CONFIG_NETLABEL) += netlabel/
@@ -61,7 +61,7 @@ obj-$(CONFIG_CAIF) += caif/
61ifneq ($(CONFIG_DCB),) 61ifneq ($(CONFIG_DCB),)
62obj-y += dcb/ 62obj-y += dcb/
63endif 63endif
64obj-y += ieee802154/ 64obj-$(CONFIG_IEEE802154) += ieee802154/
65 65
66ifeq ($(CONFIG_NET),y) 66ifeq ($(CONFIG_NET),y)
67obj-$(CONFIG_SYSCTL) += sysctl_net.o 67obj-$(CONFIG_SYSCTL) += sysctl_net.o
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 6719af6a59fa..651babdfab38 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -139,6 +139,43 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s)
139 return NULL; 139 return NULL;
140} 140}
141 141
142static int atm_dev_event(struct notifier_block *this, unsigned long event,
143 void *arg)
144{
145 struct atm_dev *atm_dev = arg;
146 struct list_head *lh;
147 struct net_device *net_dev;
148 struct br2684_vcc *brvcc;
149 struct atm_vcc *atm_vcc;
150 unsigned long flags;
151
152 pr_debug("event=%ld dev=%p\n", event, atm_dev);
153
154 read_lock_irqsave(&devs_lock, flags);
155 list_for_each(lh, &br2684_devs) {
156 net_dev = list_entry_brdev(lh);
157
158 list_for_each_entry(brvcc, &BRPRIV(net_dev)->brvccs, brvccs) {
159 atm_vcc = brvcc->atmvcc;
160 if (atm_vcc && brvcc->atmvcc->dev == atm_dev) {
161
162 if (atm_vcc->dev->signal == ATM_PHY_SIG_LOST)
163 netif_carrier_off(net_dev);
164 else
165 netif_carrier_on(net_dev);
166
167 }
168 }
169 }
170 read_unlock_irqrestore(&devs_lock, flags);
171
172 return NOTIFY_DONE;
173}
174
175static struct notifier_block atm_dev_notifier = {
176 .notifier_call = atm_dev_event,
177};
178
142/* chained vcc->pop function. Check if we should wake the netif_queue */ 179/* chained vcc->pop function. Check if we should wake the netif_queue */
143static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb) 180static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb)
144{ 181{
@@ -362,6 +399,12 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
362 unregister_netdev(net_dev); 399 unregister_netdev(net_dev);
363 free_netdev(net_dev); 400 free_netdev(net_dev);
364 } 401 }
402 read_lock_irq(&devs_lock);
403 if (list_empty(&br2684_devs)) {
404 /* last br2684 device */
405 unregister_atmdevice_notifier(&atm_dev_notifier);
406 }
407 read_unlock_irq(&devs_lock);
365 return; 408 return;
366 } 409 }
367 410
@@ -530,6 +573,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
530 573
531 br2684_push(atmvcc, skb); 574 br2684_push(atmvcc, skb);
532 } 575 }
576
577 /* initialize netdev carrier state */
578 if (atmvcc->dev->signal == ATM_PHY_SIG_LOST)
579 netif_carrier_off(net_dev);
580 else
581 netif_carrier_on(net_dev);
582
533 __module_get(THIS_MODULE); 583 __module_get(THIS_MODULE);
534 return 0; 584 return 0;
535 585
@@ -620,9 +670,16 @@ static int br2684_create(void __user *arg)
620 } 670 }
621 671
622 write_lock_irq(&devs_lock); 672 write_lock_irq(&devs_lock);
673
623 brdev->payload = payload; 674 brdev->payload = payload;
624 brdev->number = list_empty(&br2684_devs) ? 1 : 675
625 BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1; 676 if (list_empty(&br2684_devs)) {
677 /* 1st br2684 device */
678 register_atmdevice_notifier(&atm_dev_notifier);
679 brdev->number = 1;
680 } else
681 brdev->number = BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1;
682
626 list_add_tail(&brdev->br2684_devs, &br2684_devs); 683 list_add_tail(&brdev->br2684_devs, &br2684_devs);
627 write_unlock_irq(&devs_lock); 684 write_unlock_irq(&devs_lock);
628 return 0; 685 return 0;
@@ -772,6 +829,11 @@ static void __exit br2684_exit(void)
772 remove_proc_entry("br2684", atm_proc_root); 829 remove_proc_entry("br2684", atm_proc_root);
773#endif 830#endif
774 831
832
833 /* if not already empty */
834 if (!list_empty(&br2684_devs))
835 unregister_atmdevice_notifier(&atm_dev_notifier);
836
775 while (!list_empty(&br2684_devs)) { 837 while (!list_empty(&br2684_devs)) {
776 net_dev = list_entry_brdev(br2684_devs.next); 838 net_dev = list_entry_brdev(br2684_devs.next);
777 brdev = BRPRIV(net_dev); 839 brdev = BRPRIV(net_dev);
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 313aba11316b..95fdd1185067 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -522,7 +522,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
522 error = ip_route_output_key(&init_net, &rt, &fl); 522 error = ip_route_output_key(&init_net, &rt, &fl);
523 if (error) 523 if (error)
524 return error; 524 return error;
525 neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1); 525 neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1);
526 ip_rt_put(rt); 526 ip_rt_put(rt);
527 if (!neigh) 527 if (!neigh)
528 return -ENOMEM; 528 return -ENOMEM;
diff --git a/net/atm/common.c b/net/atm/common.c
index b43feb1a3995..940404a73b3d 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -37,6 +37,8 @@ EXPORT_SYMBOL(vcc_hash);
37DEFINE_RWLOCK(vcc_sklist_lock); 37DEFINE_RWLOCK(vcc_sklist_lock);
38EXPORT_SYMBOL(vcc_sklist_lock); 38EXPORT_SYMBOL(vcc_sklist_lock);
39 39
40static ATOMIC_NOTIFIER_HEAD(atm_dev_notify_chain);
41
40static void __vcc_insert_socket(struct sock *sk) 42static void __vcc_insert_socket(struct sock *sk)
41{ 43{
42 struct atm_vcc *vcc = atm_sk(sk); 44 struct atm_vcc *vcc = atm_sk(sk);
@@ -212,6 +214,22 @@ void vcc_release_async(struct atm_vcc *vcc, int reply)
212} 214}
213EXPORT_SYMBOL(vcc_release_async); 215EXPORT_SYMBOL(vcc_release_async);
214 216
217void atm_dev_signal_change(struct atm_dev *dev, char signal)
218{
219 pr_debug("%s signal=%d dev=%p number=%d dev->signal=%d\n",
220 __func__, signal, dev, dev->number, dev->signal);
221
222 /* atm driver sending invalid signal */
223 WARN_ON(signal < ATM_PHY_SIG_LOST || signal > ATM_PHY_SIG_FOUND);
224
225 if (dev->signal == signal)
226 return; /* no change */
227
228 dev->signal = signal;
229
230 atomic_notifier_call_chain(&atm_dev_notify_chain, signal, dev);
231}
232EXPORT_SYMBOL(atm_dev_signal_change);
215 233
216void atm_dev_release_vccs(struct atm_dev *dev) 234void atm_dev_release_vccs(struct atm_dev *dev)
217{ 235{
@@ -781,6 +799,18 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
781 return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len); 799 return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len);
782} 800}
783 801
802int register_atmdevice_notifier(struct notifier_block *nb)
803{
804 return atomic_notifier_chain_register(&atm_dev_notify_chain, nb);
805}
806EXPORT_SYMBOL_GPL(register_atmdevice_notifier);
807
808void unregister_atmdevice_notifier(struct notifier_block *nb)
809{
810 atomic_notifier_chain_unregister(&atm_dev_notify_chain, nb);
811}
812EXPORT_SYMBOL_GPL(unregister_atmdevice_notifier);
813
784static int __init atm_init(void) 814static int __init atm_init(void)
785{ 815{
786 int error; 816 int error;
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index ee3b3049d385..ed371684c133 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -43,19 +43,6 @@ config BT_L2CAP
43 Say Y here to compile L2CAP support into the kernel or say M to 43 Say Y here to compile L2CAP support into the kernel or say M to
44 compile it as module (l2cap). 44 compile it as module (l2cap).
45 45
46config BT_L2CAP_EXT_FEATURES
47 bool "L2CAP Extended Features support (EXPERIMENTAL)"
48 depends on BT_L2CAP && EXPERIMENTAL
49 help
50 This option enables the L2CAP Extended Features support. These
51 new features include the Enhanced Retransmission and Streaming
52 Modes, the Frame Check Sequence (FCS), and Segmentation and
53 Reassembly (SAR) for L2CAP packets. They are a required for the
54 new Alternate MAC/PHY and the Bluetooth Medical Profile.
55
56 You should say N unless you know what you are doing. Note that
57 this is in an experimental state yet.
58
59config BT_SCO 46config BT_SCO
60 tristate "SCO links support" 47 tristate "SCO links support"
61 depends on BT 48 depends on BT
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
index 0d9e506f5d5a..70672544db86 100644
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -86,26 +86,26 @@ struct bnep_setup_conn_req {
86 __u8 ctrl; 86 __u8 ctrl;
87 __u8 uuid_size; 87 __u8 uuid_size;
88 __u8 service[0]; 88 __u8 service[0];
89} __attribute__((packed)); 89} __packed;
90 90
91struct bnep_set_filter_req { 91struct bnep_set_filter_req {
92 __u8 type; 92 __u8 type;
93 __u8 ctrl; 93 __u8 ctrl;
94 __be16 len; 94 __be16 len;
95 __u8 list[0]; 95 __u8 list[0];
96} __attribute__((packed)); 96} __packed;
97 97
98struct bnep_control_rsp { 98struct bnep_control_rsp {
99 __u8 type; 99 __u8 type;
100 __u8 ctrl; 100 __u8 ctrl;
101 __be16 resp; 101 __be16 resp;
102} __attribute__((packed)); 102} __packed;
103 103
104struct bnep_ext_hdr { 104struct bnep_ext_hdr {
105 __u8 type; 105 __u8 type;
106 __u8 len; 106 __u8 len;
107 __u8 data[0]; 107 __u8 data[0];
108} __attribute__((packed)); 108} __packed;
109 109
110/* BNEP ioctl defines */ 110/* BNEP ioctl defines */
111#define BNEPCONNADD _IOW('B', 200, int) 111#define BNEPCONNADD _IOW('B', 200, int)
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 0faad5ce6dc4..8c100c9dae28 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -104,6 +104,8 @@ static void bnep_net_set_mc_list(struct net_device *dev)
104 break; 104 break;
105 memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN); 105 memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
106 memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN); 106 memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
107
108 i++;
107 } 109 }
108 r->len = htons(skb->len - len); 110 r->len = htons(skb->len - len);
109 } 111 }
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index b10e3cdb08f8..e9fef83449f8 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1,6 +1,6 @@
1/* 1/*
2 BlueZ - Bluetooth protocol stack for Linux 2 BlueZ - Bluetooth protocol stack for Linux
3 Copyright (C) 2000-2001 Qualcomm Incorporated 3 Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
4 4
5 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> 5 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
6 6
@@ -358,6 +358,11 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
358 acl->sec_level = sec_level; 358 acl->sec_level = sec_level;
359 acl->auth_type = auth_type; 359 acl->auth_type = auth_type;
360 hci_acl_connect(acl); 360 hci_acl_connect(acl);
361 } else {
362 if (acl->sec_level < sec_level)
363 acl->sec_level = sec_level;
364 if (acl->auth_type < auth_type)
365 acl->auth_type = auth_type;
361 } 366 }
362 367
363 if (type == ACL_LINK) 368 if (type == ACL_LINK)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2f768de87011..995c9f9b84d0 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -562,6 +562,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
562 hci_dev_lock_bh(hdev); 562 hci_dev_lock_bh(hdev);
563 inquiry_cache_flush(hdev); 563 inquiry_cache_flush(hdev);
564 hci_conn_hash_flush(hdev); 564 hci_conn_hash_flush(hdev);
565 hci_blacklist_clear(hdev);
565 hci_dev_unlock_bh(hdev); 566 hci_dev_unlock_bh(hdev);
566 567
567 hci_notify(hdev, HCI_DEV_DOWN); 568 hci_notify(hdev, HCI_DEV_DOWN);
@@ -913,7 +914,7 @@ int hci_register_dev(struct hci_dev *hdev)
913 skb_queue_head_init(&hdev->cmd_q); 914 skb_queue_head_init(&hdev->cmd_q);
914 skb_queue_head_init(&hdev->raw_q); 915 skb_queue_head_init(&hdev->raw_q);
915 916
916 for (i = 0; i < 3; i++) 917 for (i = 0; i < NUM_REASSEMBLY; i++)
917 hdev->reassembly[i] = NULL; 918 hdev->reassembly[i] = NULL;
918 919
919 init_waitqueue_head(&hdev->req_wait_q); 920 init_waitqueue_head(&hdev->req_wait_q);
@@ -923,6 +924,8 @@ int hci_register_dev(struct hci_dev *hdev)
923 924
924 hci_conn_hash_init(hdev); 925 hci_conn_hash_init(hdev);
925 926
927 INIT_LIST_HEAD(&hdev->blacklist.list);
928
926 memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); 929 memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
927 930
928 atomic_set(&hdev->promisc, 0); 931 atomic_set(&hdev->promisc, 0);
@@ -970,7 +973,7 @@ int hci_unregister_dev(struct hci_dev *hdev)
970 973
971 hci_dev_do_close(hdev); 974 hci_dev_do_close(hdev);
972 975
973 for (i = 0; i < 3; i++) 976 for (i = 0; i < NUM_REASSEMBLY; i++)
974 kfree_skb(hdev->reassembly[i]); 977 kfree_skb(hdev->reassembly[i]);
975 978
976 hci_notify(hdev, HCI_DEV_UNREG); 979 hci_notify(hdev, HCI_DEV_UNREG);
@@ -1030,89 +1033,170 @@ int hci_recv_frame(struct sk_buff *skb)
1030} 1033}
1031EXPORT_SYMBOL(hci_recv_frame); 1034EXPORT_SYMBOL(hci_recv_frame);
1032 1035
1033/* Receive packet type fragment */ 1036static int hci_reassembly(struct hci_dev *hdev, int type, void *data,
1034#define __reassembly(hdev, type) ((hdev)->reassembly[(type) - 2]) 1037 int count, __u8 index, gfp_t gfp_mask)
1035
1036int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
1037{ 1038{
1038 if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) 1039 int len = 0;
1040 int hlen = 0;
1041 int remain = count;
1042 struct sk_buff *skb;
1043 struct bt_skb_cb *scb;
1044
1045 if ((type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) ||
1046 index >= NUM_REASSEMBLY)
1039 return -EILSEQ; 1047 return -EILSEQ;
1040 1048
1049 skb = hdev->reassembly[index];
1050
1051 if (!skb) {
1052 switch (type) {
1053 case HCI_ACLDATA_PKT:
1054 len = HCI_MAX_FRAME_SIZE;
1055 hlen = HCI_ACL_HDR_SIZE;
1056 break;
1057 case HCI_EVENT_PKT:
1058 len = HCI_MAX_EVENT_SIZE;
1059 hlen = HCI_EVENT_HDR_SIZE;
1060 break;
1061 case HCI_SCODATA_PKT:
1062 len = HCI_MAX_SCO_SIZE;
1063 hlen = HCI_SCO_HDR_SIZE;
1064 break;
1065 }
1066
1067 skb = bt_skb_alloc(len, gfp_mask);
1068 if (!skb)
1069 return -ENOMEM;
1070
1071 scb = (void *) skb->cb;
1072 scb->expect = hlen;
1073 scb->pkt_type = type;
1074
1075 skb->dev = (void *) hdev;
1076 hdev->reassembly[index] = skb;
1077 }
1078
1041 while (count) { 1079 while (count) {
1042 struct sk_buff *skb = __reassembly(hdev, type); 1080 scb = (void *) skb->cb;
1043 struct { int expect; } *scb; 1081 len = min(scb->expect, (__u16)count);
1044 int len = 0;
1045 1082
1046 if (!skb) { 1083 memcpy(skb_put(skb, len), data, len);
1047 /* Start of the frame */
1048 1084
1049 switch (type) { 1085 count -= len;
1050 case HCI_EVENT_PKT: 1086 data += len;
1051 if (count >= HCI_EVENT_HDR_SIZE) { 1087 scb->expect -= len;
1052 struct hci_event_hdr *h = data; 1088 remain = count;
1053 len = HCI_EVENT_HDR_SIZE + h->plen;
1054 } else
1055 return -EILSEQ;
1056 break;
1057 1089
1058 case HCI_ACLDATA_PKT: 1090 switch (type) {
1059 if (count >= HCI_ACL_HDR_SIZE) { 1091 case HCI_EVENT_PKT:
1060 struct hci_acl_hdr *h = data; 1092 if (skb->len == HCI_EVENT_HDR_SIZE) {
1061 len = HCI_ACL_HDR_SIZE + __le16_to_cpu(h->dlen); 1093 struct hci_event_hdr *h = hci_event_hdr(skb);
1062 } else 1094 scb->expect = h->plen;
1063 return -EILSEQ; 1095
1064 break; 1096 if (skb_tailroom(skb) < scb->expect) {
1097 kfree_skb(skb);
1098 hdev->reassembly[index] = NULL;
1099 return -ENOMEM;
1100 }
1101 }
1102 break;
1065 1103
1066 case HCI_SCODATA_PKT: 1104 case HCI_ACLDATA_PKT:
1067 if (count >= HCI_SCO_HDR_SIZE) { 1105 if (skb->len == HCI_ACL_HDR_SIZE) {
1068 struct hci_sco_hdr *h = data; 1106 struct hci_acl_hdr *h = hci_acl_hdr(skb);
1069 len = HCI_SCO_HDR_SIZE + h->dlen; 1107 scb->expect = __le16_to_cpu(h->dlen);
1070 } else 1108
1071 return -EILSEQ; 1109 if (skb_tailroom(skb) < scb->expect) {
1072 break; 1110 kfree_skb(skb);
1111 hdev->reassembly[index] = NULL;
1112 return -ENOMEM;
1113 }
1073 } 1114 }
1115 break;
1074 1116
1075 skb = bt_skb_alloc(len, GFP_ATOMIC); 1117 case HCI_SCODATA_PKT:
1076 if (!skb) { 1118 if (skb->len == HCI_SCO_HDR_SIZE) {
1077 BT_ERR("%s no memory for packet", hdev->name); 1119 struct hci_sco_hdr *h = hci_sco_hdr(skb);
1078 return -ENOMEM; 1120 scb->expect = h->dlen;
1121
1122 if (skb_tailroom(skb) < scb->expect) {
1123 kfree_skb(skb);
1124 hdev->reassembly[index] = NULL;
1125 return -ENOMEM;
1126 }
1079 } 1127 }
1128 break;
1129 }
1130
1131 if (scb->expect == 0) {
1132 /* Complete frame */
1080 1133
1081 skb->dev = (void *) hdev;
1082 bt_cb(skb)->pkt_type = type; 1134 bt_cb(skb)->pkt_type = type;
1135 hci_recv_frame(skb);
1083 1136
1084 __reassembly(hdev, type) = skb; 1137 hdev->reassembly[index] = NULL;
1138 return remain;
1139 }
1140 }
1085 1141
1086 scb = (void *) skb->cb; 1142 return remain;
1087 scb->expect = len; 1143}
1088 } else {
1089 /* Continuation */
1090 1144
1091 scb = (void *) skb->cb; 1145int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
1092 len = scb->expect; 1146{
1093 } 1147 int rem = 0;
1094 1148
1095 len = min(len, count); 1149 if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT)
1150 return -EILSEQ;
1096 1151
1097 memcpy(skb_put(skb, len), data, len); 1152 do {
1153 rem = hci_reassembly(hdev, type, data, count,
1154 type - 1, GFP_ATOMIC);
1155 if (rem < 0)
1156 return rem;
1098 1157
1099 scb->expect -= len; 1158 data += (count - rem);
1159 count = rem;
1160 } while (count);
1100 1161
1101 if (scb->expect == 0) { 1162 return rem;
1102 /* Complete frame */ 1163}
1164EXPORT_SYMBOL(hci_recv_fragment);
1103 1165
1104 __reassembly(hdev, type) = NULL; 1166#define STREAM_REASSEMBLY 0
1105 1167
1106 bt_cb(skb)->pkt_type = type; 1168int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count)
1107 hci_recv_frame(skb); 1169{
1108 } 1170 int type;
1171 int rem = 0;
1109 1172
1110 count -= len; data += len; 1173 do {
1111 } 1174 struct sk_buff *skb = hdev->reassembly[STREAM_REASSEMBLY];
1112 1175
1113 return 0; 1176 if (!skb) {
1177 struct { char type; } *pkt;
1178
1179 /* Start of the frame */
1180 pkt = data;
1181 type = pkt->type;
1182
1183 data++;
1184 count--;
1185 } else
1186 type = bt_cb(skb)->pkt_type;
1187
1188 rem = hci_reassembly(hdev, type, data,
1189 count, STREAM_REASSEMBLY, GFP_ATOMIC);
1190 if (rem < 0)
1191 return rem;
1192
1193 data += (count - rem);
1194 count = rem;
1195 } while (count);
1196
1197 return rem;
1114} 1198}
1115EXPORT_SYMBOL(hci_recv_fragment); 1199EXPORT_SYMBOL(hci_recv_stream_fragment);
1116 1200
1117/* ---- Interface to upper protocols ---- */ 1201/* ---- Interface to upper protocols ---- */
1118 1202
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 6c57fc71c7e2..2069c3b05fda 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1,6 +1,6 @@
1/* 1/*
2 BlueZ - Bluetooth protocol stack for Linux 2 BlueZ - Bluetooth protocol stack for Linux
3 Copyright (C) 2000-2001 Qualcomm Incorporated 3 Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
4 4
5 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> 5 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
6 6
@@ -584,7 +584,7 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
584 conn->out = 1; 584 conn->out = 1;
585 conn->link_mode |= HCI_LM_MASTER; 585 conn->link_mode |= HCI_LM_MASTER;
586 } else 586 } else
587 BT_ERR("No memmory for new connection"); 587 BT_ERR("No memory for new connection");
588 } 588 }
589 } 589 }
590 590
@@ -952,7 +952,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
952 952
953 mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); 953 mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type);
954 954
955 if (mask & HCI_LM_ACCEPT) { 955 if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) {
956 /* Connection accepted */ 956 /* Connection accepted */
957 struct inquiry_entry *ie; 957 struct inquiry_entry *ie;
958 struct hci_conn *conn; 958 struct hci_conn *conn;
@@ -965,7 +965,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
965 conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); 965 conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
966 if (!conn) { 966 if (!conn) {
967 if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) { 967 if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) {
968 BT_ERR("No memmory for new connection"); 968 BT_ERR("No memory for new connection");
969 hci_dev_unlock(hdev); 969 hci_dev_unlock(hdev);
970 return; 970 return;
971 } 971 }
@@ -1049,6 +1049,8 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
1049 if (conn) { 1049 if (conn) {
1050 if (!ev->status) 1050 if (!ev->status)
1051 conn->link_mode |= HCI_LM_AUTH; 1051 conn->link_mode |= HCI_LM_AUTH;
1052 else
1053 conn->sec_level = BT_SECURITY_LOW;
1052 1054
1053 clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); 1055 clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
1054 1056
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 38f08f6b86f6..4f170a595934 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -165,6 +165,86 @@ static int hci_sock_release(struct socket *sock)
165 return 0; 165 return 0;
166} 166}
167 167
168struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr)
169{
170 struct list_head *p;
171 struct bdaddr_list *blacklist = &hdev->blacklist;
172
173 list_for_each(p, &blacklist->list) {
174 struct bdaddr_list *b;
175
176 b = list_entry(p, struct bdaddr_list, list);
177
178 if (bacmp(bdaddr, &b->bdaddr) == 0)
179 return b;
180 }
181
182 return NULL;
183}
184
185static int hci_blacklist_add(struct hci_dev *hdev, void __user *arg)
186{
187 bdaddr_t bdaddr;
188 struct bdaddr_list *entry;
189
190 if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
191 return -EFAULT;
192
193 if (bacmp(&bdaddr, BDADDR_ANY) == 0)
194 return -EBADF;
195
196 if (hci_blacklist_lookup(hdev, &bdaddr))
197 return -EEXIST;
198
199 entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL);
200 if (!entry)
201 return -ENOMEM;
202
203 bacpy(&entry->bdaddr, &bdaddr);
204
205 list_add(&entry->list, &hdev->blacklist.list);
206
207 return 0;
208}
209
210int hci_blacklist_clear(struct hci_dev *hdev)
211{
212 struct list_head *p, *n;
213 struct bdaddr_list *blacklist = &hdev->blacklist;
214
215 list_for_each_safe(p, n, &blacklist->list) {
216 struct bdaddr_list *b;
217
218 b = list_entry(p, struct bdaddr_list, list);
219
220 list_del(p);
221 kfree(b);
222 }
223
224 return 0;
225}
226
227static int hci_blacklist_del(struct hci_dev *hdev, void __user *arg)
228{
229 bdaddr_t bdaddr;
230 struct bdaddr_list *entry;
231
232 if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
233 return -EFAULT;
234
235 if (bacmp(&bdaddr, BDADDR_ANY) == 0)
236 return hci_blacklist_clear(hdev);
237
238 entry = hci_blacklist_lookup(hdev, &bdaddr);
239 if (!entry)
240 return -ENOENT;
241
242 list_del(&entry->list);
243 kfree(entry);
244
245 return 0;
246}
247
168/* Ioctls that require bound socket */ 248/* Ioctls that require bound socket */
169static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) 249static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
170{ 250{
@@ -194,6 +274,16 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign
194 case HCIGETAUTHINFO: 274 case HCIGETAUTHINFO:
195 return hci_get_auth_info(hdev, (void __user *) arg); 275 return hci_get_auth_info(hdev, (void __user *) arg);
196 276
277 case HCIBLOCKADDR:
278 if (!capable(CAP_NET_ADMIN))
279 return -EACCES;
280 return hci_blacklist_add(hdev, (void __user *) arg);
281
282 case HCIUNBLOCKADDR:
283 if (!capable(CAP_NET_ADMIN))
284 return -EACCES;
285 return hci_blacklist_del(hdev, (void __user *) arg);
286
197 default: 287 default:
198 if (hdev->ioctl) 288 if (hdev->ioctl)
199 return hdev->ioctl(hdev, cmd, arg); 289 return hdev->ioctl(hdev, cmd, arg);
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 463ffa4fe042..ce44c47eeac1 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -436,6 +436,41 @@ static const struct file_operations inquiry_cache_fops = {
436 .release = single_release, 436 .release = single_release,
437}; 437};
438 438
439static int blacklist_show(struct seq_file *f, void *p)
440{
441 struct hci_dev *hdev = f->private;
442 struct bdaddr_list *blacklist = &hdev->blacklist;
443 struct list_head *l;
444
445 hci_dev_lock_bh(hdev);
446
447 list_for_each(l, &blacklist->list) {
448 struct bdaddr_list *b;
449 bdaddr_t bdaddr;
450
451 b = list_entry(l, struct bdaddr_list, list);
452
453 baswap(&bdaddr, &b->bdaddr);
454
455 seq_printf(f, "%s\n", batostr(&bdaddr));
456 }
457
458 hci_dev_unlock_bh(hdev);
459
460 return 0;
461}
462
463static int blacklist_open(struct inode *inode, struct file *file)
464{
465 return single_open(file, blacklist_show, inode->i_private);
466}
467
468static const struct file_operations blacklist_fops = {
469 .open = blacklist_open,
470 .read = seq_read,
471 .llseek = seq_lseek,
472 .release = single_release,
473};
439int hci_register_sysfs(struct hci_dev *hdev) 474int hci_register_sysfs(struct hci_dev *hdev)
440{ 475{
441 struct device *dev = &hdev->dev; 476 struct device *dev = &hdev->dev;
@@ -465,6 +500,9 @@ int hci_register_sysfs(struct hci_dev *hdev)
465 debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, 500 debugfs_create_file("inquiry_cache", 0444, hdev->debugfs,
466 hdev, &inquiry_cache_fops); 501 hdev, &inquiry_cache_fops);
467 502
503 debugfs_create_file("blacklist", 0444, hdev->debugfs,
504 hdev, &blacklist_fops);
505
468 return 0; 506 return 0;
469} 507}
470 508
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 1b682a5aa061..9ba1e8eee37c 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1,6 +1,8 @@
1/* 1/*
2 BlueZ - Bluetooth protocol stack for Linux 2 BlueZ - Bluetooth protocol stack for Linux
3 Copyright (C) 2000-2001 Qualcomm Incorporated 3 Copyright (C) 2000-2001 Qualcomm Incorporated
4 Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
5 Copyright (C) 2010 Google Inc.
4 6
5 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> 7 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
6 8
@@ -53,15 +55,9 @@
53#include <net/bluetooth/hci_core.h> 55#include <net/bluetooth/hci_core.h>
54#include <net/bluetooth/l2cap.h> 56#include <net/bluetooth/l2cap.h>
55 57
56#define VERSION "2.14" 58#define VERSION "2.15"
57 59
58#ifdef CONFIG_BT_L2CAP_EXT_FEATURES 60static int disable_ertm = 0;
59static int enable_ertm = 1;
60#else
61static int enable_ertm = 0;
62#endif
63static int max_transmit = L2CAP_DEFAULT_MAX_TX;
64static int tx_window = L2CAP_DEFAULT_TX_WINDOW;
65 61
66static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; 62static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
67static u8 l2cap_fixed_chan[8] = { 0x02, }; 63static u8 l2cap_fixed_chan[8] = { 0x02, };
@@ -80,9 +76,12 @@ static void __l2cap_sock_close(struct sock *sk, int reason);
80static void l2cap_sock_close(struct sock *sk); 76static void l2cap_sock_close(struct sock *sk);
81static void l2cap_sock_kill(struct sock *sk); 77static void l2cap_sock_kill(struct sock *sk);
82 78
79static int l2cap_build_conf_req(struct sock *sk, void *data);
83static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, 80static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
84 u8 code, u8 ident, u16 dlen, void *data); 81 u8 code, u8 ident, u16 dlen, void *data);
85 82
83static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb);
84
86/* ---- L2CAP timers ---- */ 85/* ---- L2CAP timers ---- */
87static void l2cap_sock_timeout(unsigned long arg) 86static void l2cap_sock_timeout(unsigned long arg)
88{ 87{
@@ -278,6 +277,24 @@ static void l2cap_chan_del(struct sock *sk, int err)
278 parent->sk_data_ready(parent, 0); 277 parent->sk_data_ready(parent, 0);
279 } else 278 } else
280 sk->sk_state_change(sk); 279 sk->sk_state_change(sk);
280
281 skb_queue_purge(TX_QUEUE(sk));
282
283 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
284 struct srej_list *l, *tmp;
285
286 del_timer(&l2cap_pi(sk)->retrans_timer);
287 del_timer(&l2cap_pi(sk)->monitor_timer);
288 del_timer(&l2cap_pi(sk)->ack_timer);
289
290 skb_queue_purge(SREJ_QUEUE(sk));
291 skb_queue_purge(BUSY_QUEUE(sk));
292
293 list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) {
294 list_del(&l->list);
295 kfree(l);
296 }
297 }
281} 298}
282 299
283/* Service level security */ 300/* Service level security */
@@ -351,8 +368,12 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
351 struct sk_buff *skb; 368 struct sk_buff *skb;
352 struct l2cap_hdr *lh; 369 struct l2cap_hdr *lh;
353 struct l2cap_conn *conn = pi->conn; 370 struct l2cap_conn *conn = pi->conn;
371 struct sock *sk = (struct sock *)pi;
354 int count, hlen = L2CAP_HDR_SIZE + 2; 372 int count, hlen = L2CAP_HDR_SIZE + 2;
355 373
374 if (sk->sk_state != BT_CONNECTED)
375 return;
376
356 if (pi->fcs == L2CAP_FCS_CRC16) 377 if (pi->fcs == L2CAP_FCS_CRC16)
357 hlen += 2; 378 hlen += 2;
358 379
@@ -401,6 +422,11 @@ static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
401 l2cap_send_sframe(pi, control); 422 l2cap_send_sframe(pi, control);
402} 423}
403 424
425static inline int __l2cap_no_conn_pending(struct sock *sk)
426{
427 return !(l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND);
428}
429
404static void l2cap_do_start(struct sock *sk) 430static void l2cap_do_start(struct sock *sk)
405{ 431{
406 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 432 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
@@ -409,12 +435,13 @@ static void l2cap_do_start(struct sock *sk)
409 if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) 435 if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
410 return; 436 return;
411 437
412 if (l2cap_check_security(sk)) { 438 if (l2cap_check_security(sk) && __l2cap_no_conn_pending(sk)) {
413 struct l2cap_conn_req req; 439 struct l2cap_conn_req req;
414 req.scid = cpu_to_le16(l2cap_pi(sk)->scid); 440 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
415 req.psm = l2cap_pi(sk)->psm; 441 req.psm = l2cap_pi(sk)->psm;
416 442
417 l2cap_pi(sk)->ident = l2cap_get_ident(conn); 443 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
444 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
418 445
419 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 446 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
420 L2CAP_CONN_REQ, sizeof(req), &req); 447 L2CAP_CONN_REQ, sizeof(req), &req);
@@ -434,24 +461,57 @@ static void l2cap_do_start(struct sock *sk)
434 } 461 }
435} 462}
436 463
437static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk) 464static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
465{
466 u32 local_feat_mask = l2cap_feat_mask;
467 if (!disable_ertm)
468 local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
469
470 switch (mode) {
471 case L2CAP_MODE_ERTM:
472 return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
473 case L2CAP_MODE_STREAMING:
474 return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
475 default:
476 return 0x00;
477 }
478}
479
480static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err)
438{ 481{
439 struct l2cap_disconn_req req; 482 struct l2cap_disconn_req req;
440 483
484 if (!conn)
485 return;
486
487 skb_queue_purge(TX_QUEUE(sk));
488
489 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
490 del_timer(&l2cap_pi(sk)->retrans_timer);
491 del_timer(&l2cap_pi(sk)->monitor_timer);
492 del_timer(&l2cap_pi(sk)->ack_timer);
493 }
494
441 req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid); 495 req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
442 req.scid = cpu_to_le16(l2cap_pi(sk)->scid); 496 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
443 l2cap_send_cmd(conn, l2cap_get_ident(conn), 497 l2cap_send_cmd(conn, l2cap_get_ident(conn),
444 L2CAP_DISCONN_REQ, sizeof(req), &req); 498 L2CAP_DISCONN_REQ, sizeof(req), &req);
499
500 sk->sk_state = BT_DISCONN;
501 sk->sk_err = err;
445} 502}
446 503
447/* ---- L2CAP connections ---- */ 504/* ---- L2CAP connections ---- */
448static void l2cap_conn_start(struct l2cap_conn *conn) 505static void l2cap_conn_start(struct l2cap_conn *conn)
449{ 506{
450 struct l2cap_chan_list *l = &conn->chan_list; 507 struct l2cap_chan_list *l = &conn->chan_list;
508 struct sock_del_list del, *tmp1, *tmp2;
451 struct sock *sk; 509 struct sock *sk;
452 510
453 BT_DBG("conn %p", conn); 511 BT_DBG("conn %p", conn);
454 512
513 INIT_LIST_HEAD(&del.list);
514
455 read_lock(&l->lock); 515 read_lock(&l->lock);
456 516
457 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { 517 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
@@ -464,18 +524,38 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
464 } 524 }
465 525
466 if (sk->sk_state == BT_CONNECT) { 526 if (sk->sk_state == BT_CONNECT) {
467 if (l2cap_check_security(sk)) { 527 struct l2cap_conn_req req;
468 struct l2cap_conn_req req;
469 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
470 req.psm = l2cap_pi(sk)->psm;
471 528
472 l2cap_pi(sk)->ident = l2cap_get_ident(conn); 529 if (!l2cap_check_security(sk) ||
530 !__l2cap_no_conn_pending(sk)) {
531 bh_unlock_sock(sk);
532 continue;
533 }
473 534
474 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 535 if (!l2cap_mode_supported(l2cap_pi(sk)->mode,
475 L2CAP_CONN_REQ, sizeof(req), &req); 536 conn->feat_mask)
537 && l2cap_pi(sk)->conf_state &
538 L2CAP_CONF_STATE2_DEVICE) {
539 tmp1 = kzalloc(sizeof(struct sock_del_list),
540 GFP_ATOMIC);
541 tmp1->sk = sk;
542 list_add_tail(&tmp1->list, &del.list);
543 bh_unlock_sock(sk);
544 continue;
476 } 545 }
546
547 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
548 req.psm = l2cap_pi(sk)->psm;
549
550 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
551 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
552
553 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
554 L2CAP_CONN_REQ, sizeof(req), &req);
555
477 } else if (sk->sk_state == BT_CONNECT2) { 556 } else if (sk->sk_state == BT_CONNECT2) {
478 struct l2cap_conn_rsp rsp; 557 struct l2cap_conn_rsp rsp;
558 char buf[128];
479 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); 559 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
480 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); 560 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
481 561
@@ -498,12 +578,31 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
498 578
499 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 579 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
500 L2CAP_CONN_RSP, sizeof(rsp), &rsp); 580 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
581
582 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT ||
583 rsp.result != L2CAP_CR_SUCCESS) {
584 bh_unlock_sock(sk);
585 continue;
586 }
587
588 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
589 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
590 l2cap_build_conf_req(sk, buf), buf);
591 l2cap_pi(sk)->num_conf_req++;
501 } 592 }
502 593
503 bh_unlock_sock(sk); 594 bh_unlock_sock(sk);
504 } 595 }
505 596
506 read_unlock(&l->lock); 597 read_unlock(&l->lock);
598
599 list_for_each_entry_safe(tmp1, tmp2, &del.list, list) {
600 bh_lock_sock(tmp1->sk);
601 __l2cap_sock_close(tmp1->sk, ECONNRESET);
602 bh_unlock_sock(tmp1->sk);
603 list_del(&tmp1->list);
604 kfree(tmp1);
605 }
507} 606}
508 607
509static void l2cap_conn_ready(struct l2cap_conn *conn) 608static void l2cap_conn_ready(struct l2cap_conn *conn)
@@ -732,9 +831,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
732 sk->sk_type == SOCK_STREAM) { 831 sk->sk_type == SOCK_STREAM) {
733 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 832 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
734 833
735 sk->sk_state = BT_DISCONN;
736 l2cap_sock_set_timer(sk, sk->sk_sndtimeo); 834 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
737 l2cap_send_disconn_req(conn, sk); 835 l2cap_send_disconn_req(conn, sk, reason);
738 } else 836 } else
739 l2cap_chan_del(sk, reason); 837 l2cap_chan_del(sk, reason);
740 break; 838 break;
@@ -794,6 +892,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
794 892
795 pi->imtu = l2cap_pi(parent)->imtu; 893 pi->imtu = l2cap_pi(parent)->imtu;
796 pi->omtu = l2cap_pi(parent)->omtu; 894 pi->omtu = l2cap_pi(parent)->omtu;
895 pi->conf_state = l2cap_pi(parent)->conf_state;
797 pi->mode = l2cap_pi(parent)->mode; 896 pi->mode = l2cap_pi(parent)->mode;
798 pi->fcs = l2cap_pi(parent)->fcs; 897 pi->fcs = l2cap_pi(parent)->fcs;
799 pi->max_tx = l2cap_pi(parent)->max_tx; 898 pi->max_tx = l2cap_pi(parent)->max_tx;
@@ -804,13 +903,15 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
804 } else { 903 } else {
805 pi->imtu = L2CAP_DEFAULT_MTU; 904 pi->imtu = L2CAP_DEFAULT_MTU;
806 pi->omtu = 0; 905 pi->omtu = 0;
807 if (enable_ertm && sk->sk_type == SOCK_STREAM) 906 if (!disable_ertm && sk->sk_type == SOCK_STREAM) {
808 pi->mode = L2CAP_MODE_ERTM; 907 pi->mode = L2CAP_MODE_ERTM;
809 else 908 pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
909 } else {
810 pi->mode = L2CAP_MODE_BASIC; 910 pi->mode = L2CAP_MODE_BASIC;
811 pi->max_tx = max_transmit; 911 }
912 pi->max_tx = L2CAP_DEFAULT_MAX_TX;
812 pi->fcs = L2CAP_FCS_CRC16; 913 pi->fcs = L2CAP_FCS_CRC16;
813 pi->tx_win = tx_window; 914 pi->tx_win = L2CAP_DEFAULT_TX_WINDOW;
814 pi->sec_level = BT_SECURITY_LOW; 915 pi->sec_level = BT_SECURITY_LOW;
815 pi->role_switch = 0; 916 pi->role_switch = 0;
816 pi->force_reliable = 0; 917 pi->force_reliable = 0;
@@ -1059,7 +1160,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
1059 break; 1160 break;
1060 case L2CAP_MODE_ERTM: 1161 case L2CAP_MODE_ERTM:
1061 case L2CAP_MODE_STREAMING: 1162 case L2CAP_MODE_STREAMING:
1062 if (enable_ertm) 1163 if (!disable_ertm)
1063 break; 1164 break;
1064 /* fall through */ 1165 /* fall through */
1065 default: 1166 default:
@@ -1076,6 +1177,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
1076 1177
1077 case BT_CONNECTED: 1178 case BT_CONNECTED:
1078 /* Already connected */ 1179 /* Already connected */
1180 err = -EISCONN;
1079 goto done; 1181 goto done;
1080 1182
1081 case BT_OPEN: 1183 case BT_OPEN:
@@ -1124,7 +1226,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
1124 break; 1226 break;
1125 case L2CAP_MODE_ERTM: 1227 case L2CAP_MODE_ERTM:
1126 case L2CAP_MODE_STREAMING: 1228 case L2CAP_MODE_STREAMING:
1127 if (enable_ertm) 1229 if (!disable_ertm)
1128 break; 1230 break;
1129 /* fall through */ 1231 /* fall through */
1130 default: 1232 default:
@@ -1277,9 +1379,11 @@ static void l2cap_monitor_timeout(unsigned long arg)
1277{ 1379{
1278 struct sock *sk = (void *) arg; 1380 struct sock *sk = (void *) arg;
1279 1381
1382 BT_DBG("sk %p", sk);
1383
1280 bh_lock_sock(sk); 1384 bh_lock_sock(sk);
1281 if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) { 1385 if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) {
1282 l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk); 1386 l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk, ECONNABORTED);
1283 bh_unlock_sock(sk); 1387 bh_unlock_sock(sk);
1284 return; 1388 return;
1285 } 1389 }
@@ -1295,6 +1399,8 @@ static void l2cap_retrans_timeout(unsigned long arg)
1295{ 1399{
1296 struct sock *sk = (void *) arg; 1400 struct sock *sk = (void *) arg;
1297 1401
1402 BT_DBG("sk %p", sk);
1403
1298 bh_lock_sock(sk); 1404 bh_lock_sock(sk);
1299 l2cap_pi(sk)->retry_count = 1; 1405 l2cap_pi(sk)->retry_count = 1;
1300 __mod_monitor_timer(); 1406 __mod_monitor_timer();
@@ -1333,7 +1439,7 @@ static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb)
1333 hci_send_acl(pi->conn->hcon, skb, 0); 1439 hci_send_acl(pi->conn->hcon, skb, 0);
1334} 1440}
1335 1441
1336static int l2cap_streaming_send(struct sock *sk) 1442static void l2cap_streaming_send(struct sock *sk)
1337{ 1443{
1338 struct sk_buff *skb, *tx_skb; 1444 struct sk_buff *skb, *tx_skb;
1339 struct l2cap_pinfo *pi = l2cap_pi(sk); 1445 struct l2cap_pinfo *pi = l2cap_pi(sk);
@@ -1363,7 +1469,6 @@ static int l2cap_streaming_send(struct sock *sk)
1363 skb = skb_dequeue(TX_QUEUE(sk)); 1469 skb = skb_dequeue(TX_QUEUE(sk));
1364 kfree_skb(skb); 1470 kfree_skb(skb);
1365 } 1471 }
1366 return 0;
1367} 1472}
1368 1473
1369static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq) 1474static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
@@ -1387,15 +1492,22 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
1387 1492
1388 if (pi->remote_max_tx && 1493 if (pi->remote_max_tx &&
1389 bt_cb(skb)->retries == pi->remote_max_tx) { 1494 bt_cb(skb)->retries == pi->remote_max_tx) {
1390 l2cap_send_disconn_req(pi->conn, sk); 1495 l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED);
1391 return; 1496 return;
1392 } 1497 }
1393 1498
1394 tx_skb = skb_clone(skb, GFP_ATOMIC); 1499 tx_skb = skb_clone(skb, GFP_ATOMIC);
1395 bt_cb(skb)->retries++; 1500 bt_cb(skb)->retries++;
1396 control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); 1501 control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
1502
1503 if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
1504 control |= L2CAP_CTRL_FINAL;
1505 pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
1506 }
1507
1397 control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) 1508 control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
1398 | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); 1509 | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
1510
1399 put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); 1511 put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
1400 1512
1401 if (pi->fcs == L2CAP_FCS_CRC16) { 1513 if (pi->fcs == L2CAP_FCS_CRC16) {
@@ -1413,15 +1525,14 @@ static int l2cap_ertm_send(struct sock *sk)
1413 u16 control, fcs; 1525 u16 control, fcs;
1414 int nsent = 0; 1526 int nsent = 0;
1415 1527
1416 if (pi->conn_state & L2CAP_CONN_WAIT_F) 1528 if (sk->sk_state != BT_CONNECTED)
1417 return 0; 1529 return -ENOTCONN;
1418 1530
1419 while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) && 1531 while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) {
1420 !(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) {
1421 1532
1422 if (pi->remote_max_tx && 1533 if (pi->remote_max_tx &&
1423 bt_cb(skb)->retries == pi->remote_max_tx) { 1534 bt_cb(skb)->retries == pi->remote_max_tx) {
1424 l2cap_send_disconn_req(pi->conn, sk); 1535 l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED);
1425 break; 1536 break;
1426 } 1537 }
1427 1538
@@ -1430,6 +1541,8 @@ static int l2cap_ertm_send(struct sock *sk)
1430 bt_cb(skb)->retries++; 1541 bt_cb(skb)->retries++;
1431 1542
1432 control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); 1543 control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
1544 control &= L2CAP_CTRL_SAR;
1545
1433 if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { 1546 if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
1434 control |= L2CAP_CTRL_FINAL; 1547 control |= L2CAP_CTRL_FINAL;
1435 pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; 1548 pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
@@ -1470,16 +1583,11 @@ static int l2cap_retransmit_frames(struct sock *sk)
1470 struct l2cap_pinfo *pi = l2cap_pi(sk); 1583 struct l2cap_pinfo *pi = l2cap_pi(sk);
1471 int ret; 1584 int ret;
1472 1585
1473 spin_lock_bh(&pi->send_lock);
1474
1475 if (!skb_queue_empty(TX_QUEUE(sk))) 1586 if (!skb_queue_empty(TX_QUEUE(sk)))
1476 sk->sk_send_head = TX_QUEUE(sk)->next; 1587 sk->sk_send_head = TX_QUEUE(sk)->next;
1477 1588
1478 pi->next_tx_seq = pi->expected_ack_seq; 1589 pi->next_tx_seq = pi->expected_ack_seq;
1479 ret = l2cap_ertm_send(sk); 1590 ret = l2cap_ertm_send(sk);
1480
1481 spin_unlock_bh(&pi->send_lock);
1482
1483 return ret; 1591 return ret;
1484} 1592}
1485 1593
@@ -1487,7 +1595,6 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi)
1487{ 1595{
1488 struct sock *sk = (struct sock *)pi; 1596 struct sock *sk = (struct sock *)pi;
1489 u16 control = 0; 1597 u16 control = 0;
1490 int nframes;
1491 1598
1492 control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; 1599 control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
1493 1600
@@ -1498,11 +1605,7 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi)
1498 return; 1605 return;
1499 } 1606 }
1500 1607
1501 spin_lock_bh(&pi->send_lock); 1608 if (l2cap_ertm_send(sk) > 0)
1502 nframes = l2cap_ertm_send(sk);
1503 spin_unlock_bh(&pi->send_lock);
1504
1505 if (nframes > 0)
1506 return; 1609 return;
1507 1610
1508 control |= L2CAP_SUPER_RCV_READY; 1611 control |= L2CAP_SUPER_RCV_READY;
@@ -1697,10 +1800,8 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
1697 size += buflen; 1800 size += buflen;
1698 } 1801 }
1699 skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk)); 1802 skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk));
1700 spin_lock_bh(&pi->send_lock);
1701 if (sk->sk_send_head == NULL) 1803 if (sk->sk_send_head == NULL)
1702 sk->sk_send_head = sar_queue.next; 1804 sk->sk_send_head = sar_queue.next;
1703 spin_unlock_bh(&pi->send_lock);
1704 1805
1705 return size; 1806 return size;
1706} 1807}
@@ -1745,7 +1846,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
1745 case L2CAP_MODE_BASIC: 1846 case L2CAP_MODE_BASIC:
1746 /* Check outgoing MTU */ 1847 /* Check outgoing MTU */
1747 if (len > pi->omtu) { 1848 if (len > pi->omtu) {
1748 err = -EINVAL; 1849 err = -EMSGSIZE;
1749 goto done; 1850 goto done;
1750 } 1851 }
1751 1852
@@ -1772,14 +1873,9 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
1772 } 1873 }
1773 __skb_queue_tail(TX_QUEUE(sk), skb); 1874 __skb_queue_tail(TX_QUEUE(sk), skb);
1774 1875
1775 if (pi->mode == L2CAP_MODE_ERTM)
1776 spin_lock_bh(&pi->send_lock);
1777
1778 if (sk->sk_send_head == NULL) 1876 if (sk->sk_send_head == NULL)
1779 sk->sk_send_head = skb; 1877 sk->sk_send_head = skb;
1780 1878
1781 if (pi->mode == L2CAP_MODE_ERTM)
1782 spin_unlock_bh(&pi->send_lock);
1783 } else { 1879 } else {
1784 /* Segment SDU into multiples PDUs */ 1880 /* Segment SDU into multiples PDUs */
1785 err = l2cap_sar_segment_sdu(sk, msg, len); 1881 err = l2cap_sar_segment_sdu(sk, msg, len);
@@ -1788,11 +1884,14 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
1788 } 1884 }
1789 1885
1790 if (pi->mode == L2CAP_MODE_STREAMING) { 1886 if (pi->mode == L2CAP_MODE_STREAMING) {
1791 err = l2cap_streaming_send(sk); 1887 l2cap_streaming_send(sk);
1792 } else { 1888 } else {
1793 spin_lock_bh(&pi->send_lock); 1889 if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY &&
1890 pi->conn_state && L2CAP_CONN_WAIT_F) {
1891 err = len;
1892 break;
1893 }
1794 err = l2cap_ertm_send(sk); 1894 err = l2cap_ertm_send(sk);
1795 spin_unlock_bh(&pi->send_lock);
1796 } 1895 }
1797 1896
1798 if (err >= 0) 1897 if (err >= 0)
@@ -1801,7 +1900,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
1801 1900
1802 default: 1901 default:
1803 BT_DBG("bad state %1.1x", pi->mode); 1902 BT_DBG("bad state %1.1x", pi->mode);
1804 err = -EINVAL; 1903 err = -EBADFD;
1805 } 1904 }
1806 1905
1807done: 1906done:
@@ -1817,6 +1916,8 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms
1817 1916
1818 if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { 1917 if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
1819 struct l2cap_conn_rsp rsp; 1918 struct l2cap_conn_rsp rsp;
1919 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1920 u8 buf[128];
1820 1921
1821 sk->sk_state = BT_CONFIG; 1922 sk->sk_state = BT_CONFIG;
1822 1923
@@ -1827,6 +1928,16 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms
1827 l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident, 1928 l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident,
1828 L2CAP_CONN_RSP, sizeof(rsp), &rsp); 1929 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
1829 1930
1931 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) {
1932 release_sock(sk);
1933 return 0;
1934 }
1935
1936 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
1937 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
1938 l2cap_build_conf_req(sk, buf), buf);
1939 l2cap_pi(sk)->num_conf_req++;
1940
1830 release_sock(sk); 1941 release_sock(sk);
1831 return 0; 1942 return 0;
1832 } 1943 }
@@ -1863,13 +1974,19 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us
1863 break; 1974 break;
1864 } 1975 }
1865 1976
1977 if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) {
1978 err = -EINVAL;
1979 break;
1980 }
1981
1866 l2cap_pi(sk)->mode = opts.mode; 1982 l2cap_pi(sk)->mode = opts.mode;
1867 switch (l2cap_pi(sk)->mode) { 1983 switch (l2cap_pi(sk)->mode) {
1868 case L2CAP_MODE_BASIC: 1984 case L2CAP_MODE_BASIC:
1985 l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE;
1869 break; 1986 break;
1870 case L2CAP_MODE_ERTM: 1987 case L2CAP_MODE_ERTM:
1871 case L2CAP_MODE_STREAMING: 1988 case L2CAP_MODE_STREAMING:
1872 if (enable_ertm) 1989 if (!disable_ertm)
1873 break; 1990 break;
1874 /* fall through */ 1991 /* fall through */
1875 default: 1992 default:
@@ -2137,6 +2254,10 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
2137 err = bt_sock_wait_state(sk, BT_CLOSED, 2254 err = bt_sock_wait_state(sk, BT_CLOSED,
2138 sk->sk_lingertime); 2255 sk->sk_lingertime);
2139 } 2256 }
2257
2258 if (!err && sk->sk_err)
2259 err = -sk->sk_err;
2260
2140 release_sock(sk); 2261 release_sock(sk);
2141 return err; 2262 return err;
2142} 2263}
@@ -2357,25 +2478,10 @@ static inline void l2cap_ertm_init(struct sock *sk)
2357 2478
2358 __skb_queue_head_init(SREJ_QUEUE(sk)); 2479 __skb_queue_head_init(SREJ_QUEUE(sk));
2359 __skb_queue_head_init(BUSY_QUEUE(sk)); 2480 __skb_queue_head_init(BUSY_QUEUE(sk));
2360 spin_lock_init(&l2cap_pi(sk)->send_lock);
2361 2481
2362 INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work); 2482 INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
2363}
2364 2483
2365static int l2cap_mode_supported(__u8 mode, __u32 feat_mask) 2484 sk->sk_backlog_rcv = l2cap_ertm_data_rcv;
2366{
2367 u32 local_feat_mask = l2cap_feat_mask;
2368 if (enable_ertm)
2369 local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
2370
2371 switch (mode) {
2372 case L2CAP_MODE_ERTM:
2373 return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
2374 case L2CAP_MODE_STREAMING:
2375 return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
2376 default:
2377 return 0x00;
2378 }
2379} 2485}
2380 2486
2381static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) 2487static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
@@ -2406,10 +2512,10 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
2406 switch (pi->mode) { 2512 switch (pi->mode) {
2407 case L2CAP_MODE_STREAMING: 2513 case L2CAP_MODE_STREAMING:
2408 case L2CAP_MODE_ERTM: 2514 case L2CAP_MODE_ERTM:
2409 pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; 2515 if (pi->conf_state & L2CAP_CONF_STATE2_DEVICE)
2410 if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask)) 2516 break;
2411 l2cap_send_disconn_req(pi->conn, sk); 2517
2412 break; 2518 /* fall through */
2413 default: 2519 default:
2414 pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask); 2520 pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask);
2415 break; 2521 break;
@@ -2420,6 +2526,14 @@ done:
2420 case L2CAP_MODE_BASIC: 2526 case L2CAP_MODE_BASIC:
2421 if (pi->imtu != L2CAP_DEFAULT_MTU) 2527 if (pi->imtu != L2CAP_DEFAULT_MTU)
2422 l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); 2528 l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu);
2529
2530 rfc.mode = L2CAP_MODE_BASIC;
2531 rfc.txwin_size = 0;
2532 rfc.max_transmit = 0;
2533 rfc.retrans_timeout = 0;
2534 rfc.monitor_timeout = 0;
2535 rfc.max_pdu_size = 0;
2536
2423 break; 2537 break;
2424 2538
2425 case L2CAP_MODE_ERTM: 2539 case L2CAP_MODE_ERTM:
@@ -2432,9 +2546,6 @@ done:
2432 if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) 2546 if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
2433 rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); 2547 rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
2434 2548
2435 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
2436 sizeof(rfc), (unsigned long) &rfc);
2437
2438 if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) 2549 if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS))
2439 break; 2550 break;
2440 2551
@@ -2455,9 +2566,6 @@ done:
2455 if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) 2566 if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
2456 rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); 2567 rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
2457 2568
2458 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
2459 sizeof(rfc), (unsigned long) &rfc);
2460
2461 if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) 2569 if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS))
2462 break; 2570 break;
2463 2571
@@ -2469,6 +2577,9 @@ done:
2469 break; 2577 break;
2470 } 2578 }
2471 2579
2580 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
2581 (unsigned long) &rfc);
2582
2472 /* FIXME: Need actual value of the flush timeout */ 2583 /* FIXME: Need actual value of the flush timeout */
2473 //if (flush_to != L2CAP_DEFAULT_FLUSH_TO) 2584 //if (flush_to != L2CAP_DEFAULT_FLUSH_TO)
2474 // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to); 2585 // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to);
@@ -2533,18 +2644,21 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data)
2533 } 2644 }
2534 } 2645 }
2535 2646
2536 if (pi->num_conf_rsp || pi->num_conf_req) 2647 if (pi->num_conf_rsp || pi->num_conf_req > 1)
2537 goto done; 2648 goto done;
2538 2649
2539 switch (pi->mode) { 2650 switch (pi->mode) {
2540 case L2CAP_MODE_STREAMING: 2651 case L2CAP_MODE_STREAMING:
2541 case L2CAP_MODE_ERTM: 2652 case L2CAP_MODE_ERTM:
2542 pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; 2653 if (!(pi->conf_state & L2CAP_CONF_STATE2_DEVICE)) {
2543 if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask)) 2654 pi->mode = l2cap_select_mode(rfc.mode,
2655 pi->conn->feat_mask);
2656 break;
2657 }
2658
2659 if (pi->mode != rfc.mode)
2544 return -ECONNREFUSED; 2660 return -ECONNREFUSED;
2545 break; 2661
2546 default:
2547 pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask);
2548 break; 2662 break;
2549 } 2663 }
2550 2664
@@ -2667,7 +2781,6 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data,
2667 rfc.mode != pi->mode) 2781 rfc.mode != pi->mode)
2668 return -ECONNREFUSED; 2782 return -ECONNREFUSED;
2669 2783
2670 pi->mode = rfc.mode;
2671 pi->fcs = 0; 2784 pi->fcs = 0;
2672 2785
2673 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, 2786 l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
@@ -2676,6 +2789,11 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data,
2676 } 2789 }
2677 } 2790 }
2678 2791
2792 if (pi->mode == L2CAP_MODE_BASIC && pi->mode != rfc.mode)
2793 return -ECONNREFUSED;
2794
2795 pi->mode = rfc.mode;
2796
2679 if (*result == L2CAP_CONF_SUCCESS) { 2797 if (*result == L2CAP_CONF_SUCCESS) {
2680 switch (rfc.mode) { 2798 switch (rfc.mode) {
2681 case L2CAP_MODE_ERTM: 2799 case L2CAP_MODE_ERTM:
@@ -2770,7 +2888,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
2770 struct l2cap_chan_list *list = &conn->chan_list; 2888 struct l2cap_chan_list *list = &conn->chan_list;
2771 struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; 2889 struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
2772 struct l2cap_conn_rsp rsp; 2890 struct l2cap_conn_rsp rsp;
2773 struct sock *sk, *parent; 2891 struct sock *parent, *uninitialized_var(sk);
2774 int result, status = L2CAP_CS_NO_INFO; 2892 int result, status = L2CAP_CS_NO_INFO;
2775 2893
2776 u16 dcid = 0, scid = __le16_to_cpu(req->scid); 2894 u16 dcid = 0, scid = __le16_to_cpu(req->scid);
@@ -2879,6 +2997,15 @@ sendresp:
2879 L2CAP_INFO_REQ, sizeof(info), &info); 2997 L2CAP_INFO_REQ, sizeof(info), &info);
2880 } 2998 }
2881 2999
3000 if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) &&
3001 result == L2CAP_CR_SUCCESS) {
3002 u8 buf[128];
3003 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
3004 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
3005 l2cap_build_conf_req(sk, buf), buf);
3006 l2cap_pi(sk)->num_conf_req++;
3007 }
3008
2882 return 0; 3009 return 0;
2883} 3010}
2884 3011
@@ -2899,11 +3026,11 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
2899 if (scid) { 3026 if (scid) {
2900 sk = l2cap_get_chan_by_scid(&conn->chan_list, scid); 3027 sk = l2cap_get_chan_by_scid(&conn->chan_list, scid);
2901 if (!sk) 3028 if (!sk)
2902 return 0; 3029 return -EFAULT;
2903 } else { 3030 } else {
2904 sk = l2cap_get_chan_by_ident(&conn->chan_list, cmd->ident); 3031 sk = l2cap_get_chan_by_ident(&conn->chan_list, cmd->ident);
2905 if (!sk) 3032 if (!sk)
2906 return 0; 3033 return -EFAULT;
2907 } 3034 }
2908 3035
2909 switch (result) { 3036 switch (result) {
@@ -2911,10 +3038,13 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
2911 sk->sk_state = BT_CONFIG; 3038 sk->sk_state = BT_CONFIG;
2912 l2cap_pi(sk)->ident = 0; 3039 l2cap_pi(sk)->ident = 0;
2913 l2cap_pi(sk)->dcid = dcid; 3040 l2cap_pi(sk)->dcid = dcid;
2914 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
2915
2916 l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND; 3041 l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND;
2917 3042
3043 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)
3044 break;
3045
3046 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
3047
2918 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, 3048 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
2919 l2cap_build_conf_req(sk, req), req); 3049 l2cap_build_conf_req(sk, req), req);
2920 l2cap_pi(sk)->num_conf_req++; 3050 l2cap_pi(sk)->num_conf_req++;
@@ -2950,8 +3080,14 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
2950 if (!sk) 3080 if (!sk)
2951 return -ENOENT; 3081 return -ENOENT;
2952 3082
2953 if (sk->sk_state == BT_DISCONN) 3083 if (sk->sk_state != BT_CONFIG) {
3084 struct l2cap_cmd_rej rej;
3085
3086 rej.reason = cpu_to_le16(0x0002);
3087 l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ,
3088 sizeof(rej), &rej);
2954 goto unlock; 3089 goto unlock;
3090 }
2955 3091
2956 /* Reject if config buffer is too small. */ 3092 /* Reject if config buffer is too small. */
2957 len = cmd_len - sizeof(*req); 3093 len = cmd_len - sizeof(*req);
@@ -2977,7 +3113,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
2977 /* Complete config. */ 3113 /* Complete config. */
2978 len = l2cap_parse_conf_req(sk, rsp); 3114 len = l2cap_parse_conf_req(sk, rsp);
2979 if (len < 0) { 3115 if (len < 0) {
2980 l2cap_send_disconn_req(conn, sk); 3116 l2cap_send_disconn_req(conn, sk, ECONNRESET);
2981 goto unlock; 3117 goto unlock;
2982 } 3118 }
2983 3119
@@ -3047,7 +3183,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
3047 char req[64]; 3183 char req[64];
3048 3184
3049 if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) { 3185 if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) {
3050 l2cap_send_disconn_req(conn, sk); 3186 l2cap_send_disconn_req(conn, sk, ECONNRESET);
3051 goto done; 3187 goto done;
3052 } 3188 }
3053 3189
@@ -3056,7 +3192,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
3056 len = l2cap_parse_conf_rsp(sk, rsp->data, 3192 len = l2cap_parse_conf_rsp(sk, rsp->data,
3057 len, req, &result); 3193 len, req, &result);
3058 if (len < 0) { 3194 if (len < 0) {
3059 l2cap_send_disconn_req(conn, sk); 3195 l2cap_send_disconn_req(conn, sk, ECONNRESET);
3060 goto done; 3196 goto done;
3061 } 3197 }
3062 3198
@@ -3069,10 +3205,9 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
3069 } 3205 }
3070 3206
3071 default: 3207 default:
3072 sk->sk_state = BT_DISCONN;
3073 sk->sk_err = ECONNRESET; 3208 sk->sk_err = ECONNRESET;
3074 l2cap_sock_set_timer(sk, HZ * 5); 3209 l2cap_sock_set_timer(sk, HZ * 5);
3075 l2cap_send_disconn_req(conn, sk); 3210 l2cap_send_disconn_req(conn, sk, ECONNRESET);
3076 goto done; 3211 goto done;
3077 } 3212 }
3078 3213
@@ -3123,16 +3258,6 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd
3123 3258
3124 sk->sk_shutdown = SHUTDOWN_MASK; 3259 sk->sk_shutdown = SHUTDOWN_MASK;
3125 3260
3126 skb_queue_purge(TX_QUEUE(sk));
3127
3128 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
3129 skb_queue_purge(SREJ_QUEUE(sk));
3130 skb_queue_purge(BUSY_QUEUE(sk));
3131 del_timer(&l2cap_pi(sk)->retrans_timer);
3132 del_timer(&l2cap_pi(sk)->monitor_timer);
3133 del_timer(&l2cap_pi(sk)->ack_timer);
3134 }
3135
3136 l2cap_chan_del(sk, ECONNRESET); 3261 l2cap_chan_del(sk, ECONNRESET);
3137 bh_unlock_sock(sk); 3262 bh_unlock_sock(sk);
3138 3263
@@ -3155,16 +3280,6 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd
3155 if (!sk) 3280 if (!sk)
3156 return 0; 3281 return 0;
3157 3282
3158 skb_queue_purge(TX_QUEUE(sk));
3159
3160 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
3161 skb_queue_purge(SREJ_QUEUE(sk));
3162 skb_queue_purge(BUSY_QUEUE(sk));
3163 del_timer(&l2cap_pi(sk)->retrans_timer);
3164 del_timer(&l2cap_pi(sk)->monitor_timer);
3165 del_timer(&l2cap_pi(sk)->ack_timer);
3166 }
3167
3168 l2cap_chan_del(sk, 0); 3283 l2cap_chan_del(sk, 0);
3169 bh_unlock_sock(sk); 3284 bh_unlock_sock(sk);
3170 3285
@@ -3187,7 +3302,7 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm
3187 struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; 3302 struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf;
3188 rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK); 3303 rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK);
3189 rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); 3304 rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS);
3190 if (enable_ertm) 3305 if (!disable_ertm)
3191 feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING 3306 feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING
3192 | L2CAP_FEAT_FCS; 3307 | L2CAP_FEAT_FCS;
3193 put_unaligned_le32(feat_mask, rsp->data); 3308 put_unaligned_le32(feat_mask, rsp->data);
@@ -3352,7 +3467,7 @@ static int l2cap_check_fcs(struct l2cap_pinfo *pi, struct sk_buff *skb)
3352 our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size); 3467 our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size);
3353 3468
3354 if (our_fcs != rcv_fcs) 3469 if (our_fcs != rcv_fcs)
3355 return -EINVAL; 3470 return -EBADMSG;
3356 } 3471 }
3357 return 0; 3472 return 0;
3358} 3473}
@@ -3363,25 +3478,19 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
3363 u16 control = 0; 3478 u16 control = 0;
3364 3479
3365 pi->frames_sent = 0; 3480 pi->frames_sent = 0;
3366 pi->conn_state |= L2CAP_CONN_SEND_FBIT;
3367 3481
3368 control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; 3482 control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3369 3483
3370 if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { 3484 if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
3371 control |= L2CAP_SUPER_RCV_NOT_READY | L2CAP_CTRL_FINAL; 3485 control |= L2CAP_SUPER_RCV_NOT_READY;
3372 l2cap_send_sframe(pi, control); 3486 l2cap_send_sframe(pi, control);
3373 pi->conn_state |= L2CAP_CONN_RNR_SENT; 3487 pi->conn_state |= L2CAP_CONN_RNR_SENT;
3374 pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
3375 } 3488 }
3376 3489
3377 if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && pi->unacked_frames > 0) 3490 if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY)
3378 __mod_retrans_timer(); 3491 l2cap_retransmit_frames(sk);
3379
3380 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3381 3492
3382 spin_lock_bh(&pi->send_lock);
3383 l2cap_ertm_send(sk); 3493 l2cap_ertm_send(sk);
3384 spin_unlock_bh(&pi->send_lock);
3385 3494
3386 if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) && 3495 if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) &&
3387 pi->frames_sent == 0) { 3496 pi->frames_sent == 0) {
@@ -3393,6 +3502,8 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
3393static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar) 3502static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar)
3394{ 3503{
3395 struct sk_buff *next_skb; 3504 struct sk_buff *next_skb;
3505 struct l2cap_pinfo *pi = l2cap_pi(sk);
3506 int tx_seq_offset, next_tx_seq_offset;
3396 3507
3397 bt_cb(skb)->tx_seq = tx_seq; 3508 bt_cb(skb)->tx_seq = tx_seq;
3398 bt_cb(skb)->sar = sar; 3509 bt_cb(skb)->sar = sar;
@@ -3403,11 +3514,20 @@ static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_s
3403 return 0; 3514 return 0;
3404 } 3515 }
3405 3516
3517 tx_seq_offset = (tx_seq - pi->buffer_seq) % 64;
3518 if (tx_seq_offset < 0)
3519 tx_seq_offset += 64;
3520
3406 do { 3521 do {
3407 if (bt_cb(next_skb)->tx_seq == tx_seq) 3522 if (bt_cb(next_skb)->tx_seq == tx_seq)
3408 return -EINVAL; 3523 return -EINVAL;
3409 3524
3410 if (bt_cb(next_skb)->tx_seq > tx_seq) { 3525 next_tx_seq_offset = (bt_cb(next_skb)->tx_seq -
3526 pi->buffer_seq) % 64;
3527 if (next_tx_seq_offset < 0)
3528 next_tx_seq_offset += 64;
3529
3530 if (next_tx_seq_offset > tx_seq_offset) {
3411 __skb_queue_before(SREJ_QUEUE(sk), next_skb, skb); 3531 __skb_queue_before(SREJ_QUEUE(sk), next_skb, skb);
3412 return 0; 3532 return 0;
3413 } 3533 }
@@ -3525,11 +3645,51 @@ drop:
3525 pi->sdu = NULL; 3645 pi->sdu = NULL;
3526 3646
3527disconnect: 3647disconnect:
3528 l2cap_send_disconn_req(pi->conn, sk); 3648 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
3529 kfree_skb(skb); 3649 kfree_skb(skb);
3530 return 0; 3650 return 0;
3531} 3651}
3532 3652
3653static int l2cap_try_push_rx_skb(struct sock *sk)
3654{
3655 struct l2cap_pinfo *pi = l2cap_pi(sk);
3656 struct sk_buff *skb;
3657 u16 control;
3658 int err;
3659
3660 while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) {
3661 control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
3662 err = l2cap_ertm_reassembly_sdu(sk, skb, control);
3663 if (err < 0) {
3664 skb_queue_head(BUSY_QUEUE(sk), skb);
3665 return -EBUSY;
3666 }
3667
3668 pi->buffer_seq = (pi->buffer_seq + 1) % 64;
3669 }
3670
3671 if (!(pi->conn_state & L2CAP_CONN_RNR_SENT))
3672 goto done;
3673
3674 control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3675 control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL;
3676 l2cap_send_sframe(pi, control);
3677 l2cap_pi(sk)->retry_count = 1;
3678
3679 del_timer(&pi->retrans_timer);
3680 __mod_monitor_timer();
3681
3682 l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F;
3683
3684done:
3685 pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY;
3686 pi->conn_state &= ~L2CAP_CONN_RNR_SENT;
3687
3688 BT_DBG("sk %p, Exit local busy", sk);
3689
3690 return 0;
3691}
3692
3533static void l2cap_busy_work(struct work_struct *work) 3693static void l2cap_busy_work(struct work_struct *work)
3534{ 3694{
3535 DECLARE_WAITQUEUE(wait, current); 3695 DECLARE_WAITQUEUE(wait, current);
@@ -3538,7 +3698,6 @@ static void l2cap_busy_work(struct work_struct *work)
3538 struct sock *sk = (struct sock *)pi; 3698 struct sock *sk = (struct sock *)pi;
3539 int n_tries = 0, timeo = HZ/5, err; 3699 int n_tries = 0, timeo = HZ/5, err;
3540 struct sk_buff *skb; 3700 struct sk_buff *skb;
3541 u16 control;
3542 3701
3543 lock_sock(sk); 3702 lock_sock(sk);
3544 3703
@@ -3548,8 +3707,8 @@ static void l2cap_busy_work(struct work_struct *work)
3548 3707
3549 if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) { 3708 if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) {
3550 err = -EBUSY; 3709 err = -EBUSY;
3551 l2cap_send_disconn_req(pi->conn, sk); 3710 l2cap_send_disconn_req(pi->conn, sk, EBUSY);
3552 goto done; 3711 break;
3553 } 3712 }
3554 3713
3555 if (!timeo) 3714 if (!timeo)
@@ -3557,7 +3716,7 @@ static void l2cap_busy_work(struct work_struct *work)
3557 3716
3558 if (signal_pending(current)) { 3717 if (signal_pending(current)) {
3559 err = sock_intr_errno(timeo); 3718 err = sock_intr_errno(timeo);
3560 goto done; 3719 break;
3561 } 3720 }
3562 3721
3563 release_sock(sk); 3722 release_sock(sk);
@@ -3566,40 +3725,12 @@ static void l2cap_busy_work(struct work_struct *work)
3566 3725
3567 err = sock_error(sk); 3726 err = sock_error(sk);
3568 if (err) 3727 if (err)
3569 goto done; 3728 break;
3570
3571 while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) {
3572 control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
3573 err = l2cap_ertm_reassembly_sdu(sk, skb, control);
3574 if (err < 0) {
3575 skb_queue_head(BUSY_QUEUE(sk), skb);
3576 break;
3577 }
3578
3579 pi->buffer_seq = (pi->buffer_seq + 1) % 64;
3580 }
3581 3729
3582 if (!skb) 3730 if (l2cap_try_push_rx_skb(sk) == 0)
3583 break; 3731 break;
3584 } 3732 }
3585 3733
3586 if (!(pi->conn_state & L2CAP_CONN_RNR_SENT))
3587 goto done;
3588
3589 control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3590 control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL;
3591 l2cap_send_sframe(pi, control);
3592 l2cap_pi(sk)->retry_count = 1;
3593
3594 del_timer(&pi->retrans_timer);
3595 __mod_monitor_timer();
3596
3597 l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F;
3598
3599done:
3600 pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY;
3601 pi->conn_state &= ~L2CAP_CONN_RNR_SENT;
3602
3603 set_current_state(TASK_RUNNING); 3734 set_current_state(TASK_RUNNING);
3604 remove_wait_queue(sk_sleep(sk), &wait); 3735 remove_wait_queue(sk_sleep(sk), &wait);
3605 3736
@@ -3614,7 +3745,9 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control)
3614 if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { 3745 if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
3615 bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; 3746 bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
3616 __skb_queue_tail(BUSY_QUEUE(sk), skb); 3747 __skb_queue_tail(BUSY_QUEUE(sk), skb);
3617 return -EBUSY; 3748 return l2cap_try_push_rx_skb(sk);
3749
3750
3618 } 3751 }
3619 3752
3620 err = l2cap_ertm_reassembly_sdu(sk, skb, control); 3753 err = l2cap_ertm_reassembly_sdu(sk, skb, control);
@@ -3624,6 +3757,8 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control)
3624 } 3757 }
3625 3758
3626 /* Busy Condition */ 3759 /* Busy Condition */
3760 BT_DBG("sk %p, Enter local busy", sk);
3761
3627 pi->conn_state |= L2CAP_CONN_LOCAL_BUSY; 3762 pi->conn_state |= L2CAP_CONN_LOCAL_BUSY;
3628 bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; 3763 bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
3629 __skb_queue_tail(BUSY_QUEUE(sk), skb); 3764 __skb_queue_tail(BUSY_QUEUE(sk), skb);
@@ -3634,6 +3769,8 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control)
3634 3769
3635 pi->conn_state |= L2CAP_CONN_RNR_SENT; 3770 pi->conn_state |= L2CAP_CONN_RNR_SENT;
3636 3771
3772 del_timer(&pi->ack_timer);
3773
3637 queue_work(_busy_wq, &pi->busy_work); 3774 queue_work(_busy_wq, &pi->busy_work);
3638 3775
3639 return err; 3776 return err;
@@ -3747,7 +3884,7 @@ static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq)
3747 l2cap_ertm_reassembly_sdu(sk, skb, control); 3884 l2cap_ertm_reassembly_sdu(sk, skb, control);
3748 l2cap_pi(sk)->buffer_seq_srej = 3885 l2cap_pi(sk)->buffer_seq_srej =
3749 (l2cap_pi(sk)->buffer_seq_srej + 1) % 64; 3886 (l2cap_pi(sk)->buffer_seq_srej + 1) % 64;
3750 tx_seq++; 3887 tx_seq = (tx_seq + 1) % 64;
3751 } 3888 }
3752} 3889}
3753 3890
@@ -3783,10 +3920,11 @@ static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq)
3783 l2cap_send_sframe(pi, control); 3920 l2cap_send_sframe(pi, control);
3784 3921
3785 new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); 3922 new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC);
3786 new->tx_seq = pi->expected_tx_seq++; 3923 new->tx_seq = pi->expected_tx_seq;
3924 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
3787 list_add_tail(&new->list, SREJ_LIST(sk)); 3925 list_add_tail(&new->list, SREJ_LIST(sk));
3788 } 3926 }
3789 pi->expected_tx_seq++; 3927 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
3790} 3928}
3791 3929
3792static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) 3930static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb)
@@ -3795,11 +3933,12 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
3795 u8 tx_seq = __get_txseq(rx_control); 3933 u8 tx_seq = __get_txseq(rx_control);
3796 u8 req_seq = __get_reqseq(rx_control); 3934 u8 req_seq = __get_reqseq(rx_control);
3797 u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; 3935 u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT;
3798 u8 tx_seq_offset, expected_tx_seq_offset; 3936 int tx_seq_offset, expected_tx_seq_offset;
3799 int num_to_ack = (pi->tx_win/6) + 1; 3937 int num_to_ack = (pi->tx_win/6) + 1;
3800 int err = 0; 3938 int err = 0;
3801 3939
3802 BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); 3940 BT_DBG("sk %p len %d tx_seq %d rx_control 0x%4.4x", sk, skb->len, tx_seq,
3941 rx_control);
3803 3942
3804 if (L2CAP_CTRL_FINAL & rx_control && 3943 if (L2CAP_CTRL_FINAL & rx_control &&
3805 l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) { 3944 l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) {
@@ -3821,7 +3960,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
3821 3960
3822 /* invalid tx_seq */ 3961 /* invalid tx_seq */
3823 if (tx_seq_offset >= pi->tx_win) { 3962 if (tx_seq_offset >= pi->tx_win) {
3824 l2cap_send_disconn_req(pi->conn, sk); 3963 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
3825 goto drop; 3964 goto drop;
3826 } 3965 }
3827 3966
@@ -3844,6 +3983,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
3844 pi->buffer_seq = pi->buffer_seq_srej; 3983 pi->buffer_seq = pi->buffer_seq_srej;
3845 pi->conn_state &= ~L2CAP_CONN_SREJ_SENT; 3984 pi->conn_state &= ~L2CAP_CONN_SREJ_SENT;
3846 l2cap_send_ack(pi); 3985 l2cap_send_ack(pi);
3986 BT_DBG("sk %p, Exit SREJ_SENT", sk);
3847 } 3987 }
3848 } else { 3988 } else {
3849 struct srej_list *l; 3989 struct srej_list *l;
@@ -3872,6 +4012,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
3872 4012
3873 pi->conn_state |= L2CAP_CONN_SREJ_SENT; 4013 pi->conn_state |= L2CAP_CONN_SREJ_SENT;
3874 4014
4015 BT_DBG("sk %p, Enter SREJ", sk);
4016
3875 INIT_LIST_HEAD(SREJ_LIST(sk)); 4017 INIT_LIST_HEAD(SREJ_LIST(sk));
3876 pi->buffer_seq_srej = pi->buffer_seq; 4018 pi->buffer_seq_srej = pi->buffer_seq;
3877 4019
@@ -3882,6 +4024,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
3882 pi->conn_state |= L2CAP_CONN_SEND_PBIT; 4024 pi->conn_state |= L2CAP_CONN_SEND_PBIT;
3883 4025
3884 l2cap_send_srejframe(sk, tx_seq); 4026 l2cap_send_srejframe(sk, tx_seq);
4027
4028 del_timer(&pi->ack_timer);
3885 } 4029 }
3886 return 0; 4030 return 0;
3887 4031
@@ -3895,6 +4039,10 @@ expected:
3895 return 0; 4039 return 0;
3896 } 4040 }
3897 4041
4042 err = l2cap_push_rx_skb(sk, skb, rx_control);
4043 if (err < 0)
4044 return 0;
4045
3898 if (rx_control & L2CAP_CTRL_FINAL) { 4046 if (rx_control & L2CAP_CTRL_FINAL) {
3899 if (pi->conn_state & L2CAP_CONN_REJ_ACT) 4047 if (pi->conn_state & L2CAP_CONN_REJ_ACT)
3900 pi->conn_state &= ~L2CAP_CONN_REJ_ACT; 4048 pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
@@ -3902,10 +4050,6 @@ expected:
3902 l2cap_retransmit_frames(sk); 4050 l2cap_retransmit_frames(sk);
3903 } 4051 }
3904 4052
3905 err = l2cap_push_rx_skb(sk, skb, rx_control);
3906 if (err < 0)
3907 return 0;
3908
3909 __mod_ack_timer(); 4053 __mod_ack_timer();
3910 4054
3911 pi->num_acked = (pi->num_acked + 1) % num_to_ack; 4055 pi->num_acked = (pi->num_acked + 1) % num_to_ack;
@@ -3923,10 +4067,14 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
3923{ 4067{
3924 struct l2cap_pinfo *pi = l2cap_pi(sk); 4068 struct l2cap_pinfo *pi = l2cap_pi(sk);
3925 4069
4070 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, __get_reqseq(rx_control),
4071 rx_control);
4072
3926 pi->expected_ack_seq = __get_reqseq(rx_control); 4073 pi->expected_ack_seq = __get_reqseq(rx_control);
3927 l2cap_drop_acked_frames(sk); 4074 l2cap_drop_acked_frames(sk);
3928 4075
3929 if (rx_control & L2CAP_CTRL_POLL) { 4076 if (rx_control & L2CAP_CTRL_POLL) {
4077 pi->conn_state |= L2CAP_CONN_SEND_FBIT;
3930 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { 4078 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
3931 if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && 4079 if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
3932 (pi->unacked_frames > 0)) 4080 (pi->unacked_frames > 0))
@@ -3955,9 +4103,7 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
3955 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { 4103 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
3956 l2cap_send_ack(pi); 4104 l2cap_send_ack(pi);
3957 } else { 4105 } else {
3958 spin_lock_bh(&pi->send_lock);
3959 l2cap_ertm_send(sk); 4106 l2cap_ertm_send(sk);
3960 spin_unlock_bh(&pi->send_lock);
3961 } 4107 }
3962 } 4108 }
3963} 4109}
@@ -3967,6 +4113,8 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control)
3967 struct l2cap_pinfo *pi = l2cap_pi(sk); 4113 struct l2cap_pinfo *pi = l2cap_pi(sk);
3968 u8 tx_seq = __get_reqseq(rx_control); 4114 u8 tx_seq = __get_reqseq(rx_control);
3969 4115
4116 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
4117
3970 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; 4118 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3971 4119
3972 pi->expected_ack_seq = tx_seq; 4120 pi->expected_ack_seq = tx_seq;
@@ -3989,16 +4137,18 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
3989 struct l2cap_pinfo *pi = l2cap_pi(sk); 4137 struct l2cap_pinfo *pi = l2cap_pi(sk);
3990 u8 tx_seq = __get_reqseq(rx_control); 4138 u8 tx_seq = __get_reqseq(rx_control);
3991 4139
4140 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
4141
3992 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; 4142 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3993 4143
3994 if (rx_control & L2CAP_CTRL_POLL) { 4144 if (rx_control & L2CAP_CTRL_POLL) {
3995 pi->expected_ack_seq = tx_seq; 4145 pi->expected_ack_seq = tx_seq;
3996 l2cap_drop_acked_frames(sk); 4146 l2cap_drop_acked_frames(sk);
4147
4148 pi->conn_state |= L2CAP_CONN_SEND_FBIT;
3997 l2cap_retransmit_one_frame(sk, tx_seq); 4149 l2cap_retransmit_one_frame(sk, tx_seq);
3998 4150
3999 spin_lock_bh(&pi->send_lock);
4000 l2cap_ertm_send(sk); 4151 l2cap_ertm_send(sk);
4001 spin_unlock_bh(&pi->send_lock);
4002 4152
4003 if (pi->conn_state & L2CAP_CONN_WAIT_F) { 4153 if (pi->conn_state & L2CAP_CONN_WAIT_F) {
4004 pi->srej_save_reqseq = tx_seq; 4154 pi->srej_save_reqseq = tx_seq;
@@ -4024,10 +4174,15 @@ static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control)
4024 struct l2cap_pinfo *pi = l2cap_pi(sk); 4174 struct l2cap_pinfo *pi = l2cap_pi(sk);
4025 u8 tx_seq = __get_reqseq(rx_control); 4175 u8 tx_seq = __get_reqseq(rx_control);
4026 4176
4177 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
4178
4027 pi->conn_state |= L2CAP_CONN_REMOTE_BUSY; 4179 pi->conn_state |= L2CAP_CONN_REMOTE_BUSY;
4028 pi->expected_ack_seq = tx_seq; 4180 pi->expected_ack_seq = tx_seq;
4029 l2cap_drop_acked_frames(sk); 4181 l2cap_drop_acked_frames(sk);
4030 4182
4183 if (rx_control & L2CAP_CTRL_POLL)
4184 pi->conn_state |= L2CAP_CONN_SEND_FBIT;
4185
4031 if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) { 4186 if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) {
4032 del_timer(&pi->retrans_timer); 4187 del_timer(&pi->retrans_timer);
4033 if (rx_control & L2CAP_CTRL_POLL) 4188 if (rx_control & L2CAP_CTRL_POLL)
@@ -4075,12 +4230,83 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str
4075 return 0; 4230 return 0;
4076} 4231}
4077 4232
4233static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb)
4234{
4235 struct l2cap_pinfo *pi = l2cap_pi(sk);
4236 u16 control;
4237 u8 req_seq;
4238 int len, next_tx_seq_offset, req_seq_offset;
4239
4240 control = get_unaligned_le16(skb->data);
4241 skb_pull(skb, 2);
4242 len = skb->len;
4243
4244 /*
4245 * We can just drop the corrupted I-frame here.
4246 * Receiver will miss it and start proper recovery
4247 * procedures and ask retransmission.
4248 */
4249 if (l2cap_check_fcs(pi, skb))
4250 goto drop;
4251
4252 if (__is_sar_start(control) && __is_iframe(control))
4253 len -= 2;
4254
4255 if (pi->fcs == L2CAP_FCS_CRC16)
4256 len -= 2;
4257
4258 if (len > pi->mps) {
4259 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4260 goto drop;
4261 }
4262
4263 req_seq = __get_reqseq(control);
4264 req_seq_offset = (req_seq - pi->expected_ack_seq) % 64;
4265 if (req_seq_offset < 0)
4266 req_seq_offset += 64;
4267
4268 next_tx_seq_offset =
4269 (pi->next_tx_seq - pi->expected_ack_seq) % 64;
4270 if (next_tx_seq_offset < 0)
4271 next_tx_seq_offset += 64;
4272
4273 /* check for invalid req-seq */
4274 if (req_seq_offset > next_tx_seq_offset) {
4275 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4276 goto drop;
4277 }
4278
4279 if (__is_iframe(control)) {
4280 if (len < 0) {
4281 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4282 goto drop;
4283 }
4284
4285 l2cap_data_channel_iframe(sk, control, skb);
4286 } else {
4287 if (len != 0) {
4288 BT_ERR("%d", len);
4289 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4290 goto drop;
4291 }
4292
4293 l2cap_data_channel_sframe(sk, control, skb);
4294 }
4295
4296 return 0;
4297
4298drop:
4299 kfree_skb(skb);
4300 return 0;
4301}
4302
4078static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb) 4303static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb)
4079{ 4304{
4080 struct sock *sk; 4305 struct sock *sk;
4081 struct l2cap_pinfo *pi; 4306 struct l2cap_pinfo *pi;
4082 u16 control, len; 4307 u16 control;
4083 u8 tx_seq, req_seq, next_tx_seq_offset, req_seq_offset; 4308 u8 tx_seq;
4309 int len;
4084 4310
4085 sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); 4311 sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
4086 if (!sk) { 4312 if (!sk) {
@@ -4110,59 +4336,11 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
4110 break; 4336 break;
4111 4337
4112 case L2CAP_MODE_ERTM: 4338 case L2CAP_MODE_ERTM:
4113 control = get_unaligned_le16(skb->data); 4339 if (!sock_owned_by_user(sk)) {
4114 skb_pull(skb, 2); 4340 l2cap_ertm_data_rcv(sk, skb);
4115 len = skb->len;
4116
4117 if (__is_sar_start(control))
4118 len -= 2;
4119
4120 if (pi->fcs == L2CAP_FCS_CRC16)
4121 len -= 2;
4122
4123 /*
4124 * We can just drop the corrupted I-frame here.
4125 * Receiver will miss it and start proper recovery
4126 * procedures and ask retransmission.
4127 */
4128 if (len > pi->mps) {
4129 l2cap_send_disconn_req(pi->conn, sk);
4130 goto drop;
4131 }
4132
4133 if (l2cap_check_fcs(pi, skb))
4134 goto drop;
4135
4136 req_seq = __get_reqseq(control);
4137 req_seq_offset = (req_seq - pi->expected_ack_seq) % 64;
4138 if (req_seq_offset < 0)
4139 req_seq_offset += 64;
4140
4141 next_tx_seq_offset =
4142 (pi->next_tx_seq - pi->expected_ack_seq) % 64;
4143 if (next_tx_seq_offset < 0)
4144 next_tx_seq_offset += 64;
4145
4146 /* check for invalid req-seq */
4147 if (req_seq_offset > next_tx_seq_offset) {
4148 l2cap_send_disconn_req(pi->conn, sk);
4149 goto drop;
4150 }
4151
4152 if (__is_iframe(control)) {
4153 if (len < 4) {
4154 l2cap_send_disconn_req(pi->conn, sk);
4155 goto drop;
4156 }
4157
4158 l2cap_data_channel_iframe(sk, control, skb);
4159 } else { 4341 } else {
4160 if (len != 0) { 4342 if (sk_add_backlog(sk, skb))
4161 l2cap_send_disconn_req(pi->conn, sk);
4162 goto drop; 4343 goto drop;
4163 }
4164
4165 l2cap_data_channel_sframe(sk, control, skb);
4166 } 4344 }
4167 4345
4168 goto done; 4346 goto done;
@@ -4172,16 +4350,16 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
4172 skb_pull(skb, 2); 4350 skb_pull(skb, 2);
4173 len = skb->len; 4351 len = skb->len;
4174 4352
4353 if (l2cap_check_fcs(pi, skb))
4354 goto drop;
4355
4175 if (__is_sar_start(control)) 4356 if (__is_sar_start(control))
4176 len -= 2; 4357 len -= 2;
4177 4358
4178 if (pi->fcs == L2CAP_FCS_CRC16) 4359 if (pi->fcs == L2CAP_FCS_CRC16)
4179 len -= 2; 4360 len -= 2;
4180 4361
4181 if (len > pi->mps || len < 4 || __is_sframe(control)) 4362 if (len > pi->mps || len < 0 || __is_sframe(control))
4182 goto drop;
4183
4184 if (l2cap_check_fcs(pi, skb))
4185 goto drop; 4363 goto drop;
4186 4364
4187 tx_seq = __get_txseq(control); 4365 tx_seq = __get_txseq(control);
@@ -4281,7 +4459,7 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
4281 struct hlist_node *node; 4459 struct hlist_node *node;
4282 4460
4283 if (type != ACL_LINK) 4461 if (type != ACL_LINK)
4284 return 0; 4462 return -EINVAL;
4285 4463
4286 BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); 4464 BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr));
4287 4465
@@ -4314,7 +4492,7 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
4314 BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); 4492 BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
4315 4493
4316 if (hcon->type != ACL_LINK) 4494 if (hcon->type != ACL_LINK)
4317 return 0; 4495 return -EINVAL;
4318 4496
4319 if (!status) { 4497 if (!status) {
4320 conn = l2cap_conn_add(hcon, status); 4498 conn = l2cap_conn_add(hcon, status);
@@ -4343,7 +4521,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
4343 BT_DBG("hcon %p reason %d", hcon, reason); 4521 BT_DBG("hcon %p reason %d", hcon, reason);
4344 4522
4345 if (hcon->type != ACL_LINK) 4523 if (hcon->type != ACL_LINK)
4346 return 0; 4524 return -EINVAL;
4347 4525
4348 l2cap_conn_del(hcon, bt_err(reason)); 4526 l2cap_conn_del(hcon, bt_err(reason));
4349 4527
@@ -4404,6 +4582,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
4404 req.psm = l2cap_pi(sk)->psm; 4582 req.psm = l2cap_pi(sk)->psm;
4405 4583
4406 l2cap_pi(sk)->ident = l2cap_get_ident(conn); 4584 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
4585 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
4407 4586
4408 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 4587 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
4409 L2CAP_CONN_REQ, sizeof(req), &req); 4588 L2CAP_CONN_REQ, sizeof(req), &req);
@@ -4671,14 +4850,8 @@ EXPORT_SYMBOL(l2cap_load);
4671module_init(l2cap_init); 4850module_init(l2cap_init);
4672module_exit(l2cap_exit); 4851module_exit(l2cap_exit);
4673 4852
4674module_param(enable_ertm, bool, 0644); 4853module_param(disable_ertm, bool, 0644);
4675MODULE_PARM_DESC(enable_ertm, "Enable enhanced retransmission mode"); 4854MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode");
4676
4677module_param(max_transmit, uint, 0644);
4678MODULE_PARM_DESC(max_transmit, "Max transmit value (default = 3)");
4679
4680module_param(tx_window, uint, 0644);
4681MODULE_PARM_DESC(tx_window, "Transmission window size value (default = 63)");
4682 4855
4683MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); 4856MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
4684MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION); 4857MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 76357b547752..c8436fa31344 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -63,7 +63,6 @@ static int __init br_init(void)
63 goto err_out4; 63 goto err_out4;
64 64
65 brioctl_set(br_ioctl_deviceless_stub); 65 brioctl_set(br_ioctl_deviceless_stub);
66 br_handle_frame_hook = br_handle_frame;
67 66
68#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) 67#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
69 br_fdb_test_addr_hook = br_fdb_test_addr; 68 br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
100 br_fdb_test_addr_hook = NULL; 99 br_fdb_test_addr_hook = NULL;
101#endif 100#endif
102 101
103 br_handle_frame_hook = NULL;
104 br_fdb_fini(); 102 br_fdb_fini();
105} 103}
106 104
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index eedf2c94820e..075c435ad22d 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -38,8 +38,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
38 } 38 }
39#endif 39#endif
40 40
41 u64_stats_update_begin(&brstats->syncp);
41 brstats->tx_packets++; 42 brstats->tx_packets++;
42 brstats->tx_bytes += skb->len; 43 brstats->tx_bytes += skb->len;
44 u64_stats_update_end(&brstats->syncp);
43 45
44 BR_INPUT_SKB_CB(skb)->brdev = dev; 46 BR_INPUT_SKB_CB(skb)->brdev = dev;
45 47
@@ -47,6 +49,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
47 skb_pull(skb, ETH_HLEN); 49 skb_pull(skb, ETH_HLEN);
48 50
49 if (is_multicast_ether_addr(dest)) { 51 if (is_multicast_ether_addr(dest)) {
52 if (unlikely(netpoll_tx_running(dev))) {
53 br_flood_deliver(br, skb);
54 goto out;
55 }
50 if (br_multicast_rcv(br, NULL, skb)) 56 if (br_multicast_rcv(br, NULL, skb))
51 goto out; 57 goto out;
52 58
@@ -92,21 +98,25 @@ static int br_dev_stop(struct net_device *dev)
92 return 0; 98 return 0;
93} 99}
94 100
95static struct net_device_stats *br_get_stats(struct net_device *dev) 101static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev,
102 struct rtnl_link_stats64 *stats)
96{ 103{
97 struct net_bridge *br = netdev_priv(dev); 104 struct net_bridge *br = netdev_priv(dev);
98 struct net_device_stats *stats = &dev->stats; 105 struct br_cpu_netstats tmp, sum = { 0 };
99 struct br_cpu_netstats sum = { 0 };
100 unsigned int cpu; 106 unsigned int cpu;
101 107
102 for_each_possible_cpu(cpu) { 108 for_each_possible_cpu(cpu) {
109 unsigned int start;
103 const struct br_cpu_netstats *bstats 110 const struct br_cpu_netstats *bstats
104 = per_cpu_ptr(br->stats, cpu); 111 = per_cpu_ptr(br->stats, cpu);
105 112 do {
106 sum.tx_bytes += bstats->tx_bytes; 113 start = u64_stats_fetch_begin(&bstats->syncp);
107 sum.tx_packets += bstats->tx_packets; 114 memcpy(&tmp, bstats, sizeof(tmp));
108 sum.rx_bytes += bstats->rx_bytes; 115 } while (u64_stats_fetch_retry(&bstats->syncp, start));
109 sum.rx_packets += bstats->rx_packets; 116 sum.tx_bytes += tmp.tx_bytes;
117 sum.tx_packets += tmp.tx_packets;
118 sum.rx_bytes += tmp.rx_bytes;
119 sum.rx_packets += tmp.rx_packets;
110 } 120 }
111 121
112 stats->tx_bytes = sum.tx_bytes; 122 stats->tx_bytes = sum.tx_bytes;
@@ -127,7 +137,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
127 137
128#ifdef CONFIG_BRIDGE_NETFILTER 138#ifdef CONFIG_BRIDGE_NETFILTER
129 /* remember the MTU in the rtable for PMTU */ 139 /* remember the MTU in the rtable for PMTU */
130 br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu; 140 br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu;
131#endif 141#endif
132 142
133 return 0; 143 return 0;
@@ -199,73 +209,81 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
199} 209}
200 210
201#ifdef CONFIG_NET_POLL_CONTROLLER 211#ifdef CONFIG_NET_POLL_CONTROLLER
202static bool br_devices_support_netpoll(struct net_bridge *br) 212static void br_poll_controller(struct net_device *br_dev)
203{ 213{
204 struct net_bridge_port *p;
205 bool ret = true;
206 int count = 0;
207 unsigned long flags;
208
209 spin_lock_irqsave(&br->lock, flags);
210 list_for_each_entry(p, &br->port_list, list) {
211 count++;
212 if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) ||
213 !p->dev->netdev_ops->ndo_poll_controller)
214 ret = false;
215 }
216 spin_unlock_irqrestore(&br->lock, flags);
217 return count != 0 && ret;
218} 214}
219 215
220static void br_poll_controller(struct net_device *br_dev) 216static void br_netpoll_cleanup(struct net_device *dev)
221{ 217{
222 struct netpoll *np = br_dev->npinfo->netpoll; 218 struct net_bridge *br = netdev_priv(dev);
219 struct net_bridge_port *p, *n;
223 220
224 if (np->real_dev != br_dev) 221 list_for_each_entry_safe(p, n, &br->port_list, list) {
225 netpoll_poll_dev(np->real_dev); 222 br_netpoll_disable(p);
223 }
226} 224}
227 225
228void br_netpoll_cleanup(struct net_device *dev) 226static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
229{ 227{
230 struct net_bridge *br = netdev_priv(dev); 228 struct net_bridge *br = netdev_priv(dev);
231 struct net_bridge_port *p, *n; 229 struct net_bridge_port *p, *n;
232 const struct net_device_ops *ops; 230 int err = 0;
233 231
234 br->dev->npinfo = NULL;
235 list_for_each_entry_safe(p, n, &br->port_list, list) { 232 list_for_each_entry_safe(p, n, &br->port_list, list) {
236 if (p->dev) { 233 if (!p->dev)
237 ops = p->dev->netdev_ops; 234 continue;
238 if (ops->ndo_netpoll_cleanup) 235
239 ops->ndo_netpoll_cleanup(p->dev); 236 err = br_netpoll_enable(p);
240 else 237 if (err)
241 p->dev->npinfo = NULL; 238 goto fail;
242 }
243 } 239 }
240
241out:
242 return err;
243
244fail:
245 br_netpoll_cleanup(dev);
246 goto out;
244} 247}
245 248
246void br_netpoll_disable(struct net_bridge *br, 249int br_netpoll_enable(struct net_bridge_port *p)
247 struct net_device *dev)
248{ 250{
249 if (br_devices_support_netpoll(br)) 251 struct netpoll *np;
250 br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; 252 int err = 0;
251 if (dev->netdev_ops->ndo_netpoll_cleanup) 253
252 dev->netdev_ops->ndo_netpoll_cleanup(dev); 254 np = kzalloc(sizeof(*p->np), GFP_KERNEL);
253 else 255 err = -ENOMEM;
254 dev->npinfo = NULL; 256 if (!np)
257 goto out;
258
259 np->dev = p->dev;
260
261 err = __netpoll_setup(np);
262 if (err) {
263 kfree(np);
264 goto out;
265 }
266
267 p->np = np;
268
269out:
270 return err;
255} 271}
256 272
257void br_netpoll_enable(struct net_bridge *br, 273void br_netpoll_disable(struct net_bridge_port *p)
258 struct net_device *dev)
259{ 274{
260 if (br_devices_support_netpoll(br)) { 275 struct netpoll *np = p->np;
261 br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; 276
262 if (br->dev->npinfo) 277 if (!np)
263 dev->npinfo = br->dev->npinfo; 278 return;
264 } else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) { 279
265 br->dev->priv_flags |= IFF_DISABLE_NETPOLL; 280 p->np = NULL;
266 br_info(br,"new device %s does not support netpoll (disabling)", 281
267 dev->name); 282 /* Wait for transmitting packets to finish before freeing. */
268 } 283 synchronize_rcu_bh();
284
285 __netpoll_cleanup(np);
286 kfree(np);
269} 287}
270 288
271#endif 289#endif
@@ -288,12 +306,13 @@ static const struct net_device_ops br_netdev_ops = {
288 .ndo_open = br_dev_open, 306 .ndo_open = br_dev_open,
289 .ndo_stop = br_dev_stop, 307 .ndo_stop = br_dev_stop,
290 .ndo_start_xmit = br_dev_xmit, 308 .ndo_start_xmit = br_dev_xmit,
291 .ndo_get_stats = br_get_stats, 309 .ndo_get_stats64 = br_get_stats64,
292 .ndo_set_mac_address = br_set_mac_address, 310 .ndo_set_mac_address = br_set_mac_address,
293 .ndo_set_multicast_list = br_dev_set_multicast_list, 311 .ndo_set_multicast_list = br_dev_set_multicast_list,
294 .ndo_change_mtu = br_change_mtu, 312 .ndo_change_mtu = br_change_mtu,
295 .ndo_do_ioctl = br_dev_ioctl, 313 .ndo_do_ioctl = br_dev_ioctl,
296#ifdef CONFIG_NET_POLL_CONTROLLER 314#ifdef CONFIG_NET_POLL_CONTROLLER
315 .ndo_netpoll_setup = br_netpoll_setup,
297 .ndo_netpoll_cleanup = br_netpoll_cleanup, 316 .ndo_netpoll_cleanup = br_netpoll_cleanup,
298 .ndo_poll_controller = br_poll_controller, 317 .ndo_poll_controller = br_poll_controller,
299#endif 318#endif
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 26637439965b..a744296fc675 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -128,7 +128,7 @@ void br_fdb_cleanup(unsigned long _data)
128{ 128{
129 struct net_bridge *br = (struct net_bridge *)_data; 129 struct net_bridge *br = (struct net_bridge *)_data;
130 unsigned long delay = hold_time(br); 130 unsigned long delay = hold_time(br);
131 unsigned long next_timer = jiffies + br->forward_delay; 131 unsigned long next_timer = jiffies + br->ageing_time;
132 int i; 132 int i;
133 133
134 spin_lock_bh(&br->hash_lock); 134 spin_lock_bh(&br->hash_lock);
@@ -149,9 +149,7 @@ void br_fdb_cleanup(unsigned long _data)
149 } 149 }
150 spin_unlock_bh(&br->hash_lock); 150 spin_unlock_bh(&br->hash_lock);
151 151
152 /* Add HZ/4 to ensure we round the jiffies upwards to be after the next 152 mod_timer(&br->gc_timer, round_jiffies_up(next_timer));
153 * timer, otherwise we might round down and will have no-op run. */
154 mod_timer(&br->gc_timer, round_jiffies(next_timer + HZ/4));
155} 153}
156 154
157/* Completely flush all dynamic entries in forwarding database.*/ 155/* Completely flush all dynamic entries in forwarding database.*/
@@ -242,11 +240,11 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
242 struct net_bridge_fdb_entry *fdb; 240 struct net_bridge_fdb_entry *fdb;
243 int ret; 241 int ret;
244 242
245 if (!dev->br_port) 243 if (!br_port_exists(dev))
246 return 0; 244 return 0;
247 245
248 rcu_read_lock(); 246 rcu_read_lock();
249 fdb = __br_fdb_get(dev->br_port->br, addr); 247 fdb = __br_fdb_get(br_port_get_rcu(dev)->br, addr);
250 ret = fdb && fdb->dst->dev != dev && 248 ret = fdb && fdb->dst->dev != dev &&
251 fdb->dst->state == BR_STATE_FORWARDING; 249 fdb->dst->state == BR_STATE_FORWARDING;
252 rcu_read_unlock(); 250 rcu_read_unlock();
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index a98ef1393097..cbfe87f0f34a 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -50,14 +50,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
50 kfree_skb(skb); 50 kfree_skb(skb);
51 else { 51 else {
52 skb_push(skb, ETH_HLEN); 52 skb_push(skb, ETH_HLEN);
53 53 dev_queue_xmit(skb);
54#ifdef CONFIG_NET_POLL_CONTROLLER
55 if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) {
56 netpoll_send_skb(skb->dev->npinfo->netpoll, skb);
57 skb->dev->priv_flags &= ~IFF_IN_NETPOLL;
58 } else
59#endif
60 dev_queue_xmit(skb);
61 } 54 }
62 } 55 }
63 56
@@ -73,23 +66,20 @@ int br_forward_finish(struct sk_buff *skb)
73 66
74static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) 67static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
75{ 68{
76#ifdef CONFIG_NET_POLL_CONTROLLER
77 struct net_bridge *br = to->br;
78 if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) {
79 struct netpoll *np;
80 to->dev->npinfo = skb->dev->npinfo;
81 np = skb->dev->npinfo->netpoll;
82 np->real_dev = np->dev = to->dev;
83 to->dev->priv_flags |= IFF_IN_NETPOLL;
84 }
85#endif
86 skb->dev = to->dev; 69 skb->dev = to->dev;
70
71 if (unlikely(netpoll_tx_running(to->dev))) {
72 if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
73 kfree_skb(skb);
74 else {
75 skb_push(skb, ETH_HLEN);
76 br_netpoll_send_skb(to, skb);
77 }
78 return;
79 }
80
87 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, 81 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
88 br_forward_finish); 82 br_forward_finish);
89#ifdef CONFIG_NET_POLL_CONTROLLER
90 if (skb->dev->npinfo)
91 skb->dev->npinfo->netpoll->dev = br->dev;
92#endif
93} 83}
94 84
95static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) 85static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
@@ -140,10 +130,10 @@ static int deliver_clone(const struct net_bridge_port *prev,
140 void (*__packet_hook)(const struct net_bridge_port *p, 130 void (*__packet_hook)(const struct net_bridge_port *p,
141 struct sk_buff *skb)) 131 struct sk_buff *skb))
142{ 132{
133 struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
134
143 skb = skb_clone(skb, GFP_ATOMIC); 135 skb = skb_clone(skb, GFP_ATOMIC);
144 if (!skb) { 136 if (!skb) {
145 struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
146
147 dev->stats.tx_dropped++; 137 dev->stats.tx_dropped++;
148 return -ENOMEM; 138 return -ENOMEM;
149 } 139 }
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 18b245e2c00e..c03d2c3ff03e 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -147,14 +147,17 @@ static void del_nbp(struct net_bridge_port *p)
147 147
148 list_del_rcu(&p->list); 148 list_del_rcu(&p->list);
149 149
150 rcu_assign_pointer(dev->br_port, NULL); 150 dev->priv_flags &= ~IFF_BRIDGE_PORT;
151
152 netdev_rx_handler_unregister(dev);
151 153
152 br_multicast_del_port(p); 154 br_multicast_del_port(p);
153 155
154 kobject_uevent(&p->kobj, KOBJ_REMOVE); 156 kobject_uevent(&p->kobj, KOBJ_REMOVE);
155 kobject_del(&p->kobj); 157 kobject_del(&p->kobj);
156 158
157 br_netpoll_disable(br, dev); 159 br_netpoll_disable(p);
160
158 call_rcu(&p->rcu, destroy_nbp_rcu); 161 call_rcu(&p->rcu, destroy_nbp_rcu);
159} 162}
160 163
@@ -167,8 +170,6 @@ static void del_br(struct net_bridge *br, struct list_head *head)
167 del_nbp(p); 170 del_nbp(p);
168 } 171 }
169 172
170 br_netpoll_cleanup(br->dev);
171
172 del_timer_sync(&br->gc_timer); 173 del_timer_sync(&br->gc_timer);
173 174
174 br_sysfs_delbr(br->dev); 175 br_sysfs_delbr(br->dev);
@@ -400,7 +401,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
400 return -ELOOP; 401 return -ELOOP;
401 402
402 /* Device is already being bridged */ 403 /* Device is already being bridged */
403 if (dev->br_port != NULL) 404 if (br_port_exists(dev))
404 return -EBUSY; 405 return -EBUSY;
405 406
406 /* No bridging devices that dislike that (e.g. wireless) */ 407 /* No bridging devices that dislike that (e.g. wireless) */
@@ -428,7 +429,15 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
428 if (err) 429 if (err)
429 goto err2; 430 goto err2;
430 431
431 rcu_assign_pointer(dev->br_port, p); 432 if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
433 goto err3;
434
435 err = netdev_rx_handler_register(dev, br_handle_frame, p);
436 if (err)
437 goto err3;
438
439 dev->priv_flags |= IFF_BRIDGE_PORT;
440
432 dev_disable_lro(dev); 441 dev_disable_lro(dev);
433 442
434 list_add_rcu(&p->list, &br->port_list); 443 list_add_rcu(&p->list, &br->port_list);
@@ -448,9 +457,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
448 457
449 kobject_uevent(&p->kobj, KOBJ_ADD); 458 kobject_uevent(&p->kobj, KOBJ_ADD);
450 459
451 br_netpoll_enable(br, dev);
452
453 return 0; 460 return 0;
461err3:
462 sysfs_remove_link(br->ifobj, p->dev->name);
454err2: 463err2:
455 br_fdb_delete_by_port(br, p, 1); 464 br_fdb_delete_by_port(br, p, 1);
456err1: 465err1:
@@ -467,9 +476,13 @@ put_back:
467/* called with RTNL */ 476/* called with RTNL */
468int br_del_if(struct net_bridge *br, struct net_device *dev) 477int br_del_if(struct net_bridge *br, struct net_device *dev)
469{ 478{
470 struct net_bridge_port *p = dev->br_port; 479 struct net_bridge_port *p;
480
481 if (!br_port_exists(dev))
482 return -EINVAL;
471 483
472 if (!p || p->br != br) 484 p = br_port_get(dev);
485 if (p->br != br)
473 return -EINVAL; 486 return -EINVAL;
474 487
475 del_nbp(p); 488 del_nbp(p);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d36e700f7a26..5fc1c5b1c360 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -27,8 +27,10 @@ static int br_pass_frame_up(struct sk_buff *skb)
27 struct net_bridge *br = netdev_priv(brdev); 27 struct net_bridge *br = netdev_priv(brdev);
28 struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); 28 struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
29 29
30 u64_stats_update_begin(&brstats->syncp);
30 brstats->rx_packets++; 31 brstats->rx_packets++;
31 brstats->rx_bytes += skb->len; 32 brstats->rx_bytes += skb->len;
33 u64_stats_update_end(&brstats->syncp);
32 34
33 indev = skb->dev; 35 indev = skb->dev;
34 skb->dev = brdev; 36 skb->dev = brdev;
@@ -41,7 +43,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
41int br_handle_frame_finish(struct sk_buff *skb) 43int br_handle_frame_finish(struct sk_buff *skb)
42{ 44{
43 const unsigned char *dest = eth_hdr(skb)->h_dest; 45 const unsigned char *dest = eth_hdr(skb)->h_dest;
44 struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); 46 struct net_bridge_port *p = br_port_get_rcu(skb->dev);
45 struct net_bridge *br; 47 struct net_bridge *br;
46 struct net_bridge_fdb_entry *dst; 48 struct net_bridge_fdb_entry *dst;
47 struct net_bridge_mdb_entry *mdst; 49 struct net_bridge_mdb_entry *mdst;
@@ -111,10 +113,9 @@ drop:
111/* note: already called with rcu_read_lock (preempt_disabled) */ 113/* note: already called with rcu_read_lock (preempt_disabled) */
112static int br_handle_local_finish(struct sk_buff *skb) 114static int br_handle_local_finish(struct sk_buff *skb)
113{ 115{
114 struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); 116 struct net_bridge_port *p = br_port_get_rcu(skb->dev);
115 117
116 if (p) 118 br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
117 br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
118 return 0; /* process further */ 119 return 0; /* process further */
119} 120}
120 121
@@ -131,15 +132,19 @@ static inline int is_link_local(const unsigned char *dest)
131} 132}
132 133
133/* 134/*
134 * Called via br_handle_frame_hook.
135 * Return NULL if skb is handled 135 * Return NULL if skb is handled
136 * note: already called with rcu_read_lock (preempt_disabled) 136 * note: already called with rcu_read_lock (preempt_disabled) from
137 * netif_receive_skb
137 */ 138 */
138struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) 139struct sk_buff *br_handle_frame(struct sk_buff *skb)
139{ 140{
141 struct net_bridge_port *p;
140 const unsigned char *dest = eth_hdr(skb)->h_dest; 142 const unsigned char *dest = eth_hdr(skb)->h_dest;
141 int (*rhook)(struct sk_buff *skb); 143 int (*rhook)(struct sk_buff *skb);
142 144
145 if (skb->pkt_type == PACKET_LOOPBACK)
146 return skb;
147
143 if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) 148 if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
144 goto drop; 149 goto drop;
145 150
@@ -147,6 +152,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
147 if (!skb) 152 if (!skb)
148 return NULL; 153 return NULL;
149 154
155 p = br_port_get_rcu(skb->dev);
156
150 if (unlikely(is_link_local(dest))) { 157 if (unlikely(is_link_local(dest))) {
151 /* Pause frames shouldn't be passed up by driver anyway */ 158 /* Pause frames shouldn't be passed up by driver anyway */
152 if (skb->protocol == htons(ETH_P_PAUSE)) 159 if (skb->protocol == htons(ETH_P_PAUSE))
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 9d21d98ae5fa..85afcdab4921 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -99,6 +99,15 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get(
99 return NULL; 99 return NULL;
100} 100}
101 101
102static struct net_bridge_mdb_entry *br_mdb_ip_get(
103 struct net_bridge_mdb_htable *mdb, struct br_ip *dst)
104{
105 if (!mdb)
106 return NULL;
107
108 return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
109}
110
102static struct net_bridge_mdb_entry *br_mdb_ip4_get( 111static struct net_bridge_mdb_entry *br_mdb_ip4_get(
103 struct net_bridge_mdb_htable *mdb, __be32 dst) 112 struct net_bridge_mdb_htable *mdb, __be32 dst)
104{ 113{
@@ -107,7 +116,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get(
107 br_dst.u.ip4 = dst; 116 br_dst.u.ip4 = dst;
108 br_dst.proto = htons(ETH_P_IP); 117 br_dst.proto = htons(ETH_P_IP);
109 118
110 return __br_mdb_ip_get(mdb, &br_dst, __br_ip4_hash(mdb, dst)); 119 return br_mdb_ip_get(mdb, &br_dst);
111} 120}
112 121
113#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 122#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -119,23 +128,17 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(
119 ipv6_addr_copy(&br_dst.u.ip6, dst); 128 ipv6_addr_copy(&br_dst.u.ip6, dst);
120 br_dst.proto = htons(ETH_P_IPV6); 129 br_dst.proto = htons(ETH_P_IPV6);
121 130
122 return __br_mdb_ip_get(mdb, &br_dst, __br_ip6_hash(mdb, dst)); 131 return br_mdb_ip_get(mdb, &br_dst);
123} 132}
124#endif 133#endif
125 134
126static struct net_bridge_mdb_entry *br_mdb_ip_get(
127 struct net_bridge_mdb_htable *mdb, struct br_ip *dst)
128{
129 return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
130}
131
132struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, 135struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
133 struct sk_buff *skb) 136 struct sk_buff *skb)
134{ 137{
135 struct net_bridge_mdb_htable *mdb = br->mdb; 138 struct net_bridge_mdb_htable *mdb = br->mdb;
136 struct br_ip ip; 139 struct br_ip ip;
137 140
138 if (!mdb || br->multicast_disabled) 141 if (br->multicast_disabled)
139 return NULL; 142 return NULL;
140 143
141 if (BR_INPUT_SKB_CB(skb)->igmp) 144 if (BR_INPUT_SKB_CB(skb)->igmp)
@@ -1432,7 +1435,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1432 struct icmp6hdr *icmp6h; 1435 struct icmp6hdr *icmp6h;
1433 u8 nexthdr; 1436 u8 nexthdr;
1434 unsigned len; 1437 unsigned len;
1435 unsigned offset; 1438 int offset;
1436 int err; 1439 int err;
1437 1440
1438 if (!pskb_may_pull(skb, sizeof(*ip6h))) 1441 if (!pskb_may_pull(skb, sizeof(*ip6h)))
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 44420992f72f..2c911c0759c2 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -55,6 +55,9 @@ static int brnf_call_arptables __read_mostly = 1;
55static int brnf_filter_vlan_tagged __read_mostly = 0; 55static int brnf_filter_vlan_tagged __read_mostly = 0;
56static int brnf_filter_pppoe_tagged __read_mostly = 0; 56static int brnf_filter_pppoe_tagged __read_mostly = 0;
57#else 57#else
58#define brnf_call_iptables 1
59#define brnf_call_ip6tables 1
60#define brnf_call_arptables 1
58#define brnf_filter_vlan_tagged 0 61#define brnf_filter_vlan_tagged 0
59#define brnf_filter_pppoe_tagged 0 62#define brnf_filter_pppoe_tagged 0
60#endif 63#endif
@@ -117,26 +120,27 @@ void br_netfilter_rtable_init(struct net_bridge *br)
117{ 120{
118 struct rtable *rt = &br->fake_rtable; 121 struct rtable *rt = &br->fake_rtable;
119 122
120 atomic_set(&rt->u.dst.__refcnt, 1); 123 atomic_set(&rt->dst.__refcnt, 1);
121 rt->u.dst.dev = br->dev; 124 rt->dst.dev = br->dev;
122 rt->u.dst.path = &rt->u.dst; 125 rt->dst.path = &rt->dst;
123 rt->u.dst.metrics[RTAX_MTU - 1] = 1500; 126 rt->dst.metrics[RTAX_MTU - 1] = 1500;
124 rt->u.dst.flags = DST_NOXFRM; 127 rt->dst.flags = DST_NOXFRM;
125 rt->u.dst.ops = &fake_dst_ops; 128 rt->dst.ops = &fake_dst_ops;
126} 129}
127 130
128static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) 131static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
129{ 132{
130 struct net_bridge_port *port = rcu_dereference(dev->br_port); 133 if (!br_port_exists(dev))
131 134 return NULL;
132 return port ? &port->br->fake_rtable : NULL; 135 return &br_port_get_rcu(dev)->br->fake_rtable;
133} 136}
134 137
135static inline struct net_device *bridge_parent(const struct net_device *dev) 138static inline struct net_device *bridge_parent(const struct net_device *dev)
136{ 139{
137 struct net_bridge_port *port = rcu_dereference(dev->br_port); 140 if (!br_port_exists(dev))
141 return NULL;
138 142
139 return port ? port->br->dev : NULL; 143 return br_port_get_rcu(dev)->br->dev;
140} 144}
141 145
142static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) 146static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
@@ -244,8 +248,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
244 kfree_skb(skb); 248 kfree_skb(skb);
245 return 0; 249 return 0;
246 } 250 }
247 dst_hold(&rt->u.dst); 251 skb_dst_set_noref(skb, &rt->dst);
248 skb_dst_set(skb, &rt->u.dst);
249 252
250 skb->dev = nf_bridge->physindev; 253 skb->dev = nf_bridge->physindev;
251 nf_bridge_update_protocol(skb); 254 nf_bridge_update_protocol(skb);
@@ -396,8 +399,7 @@ bridged_dnat:
396 kfree_skb(skb); 399 kfree_skb(skb);
397 return 0; 400 return 0;
398 } 401 }
399 dst_hold(&rt->u.dst); 402 skb_dst_set_noref(skb, &rt->dst);
400 skb_dst_set(skb, &rt->u.dst);
401 } 403 }
402 404
403 skb->dev = nf_bridge->physindev; 405 skb->dev = nf_bridge->physindev;
@@ -545,25 +547,30 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
545 const struct net_device *out, 547 const struct net_device *out,
546 int (*okfn)(struct sk_buff *)) 548 int (*okfn)(struct sk_buff *))
547{ 549{
550 struct net_bridge_port *p;
551 struct net_bridge *br;
548 struct iphdr *iph; 552 struct iphdr *iph;
549 __u32 len = nf_bridge_encap_header_len(skb); 553 __u32 len = nf_bridge_encap_header_len(skb);
550 554
551 if (unlikely(!pskb_may_pull(skb, len))) 555 if (unlikely(!pskb_may_pull(skb, len)))
552 goto out; 556 goto out;
553 557
558 p = br_port_get_rcu(in);
559 if (p == NULL)
560 goto out;
561 br = p->br;
562
554 if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || 563 if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
555 IS_PPPOE_IPV6(skb)) { 564 IS_PPPOE_IPV6(skb)) {
556#ifdef CONFIG_SYSCTL 565 if (!brnf_call_ip6tables && !br->nf_call_ip6tables)
557 if (!brnf_call_ip6tables)
558 return NF_ACCEPT; 566 return NF_ACCEPT;
559#endif 567
560 nf_bridge_pull_encap_header_rcsum(skb); 568 nf_bridge_pull_encap_header_rcsum(skb);
561 return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); 569 return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
562 } 570 }
563#ifdef CONFIG_SYSCTL 571
564 if (!brnf_call_iptables) 572 if (!brnf_call_iptables && !br->nf_call_iptables)
565 return NF_ACCEPT; 573 return NF_ACCEPT;
566#endif
567 574
568 if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) && 575 if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
569 !IS_PPPOE_IP(skb)) 576 !IS_PPPOE_IP(skb))
@@ -591,6 +598,9 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
591 598
592 pskb_trim_rcsum(skb, len); 599 pskb_trim_rcsum(skb, len);
593 600
601 /* BUG: Should really parse the IP options here. */
602 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
603
594 nf_bridge_put(skb->nf_bridge); 604 nf_bridge_put(skb->nf_bridge);
595 if (!nf_bridge_alloc(skb)) 605 if (!nf_bridge_alloc(skb))
596 return NF_DROP; 606 return NF_DROP;
@@ -716,12 +726,17 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
716 const struct net_device *out, 726 const struct net_device *out,
717 int (*okfn)(struct sk_buff *)) 727 int (*okfn)(struct sk_buff *))
718{ 728{
729 struct net_bridge_port *p;
730 struct net_bridge *br;
719 struct net_device **d = (struct net_device **)(skb->cb); 731 struct net_device **d = (struct net_device **)(skb->cb);
720 732
721#ifdef CONFIG_SYSCTL 733 p = br_port_get_rcu(out);
722 if (!brnf_call_arptables) 734 if (p == NULL)
735 return NF_ACCEPT;
736 br = p->br;
737
738 if (!brnf_call_arptables && !br->nf_call_arptables)
723 return NF_ACCEPT; 739 return NF_ACCEPT;
724#endif
725 740
726 if (skb->protocol != htons(ETH_P_ARP)) { 741 if (skb->protocol != htons(ETH_P_ARP)) {
727 if (!IS_VLAN_ARP(skb)) 742 if (!IS_VLAN_ARP(skb))
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index fe0a79018ab2..4a6a378c84e3 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -120,10 +120,11 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
120 idx = 0; 120 idx = 0;
121 for_each_netdev(net, dev) { 121 for_each_netdev(net, dev) {
122 /* not a bridge port */ 122 /* not a bridge port */
123 if (dev->br_port == NULL || idx < cb->args[0]) 123 if (!br_port_exists(dev) || idx < cb->args[0])
124 goto skip; 124 goto skip;
125 125
126 if (br_fill_ifinfo(skb, dev->br_port, NETLINK_CB(cb->skb).pid, 126 if (br_fill_ifinfo(skb, br_port_get(dev),
127 NETLINK_CB(cb->skb).pid,
127 cb->nlh->nlmsg_seq, RTM_NEWLINK, 128 cb->nlh->nlmsg_seq, RTM_NEWLINK,
128 NLM_F_MULTI) < 0) 129 NLM_F_MULTI) < 0)
129 break; 130 break;
@@ -168,9 +169,9 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
168 if (!dev) 169 if (!dev)
169 return -ENODEV; 170 return -ENODEV;
170 171
171 p = dev->br_port; 172 if (!br_port_exists(dev))
172 if (!p)
173 return -EINVAL; 173 return -EINVAL;
174 p = br_port_get(dev);
174 175
175 /* if kernel STP is running, don't allow changes */ 176 /* if kernel STP is running, don't allow changes */
176 if (p->br->stp_enabled == BR_KERNEL_STP) 177 if (p->br->stp_enabled == BR_KERNEL_STP)
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 717e1fd6133c..404d4e14c6a7 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -32,14 +32,15 @@ struct notifier_block br_device_notifier = {
32static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) 32static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
33{ 33{
34 struct net_device *dev = ptr; 34 struct net_device *dev = ptr;
35 struct net_bridge_port *p = dev->br_port; 35 struct net_bridge_port *p = br_port_get(dev);
36 struct net_bridge *br; 36 struct net_bridge *br;
37 int err; 37 int err;
38 38
39 /* not a port of a bridge */ 39 /* not a port of a bridge */
40 if (p == NULL) 40 if (!br_port_exists(dev))
41 return NOTIFY_DONE; 41 return NOTIFY_DONE;
42 42
43 p = br_port_get(dev);
43 br = p->br; 44 br = p->br;
44 45
45 switch (event) { 46 switch (event) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f4a74bc6a9b..75c90edaf7db 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -15,6 +15,8 @@
15 15
16#include <linux/netdevice.h> 16#include <linux/netdevice.h>
17#include <linux/if_bridge.h> 17#include <linux/if_bridge.h>
18#include <linux/netpoll.h>
19#include <linux/u64_stats_sync.h>
18#include <net/route.h> 20#include <net/route.h>
19 21
20#define BR_HASH_BITS 8 22#define BR_HASH_BITS 8
@@ -143,13 +145,23 @@ struct net_bridge_port
143#ifdef CONFIG_SYSFS 145#ifdef CONFIG_SYSFS
144 char sysfs_name[IFNAMSIZ]; 146 char sysfs_name[IFNAMSIZ];
145#endif 147#endif
148
149#ifdef CONFIG_NET_POLL_CONTROLLER
150 struct netpoll *np;
151#endif
146}; 152};
147 153
154#define br_port_get_rcu(dev) \
155 ((struct net_bridge_port *) rcu_dereference(dev->rx_handler_data))
156#define br_port_get(dev) ((struct net_bridge_port *) dev->rx_handler_data)
157#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
158
148struct br_cpu_netstats { 159struct br_cpu_netstats {
149 unsigned long rx_packets; 160 u64 rx_packets;
150 unsigned long rx_bytes; 161 u64 rx_bytes;
151 unsigned long tx_packets; 162 u64 tx_packets;
152 unsigned long tx_bytes; 163 u64 tx_bytes;
164 struct u64_stats_sync syncp;
153}; 165};
154 166
155struct net_bridge 167struct net_bridge
@@ -164,6 +176,9 @@ struct net_bridge
164 unsigned long feature_mask; 176 unsigned long feature_mask;
165#ifdef CONFIG_BRIDGE_NETFILTER 177#ifdef CONFIG_BRIDGE_NETFILTER
166 struct rtable fake_rtable; 178 struct rtable fake_rtable;
179 bool nf_call_iptables;
180 bool nf_call_ip6tables;
181 bool nf_call_arptables;
167#endif 182#endif
168 unsigned long flags; 183 unsigned long flags;
169#define BR_SET_MAC_ADDR 0x00000001 184#define BR_SET_MAC_ADDR 0x00000001
@@ -273,16 +288,41 @@ extern void br_dev_setup(struct net_device *dev);
273extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, 288extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
274 struct net_device *dev); 289 struct net_device *dev);
275#ifdef CONFIG_NET_POLL_CONTROLLER 290#ifdef CONFIG_NET_POLL_CONTROLLER
276extern void br_netpoll_cleanup(struct net_device *dev); 291static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
277extern void br_netpoll_enable(struct net_bridge *br, 292{
278 struct net_device *dev); 293 return br->dev->npinfo;
279extern void br_netpoll_disable(struct net_bridge *br, 294}
280 struct net_device *dev); 295
296static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
297 struct sk_buff *skb)
298{
299 struct netpoll *np = p->np;
300
301 if (np)
302 netpoll_send_skb(np, skb);
303}
304
305extern int br_netpoll_enable(struct net_bridge_port *p);
306extern void br_netpoll_disable(struct net_bridge_port *p);
281#else 307#else
282#define br_netpoll_cleanup(br) 308static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
283#define br_netpoll_enable(br, dev) 309{
284#define br_netpoll_disable(br, dev) 310 return NULL;
311}
312
313static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
314 struct sk_buff *skb)
315{
316}
285 317
318static inline int br_netpoll_enable(struct net_bridge_port *p)
319{
320 return 0;
321}
322
323static inline void br_netpoll_disable(struct net_bridge_port *p)
324{
325}
286#endif 326#endif
287 327
288/* br_fdb.c */ 328/* br_fdb.c */
@@ -331,8 +371,7 @@ extern void br_features_recompute(struct net_bridge *br);
331 371
332/* br_input.c */ 372/* br_input.c */
333extern int br_handle_frame_finish(struct sk_buff *skb); 373extern int br_handle_frame_finish(struct sk_buff *skb);
334extern struct sk_buff *br_handle_frame(struct net_bridge_port *p, 374extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
335 struct sk_buff *skb);
336 375
337/* br_ioctl.c */ 376/* br_ioctl.c */
338extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); 377extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 217bd225a42f..70aecb48fb69 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -137,12 +137,13 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
137 struct net_device *dev) 137 struct net_device *dev)
138{ 138{
139 const unsigned char *dest = eth_hdr(skb)->h_dest; 139 const unsigned char *dest = eth_hdr(skb)->h_dest;
140 struct net_bridge_port *p = rcu_dereference(dev->br_port); 140 struct net_bridge_port *p;
141 struct net_bridge *br; 141 struct net_bridge *br;
142 const unsigned char *buf; 142 const unsigned char *buf;
143 143
144 if (!p) 144 if (!br_port_exists(dev))
145 goto err; 145 goto err;
146 p = br_port_get_rcu(dev);
146 147
147 if (!pskb_may_pull(skb, 4)) 148 if (!pskb_may_pull(skb, 4))
148 goto err; 149 goto err;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index dd321e39e621..5c1e5559ebba 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -611,6 +611,73 @@ static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR,
611 show_multicast_startup_query_interval, 611 show_multicast_startup_query_interval,
612 store_multicast_startup_query_interval); 612 store_multicast_startup_query_interval);
613#endif 613#endif
614#ifdef CONFIG_BRIDGE_NETFILTER
615static ssize_t show_nf_call_iptables(
616 struct device *d, struct device_attribute *attr, char *buf)
617{
618 struct net_bridge *br = to_bridge(d);
619 return sprintf(buf, "%u\n", br->nf_call_iptables);
620}
621
622static int set_nf_call_iptables(struct net_bridge *br, unsigned long val)
623{
624 br->nf_call_iptables = val ? true : false;
625 return 0;
626}
627
628static ssize_t store_nf_call_iptables(
629 struct device *d, struct device_attribute *attr, const char *buf,
630 size_t len)
631{
632 return store_bridge_parm(d, buf, len, set_nf_call_iptables);
633}
634static DEVICE_ATTR(nf_call_iptables, S_IRUGO | S_IWUSR,
635 show_nf_call_iptables, store_nf_call_iptables);
636
637static ssize_t show_nf_call_ip6tables(
638 struct device *d, struct device_attribute *attr, char *buf)
639{
640 struct net_bridge *br = to_bridge(d);
641 return sprintf(buf, "%u\n", br->nf_call_ip6tables);
642}
643
644static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val)
645{
646 br->nf_call_ip6tables = val ? true : false;
647 return 0;
648}
649
650static ssize_t store_nf_call_ip6tables(
651 struct device *d, struct device_attribute *attr, const char *buf,
652 size_t len)
653{
654 return store_bridge_parm(d, buf, len, set_nf_call_ip6tables);
655}
656static DEVICE_ATTR(nf_call_ip6tables, S_IRUGO | S_IWUSR,
657 show_nf_call_ip6tables, store_nf_call_ip6tables);
658
659static ssize_t show_nf_call_arptables(
660 struct device *d, struct device_attribute *attr, char *buf)
661{
662 struct net_bridge *br = to_bridge(d);
663 return sprintf(buf, "%u\n", br->nf_call_arptables);
664}
665
666static int set_nf_call_arptables(struct net_bridge *br, unsigned long val)
667{
668 br->nf_call_arptables = val ? true : false;
669 return 0;
670}
671
672static ssize_t store_nf_call_arptables(
673 struct device *d, struct device_attribute *attr, const char *buf,
674 size_t len)
675{
676 return store_bridge_parm(d, buf, len, set_nf_call_arptables);
677}
678static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR,
679 show_nf_call_arptables, store_nf_call_arptables);
680#endif
614 681
615static struct attribute *bridge_attrs[] = { 682static struct attribute *bridge_attrs[] = {
616 &dev_attr_forward_delay.attr, 683 &dev_attr_forward_delay.attr,
@@ -645,6 +712,11 @@ static struct attribute *bridge_attrs[] = {
645 &dev_attr_multicast_query_response_interval.attr, 712 &dev_attr_multicast_query_response_interval.attr,
646 &dev_attr_multicast_startup_query_interval.attr, 713 &dev_attr_multicast_startup_query_interval.attr,
647#endif 714#endif
715#ifdef CONFIG_BRIDGE_NETFILTER
716 &dev_attr_nf_call_iptables.attr,
717 &dev_attr_nf_call_ip6tables.attr,
718 &dev_attr_nf_call_arptables.attr,
719#endif
648 NULL 720 NULL
649}; 721};
650 722
@@ -659,7 +731,7 @@ static struct attribute_group bridge_group = {
659 * 731 *
660 * Returns the number of bytes read. 732 * Returns the number of bytes read.
661 */ 733 */
662static ssize_t brforward_read(struct kobject *kobj, 734static ssize_t brforward_read(struct file *filp, struct kobject *kobj,
663 struct bin_attribute *bin_attr, 735 struct bin_attribute *bin_attr,
664 char *buf, loff_t off, size_t count) 736 char *buf, loff_t off, size_t count)
665{ 737{
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 9e19166ba453..46624bb6d9be 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -24,8 +24,9 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
24 return EBT_DROP; 24 return EBT_DROP;
25 25
26 if (par->hooknum != NF_BR_BROUTING) 26 if (par->hooknum != NF_BR_BROUTING)
27 /* rcu_read_lock()ed by nf_hook_slow */
27 memcpy(eth_hdr(skb)->h_dest, 28 memcpy(eth_hdr(skb)->h_dest,
28 par->in->br_port->br->dev->dev_addr, ETH_ALEN); 29 br_port_get_rcu(par->in)->br->dev->dev_addr, ETH_ALEN);
29 else 30 else
30 memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN); 31 memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN);
31 skb->pkt_type = PACKET_HOST; 32 skb->pkt_type = PACKET_HOST;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index ae3c7cef1484..26377e96fa1c 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -177,8 +177,9 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
177 if (in) { 177 if (in) {
178 strcpy(pm->physindev, in->name); 178 strcpy(pm->physindev, in->name);
179 /* If in isn't a bridge, then physindev==indev */ 179 /* If in isn't a bridge, then physindev==indev */
180 if (in->br_port) 180 if (br_port_exists(in))
181 strcpy(pm->indev, in->br_port->br->dev->name); 181 /* rcu_read_lock()ed by nf_hook_slow */
182 strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name);
182 else 183 else
183 strcpy(pm->indev, in->name); 184 strcpy(pm->indev, in->name);
184 } else 185 } else
@@ -187,7 +188,8 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
187 if (out) { 188 if (out) {
188 /* If out exists, then out is a bridge port */ 189 /* If out exists, then out is a bridge port */
189 strcpy(pm->physoutdev, out->name); 190 strcpy(pm->physoutdev, out->name);
190 strcpy(pm->outdev, out->br_port->br->dev->name); 191 /* rcu_read_lock()ed by nf_hook_slow */
192 strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name);
191 } else 193 } else
192 pm->outdev[0] = pm->physoutdev[0] = '\0'; 194 pm->outdev[0] = pm->physoutdev[0] = '\0';
193 195
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 59ca00e40dec..bcc102e3be4d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -140,11 +140,14 @@ ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h,
140 return 1; 140 return 1;
141 if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) 141 if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
142 return 1; 142 return 1;
143 if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check( 143 /* rcu_read_lock()ed by nf_hook_slow */
144 e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN)) 144 if (in && br_port_exists(in) &&
145 FWINV2(ebt_dev_check(e->logical_in, br_port_get_rcu(in)->br->dev),
146 EBT_ILOGICALIN))
145 return 1; 147 return 1;
146 if ((!out || !out->br_port) ? 0 : FWINV2(ebt_dev_check( 148 if (out && br_port_exists(out) &&
147 e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT)) 149 FWINV2(ebt_dev_check(e->logical_out, br_port_get_rcu(out)->br->dev),
150 EBT_ILOGICALOUT))
148 return 1; 151 return 1;
149 152
150 if (e->bitmask & EBT_SOURCEMAC) { 153 if (e->bitmask & EBT_SOURCEMAC) {
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
index cd1daf6008bd..529750da9624 100644
--- a/net/caif/Kconfig
+++ b/net/caif/Kconfig
@@ -2,10 +2,8 @@
2# CAIF net configurations 2# CAIF net configurations
3# 3#
4 4
5#menu "CAIF Support"
6comment "CAIF Support"
7menuconfig CAIF 5menuconfig CAIF
8 tristate "Enable CAIF support" 6 tristate "CAIF support"
9 select CRC_CCITT 7 select CRC_CCITT
10 default n 8 default n
11 ---help--- 9 ---help---
@@ -23,19 +21,18 @@ menuconfig CAIF
23 See Documentation/networking/caif for a further explanation on how to 21 See Documentation/networking/caif for a further explanation on how to
24 use and configure CAIF. 22 use and configure CAIF.
25 23
26if CAIF
27
28config CAIF_DEBUG 24config CAIF_DEBUG
29 bool "Enable Debug" 25 bool "Enable Debug"
26 depends on CAIF
30 default n 27 default n
31 --- help --- 28 --- help ---
32 Enable the inclusion of debug code in the CAIF stack. 29 Enable the inclusion of debug code in the CAIF stack.
33 Be aware that doing this will impact performance. 30 Be aware that doing this will impact performance.
34 If unsure say N. 31 If unsure say N.
35 32
36
37config CAIF_NETDEV 33config CAIF_NETDEV
38 tristate "CAIF GPRS Network device" 34 tristate "CAIF GPRS Network device"
35 depends on CAIF
39 default CAIF 36 default CAIF
40 ---help--- 37 ---help---
41 Say Y if you will be using a CAIF based GPRS network device. 38 Say Y if you will be using a CAIF based GPRS network device.
@@ -43,6 +40,3 @@ config CAIF_NETDEV
43 If you select to build it as a built-in then the main CAIF device must 40 If you select to build it as a built-in then the main CAIF device must
44 also be a built-in. 41 also be a built-in.
45 If unsure say Y. 42 If unsure say Y.
46
47endif
48#endmenu
diff --git a/net/caif/Makefile b/net/caif/Makefile
index 34852af2595e..f87481fb0e65 100644
--- a/net/caif/Makefile
+++ b/net/caif/Makefile
@@ -1,23 +1,13 @@
1ifeq ($(CONFIG_CAIF_DEBUG),1) 1ifeq ($(CONFIG_CAIF_DEBUG),y)
2CAIF_DBG_FLAGS := -DDEBUG 2EXTRA_CFLAGS += -DDEBUG
3endif 3endif
4 4
5ccflags-y := $(CAIF_FLAGS) $(CAIF_DBG_FLAGS)
6
7caif-objs := caif_dev.o \ 5caif-objs := caif_dev.o \
8 cfcnfg.o cfmuxl.o cfctrl.o \ 6 cfcnfg.o cfmuxl.o cfctrl.o \
9 cffrml.o cfveil.o cfdbgl.o\ 7 cffrml.o cfveil.o cfdbgl.o\
10 cfserl.o cfdgml.o \ 8 cfserl.o cfdgml.o \
11 cfrfml.o cfvidl.o cfutill.o \ 9 cfrfml.o cfvidl.o cfutill.o \
12 cfsrvl.o cfpkt_skbuff.o caif_config_util.o 10 cfsrvl.o cfpkt_skbuff.o caif_config_util.o
13clean-dirs:= .tmp_versions
14
15clean-files:= \
16 Module.symvers \
17 modules.order \
18 *.cmd \
19 *.o \
20 *~
21 11
22obj-$(CONFIG_CAIF) += caif.o 12obj-$(CONFIG_CAIF) += caif.o
23obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o 13obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
index 6f36580366f0..76ae68303d3a 100644
--- a/net/caif/caif_config_util.c
+++ b/net/caif/caif_config_util.c
@@ -80,6 +80,11 @@ int connect_req_to_link_param(struct cfcnfg *cnfg,
80 l->u.utility.paramlen); 80 l->u.utility.paramlen);
81 81
82 break; 82 break;
83 case CAIFPROTO_DEBUG:
84 l->linktype = CFCTRL_SRV_DBG;
85 l->endpoint = s->sockaddr.u.dbg.service;
86 l->chtype = s->sockaddr.u.dbg.type;
87 break;
83 default: 88 default:
84 return -EINVAL; 89 return -EINVAL;
85 } 90 }
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index e2b86f1f5a47..0b586e9d1378 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -255,7 +255,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
255 pref = CFPHYPREF_HIGH_BW; 255 pref = CFPHYPREF_HIGH_BW;
256 break; 256 break;
257 } 257 }
258 258 dev_hold(dev);
259 cfcnfg_add_phy_layer(get_caif_conf(), 259 cfcnfg_add_phy_layer(get_caif_conf(),
260 phy_type, 260 phy_type,
261 dev, 261 dev,
@@ -285,6 +285,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
285 caifd->layer.up->ctrlcmd(caifd->layer.up, 285 caifd->layer.up->ctrlcmd(caifd->layer.up,
286 _CAIF_CTRLCMD_PHYIF_DOWN_IND, 286 _CAIF_CTRLCMD_PHYIF_DOWN_IND,
287 caifd->layer.id); 287 caifd->layer.id);
288 might_sleep();
288 res = wait_event_interruptible_timeout(caifd->event, 289 res = wait_event_interruptible_timeout(caifd->event,
289 atomic_read(&caifd->in_use) == 0, 290 atomic_read(&caifd->in_use) == 0,
290 TIMEOUT); 291 TIMEOUT);
@@ -300,6 +301,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
300 "Unregistering an active CAIF device: %s\n", 301 "Unregistering an active CAIF device: %s\n",
301 __func__, dev->name); 302 __func__, dev->name);
302 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer); 303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
304 dev_put(dev);
303 atomic_set(&caifd->state, what); 305 atomic_set(&caifd->state, what);
304 break; 306 break;
305 307
@@ -326,7 +328,8 @@ struct cfcnfg *get_caif_conf(void)
326EXPORT_SYMBOL(get_caif_conf); 328EXPORT_SYMBOL(get_caif_conf);
327 329
328int caif_connect_client(struct caif_connect_request *conn_req, 330int caif_connect_client(struct caif_connect_request *conn_req,
329 struct cflayer *client_layer) 331 struct cflayer *client_layer, int *ifindex,
332 int *headroom, int *tailroom)
330{ 333{
331 struct cfctrl_link_param param; 334 struct cfctrl_link_param param;
332 int ret; 335 int ret;
@@ -334,8 +337,9 @@ int caif_connect_client(struct caif_connect_request *conn_req,
334 if (ret) 337 if (ret)
335 return ret; 338 return ret;
336 /* Hook up the adaptation layer. */ 339 /* Hook up the adaptation layer. */
337 return cfcnfg_add_adaptation_layer(get_caif_conf(), 340 return cfcnfg_add_adaptation_layer(get_caif_conf(), &param,
338 &param, client_layer); 341 client_layer, ifindex,
342 headroom, tailroom);
339} 343}
340EXPORT_SYMBOL(caif_connect_client); 344EXPORT_SYMBOL(caif_connect_client);
341 345
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index c3a70c5c893a..8ce904786116 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -28,8 +28,8 @@
28MODULE_LICENSE("GPL"); 28MODULE_LICENSE("GPL");
29MODULE_ALIAS_NETPROTO(AF_CAIF); 29MODULE_ALIAS_NETPROTO(AF_CAIF);
30 30
31#define CAIF_DEF_SNDBUF (CAIF_MAX_PAYLOAD_SIZE*10) 31#define CAIF_DEF_SNDBUF (4096*10)
32#define CAIF_DEF_RCVBUF (CAIF_MAX_PAYLOAD_SIZE*100) 32#define CAIF_DEF_RCVBUF (4096*100)
33 33
34/* 34/*
35 * CAIF state is re-using the TCP socket states. 35 * CAIF state is re-using the TCP socket states.
@@ -60,7 +60,7 @@ struct debug_fs_counter {
60 atomic_t num_rx_flow_off; 60 atomic_t num_rx_flow_off;
61 atomic_t num_rx_flow_on; 61 atomic_t num_rx_flow_on;
62}; 62};
63struct debug_fs_counter cnt; 63static struct debug_fs_counter cnt;
64#define dbfs_atomic_inc(v) atomic_inc(v) 64#define dbfs_atomic_inc(v) atomic_inc(v)
65#define dbfs_atomic_dec(v) atomic_dec(v) 65#define dbfs_atomic_dec(v) atomic_dec(v)
66#else 66#else
@@ -76,6 +76,7 @@ struct caifsock {
76 struct caif_connect_request conn_req; 76 struct caif_connect_request conn_req;
77 struct mutex readlock; 77 struct mutex readlock;
78 struct dentry *debugfs_socket_dir; 78 struct dentry *debugfs_socket_dir;
79 int headroom, tailroom, maxframe;
79}; 80};
80 81
81static int rx_flow_is_on(struct caifsock *cf_sk) 82static int rx_flow_is_on(struct caifsock *cf_sk)
@@ -128,17 +129,17 @@ static void caif_read_unlock(struct sock *sk)
128 mutex_unlock(&cf_sk->readlock); 129 mutex_unlock(&cf_sk->readlock);
129} 130}
130 131
131int sk_rcvbuf_lowwater(struct caifsock *cf_sk) 132static int sk_rcvbuf_lowwater(struct caifsock *cf_sk)
132{ 133{
133 /* A quarter of full buffer is used a low water mark */ 134 /* A quarter of full buffer is used a low water mark */
134 return cf_sk->sk.sk_rcvbuf / 4; 135 return cf_sk->sk.sk_rcvbuf / 4;
135} 136}
136 137
137void caif_flow_ctrl(struct sock *sk, int mode) 138static void caif_flow_ctrl(struct sock *sk, int mode)
138{ 139{
139 struct caifsock *cf_sk; 140 struct caifsock *cf_sk;
140 cf_sk = container_of(sk, struct caifsock, sk); 141 cf_sk = container_of(sk, struct caifsock, sk);
141 if (cf_sk->layer.dn) 142 if (cf_sk->layer.dn && cf_sk->layer.dn->modemcmd)
142 cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode); 143 cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode);
143} 144}
144 145
@@ -146,7 +147,7 @@ void caif_flow_ctrl(struct sock *sk, int mode)
146 * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are 147 * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are
147 * not dropped, but CAIF is sending flow off instead. 148 * not dropped, but CAIF is sending flow off instead.
148 */ 149 */
149int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 150static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
150{ 151{
151 int err; 152 int err;
152 int skb_len; 153 int skb_len;
@@ -162,9 +163,8 @@ int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
162 atomic_read(&cf_sk->sk.sk_rmem_alloc), 163 atomic_read(&cf_sk->sk.sk_rmem_alloc),
163 sk_rcvbuf_lowwater(cf_sk)); 164 sk_rcvbuf_lowwater(cf_sk));
164 set_rx_flow_off(cf_sk); 165 set_rx_flow_off(cf_sk);
165 if (cf_sk->layer.dn) 166 dbfs_atomic_inc(&cnt.num_rx_flow_off);
166 cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, 167 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
167 CAIF_MODEMCMD_FLOW_OFF_REQ);
168 } 168 }
169 169
170 err = sk_filter(sk, skb); 170 err = sk_filter(sk, skb);
@@ -175,9 +175,8 @@ int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
175 trace_printk("CAIF: %s():" 175 trace_printk("CAIF: %s():"
176 " sending flow OFF due to rmem_schedule\n", 176 " sending flow OFF due to rmem_schedule\n",
177 __func__); 177 __func__);
178 if (cf_sk->layer.dn) 178 dbfs_atomic_inc(&cnt.num_rx_flow_off);
179 cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, 179 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
180 CAIF_MODEMCMD_FLOW_OFF_REQ);
181 } 180 }
182 skb->dev = NULL; 181 skb->dev = NULL;
183 skb_set_owner_r(skb, sk); 182 skb_set_owner_r(skb, sk);
@@ -285,65 +284,51 @@ static void caif_check_flow_release(struct sock *sk)
285{ 284{
286 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 285 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
287 286
288 if (cf_sk->layer.dn == NULL || cf_sk->layer.dn->modemcmd == NULL)
289 return;
290 if (rx_flow_is_on(cf_sk)) 287 if (rx_flow_is_on(cf_sk))
291 return; 288 return;
292 289
293 if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) { 290 if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) {
294 dbfs_atomic_inc(&cnt.num_rx_flow_on); 291 dbfs_atomic_inc(&cnt.num_rx_flow_on);
295 set_rx_flow_on(cf_sk); 292 set_rx_flow_on(cf_sk);
296 cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, 293 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_ON_REQ);
297 CAIF_MODEMCMD_FLOW_ON_REQ);
298 } 294 }
299} 295}
296
300/* 297/*
301 * Copied from sock.c:sock_queue_rcv_skb(), and added check that user buffer 298 * Copied from unix_dgram_recvmsg, but removed credit checks,
302 * has sufficient size. 299 * changed locking, address handling and added MSG_TRUNC.
303 */ 300 */
304
305static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, 301static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
306 struct msghdr *m, size_t buf_len, int flags) 302 struct msghdr *m, size_t len, int flags)
307 303
308{ 304{
309 struct sock *sk = sock->sk; 305 struct sock *sk = sock->sk;
310 struct sk_buff *skb; 306 struct sk_buff *skb;
311 int ret = 0; 307 int ret;
312 int len; 308 int copylen;
313 309
314 if (unlikely(!buf_len)) 310 ret = -EOPNOTSUPP;
315 return -EINVAL; 311 if (m->msg_flags&MSG_OOB)
312 goto read_error;
316 313
317 skb = skb_recv_datagram(sk, flags, 0 , &ret); 314 skb = skb_recv_datagram(sk, flags, 0 , &ret);
318 if (!skb) 315 if (!skb)
319 goto read_error; 316 goto read_error;
320 317 copylen = skb->len;
321 len = skb->len; 318 if (len < copylen) {
322 319 m->msg_flags |= MSG_TRUNC;
323 if (skb && skb->len > buf_len && !(flags & MSG_PEEK)) { 320 copylen = len;
324 len = buf_len;
325 /*
326 * Push skb back on receive queue if buffer too small.
327 * This has a built-in race where multi-threaded receive
328 * may get packet in wrong order, but multiple read does
329 * not really guarantee ordered delivery anyway.
330 * Let's optimize for speed without taking locks.
331 */
332
333 skb_queue_head(&sk->sk_receive_queue, skb);
334 ret = -EMSGSIZE;
335 goto read_error;
336 } 321 }
337 322
338 ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len); 323 ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, copylen);
339 if (ret) 324 if (ret)
340 goto read_error; 325 goto out_free;
341 326
327 ret = (flags & MSG_TRUNC) ? skb->len : copylen;
328out_free:
342 skb_free_datagram(sk, skb); 329 skb_free_datagram(sk, skb);
343
344 caif_check_flow_release(sk); 330 caif_check_flow_release(sk);
345 331 return ret;
346 return len;
347 332
348read_error: 333read_error:
349 return ret; 334 return ret;
@@ -610,27 +595,32 @@ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
610 goto err; 595 goto err;
611 noblock = msg->msg_flags & MSG_DONTWAIT; 596 noblock = msg->msg_flags & MSG_DONTWAIT;
612 597
613 buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM;
614
615 ret = -EMSGSIZE;
616 if (buffer_size > CAIF_MAX_PAYLOAD_SIZE)
617 goto err;
618
619 timeo = sock_sndtimeo(sk, noblock); 598 timeo = sock_sndtimeo(sk, noblock);
620 timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk), 599 timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk),
621 1, timeo, &ret); 600 1, timeo, &ret);
622 601
602 if (ret)
603 goto err;
623 ret = -EPIPE; 604 ret = -EPIPE;
624 if (cf_sk->sk.sk_state != CAIF_CONNECTED || 605 if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
625 sock_flag(sk, SOCK_DEAD) || 606 sock_flag(sk, SOCK_DEAD) ||
626 (sk->sk_shutdown & RCV_SHUTDOWN)) 607 (sk->sk_shutdown & RCV_SHUTDOWN))
627 goto err; 608 goto err;
628 609
610 /* Error if trying to write more than maximum frame size. */
611 ret = -EMSGSIZE;
612 if (len > cf_sk->maxframe && cf_sk->sk.sk_protocol != CAIFPROTO_RFM)
613 goto err;
614
615 buffer_size = len + cf_sk->headroom + cf_sk->tailroom;
616
629 ret = -ENOMEM; 617 ret = -ENOMEM;
630 skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret); 618 skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret);
631 if (!skb) 619
620 if (!skb || skb_tailroom(skb) < buffer_size)
632 goto err; 621 goto err;
633 skb_reserve(skb, CAIF_NEEDED_HEADROOM); 622
623 skb_reserve(skb, cf_sk->headroom);
634 624
635 ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 625 ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
636 626
@@ -661,7 +651,6 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
661 long timeo; 651 long timeo;
662 652
663 err = -EOPNOTSUPP; 653 err = -EOPNOTSUPP;
664
665 if (unlikely(msg->msg_flags&MSG_OOB)) 654 if (unlikely(msg->msg_flags&MSG_OOB))
666 goto out_err; 655 goto out_err;
667 656
@@ -678,8 +667,8 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
678 667
679 size = len-sent; 668 size = len-sent;
680 669
681 if (size > CAIF_MAX_PAYLOAD_SIZE) 670 if (size > cf_sk->maxframe)
682 size = CAIF_MAX_PAYLOAD_SIZE; 671 size = cf_sk->maxframe;
683 672
684 /* If size is more than half of sndbuf, chop up message */ 673 /* If size is more than half of sndbuf, chop up message */
685 if (size > ((sk->sk_sndbuf >> 1) - 64)) 674 if (size > ((sk->sk_sndbuf >> 1) - 64))
@@ -689,14 +678,14 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
689 size = SKB_MAX_ALLOC; 678 size = SKB_MAX_ALLOC;
690 679
691 skb = sock_alloc_send_skb(sk, 680 skb = sock_alloc_send_skb(sk,
692 size + CAIF_NEEDED_HEADROOM 681 size + cf_sk->headroom +
693 + CAIF_NEEDED_TAILROOM, 682 cf_sk->tailroom,
694 msg->msg_flags&MSG_DONTWAIT, 683 msg->msg_flags&MSG_DONTWAIT,
695 &err); 684 &err);
696 if (skb == NULL) 685 if (skb == NULL)
697 goto out_err; 686 goto out_err;
698 687
699 skb_reserve(skb, CAIF_NEEDED_HEADROOM); 688 skb_reserve(skb, cf_sk->headroom);
700 /* 689 /*
701 * If you pass two values to the sock_alloc_send_skb 690 * If you pass two values to the sock_alloc_send_skb
702 * it tries to grab the large buffer with GFP_NOFS 691 * it tries to grab the large buffer with GFP_NOFS
@@ -837,17 +826,15 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
837 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 826 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
838 long timeo; 827 long timeo;
839 int err; 828 int err;
829 int ifindex, headroom, tailroom;
830 struct net_device *dev;
831
840 lock_sock(sk); 832 lock_sock(sk);
841 833
842 err = -EAFNOSUPPORT; 834 err = -EAFNOSUPPORT;
843 if (uaddr->sa_family != AF_CAIF) 835 if (uaddr->sa_family != AF_CAIF)
844 goto out; 836 goto out;
845 837
846 err = -ESOCKTNOSUPPORT;
847 if (unlikely(!(sk->sk_type == SOCK_STREAM &&
848 cf_sk->sk.sk_protocol == CAIFPROTO_AT) &&
849 sk->sk_type != SOCK_SEQPACKET))
850 goto out;
851 switch (sock->state) { 838 switch (sock->state) {
852 case SS_UNCONNECTED: 839 case SS_UNCONNECTED:
853 /* Normal case, a fresh connect */ 840 /* Normal case, a fresh connect */
@@ -890,8 +877,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
890 sk_stream_kill_queues(&cf_sk->sk); 877 sk_stream_kill_queues(&cf_sk->sk);
891 878
892 err = -EINVAL; 879 err = -EINVAL;
893 if (addr_len != sizeof(struct sockaddr_caif) || 880 if (addr_len != sizeof(struct sockaddr_caif))
894 !uaddr)
895 goto out; 881 goto out;
896 882
897 memcpy(&cf_sk->conn_req.sockaddr, uaddr, 883 memcpy(&cf_sk->conn_req.sockaddr, uaddr,
@@ -904,12 +890,23 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
904 dbfs_atomic_inc(&cnt.num_connect_req); 890 dbfs_atomic_inc(&cnt.num_connect_req);
905 cf_sk->layer.receive = caif_sktrecv_cb; 891 cf_sk->layer.receive = caif_sktrecv_cb;
906 err = caif_connect_client(&cf_sk->conn_req, 892 err = caif_connect_client(&cf_sk->conn_req,
907 &cf_sk->layer); 893 &cf_sk->layer, &ifindex, &headroom, &tailroom);
908 if (err < 0) { 894 if (err < 0) {
909 cf_sk->sk.sk_socket->state = SS_UNCONNECTED; 895 cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
910 cf_sk->sk.sk_state = CAIF_DISCONNECTED; 896 cf_sk->sk.sk_state = CAIF_DISCONNECTED;
911 goto out; 897 goto out;
912 } 898 }
899 dev = dev_get_by_index(sock_net(sk), ifindex);
900 cf_sk->headroom = LL_RESERVED_SPACE_EXTRA(dev, headroom);
901 cf_sk->tailroom = tailroom;
902 cf_sk->maxframe = dev->mtu - (headroom + tailroom);
903 dev_put(dev);
904 if (cf_sk->maxframe < 1) {
905 pr_warning("CAIF: %s(): CAIF Interface MTU too small (%d)\n",
906 __func__, dev->mtu);
907 err = -ENODEV;
908 goto out;
909 }
913 910
914 err = -EINPROGRESS; 911 err = -EINPROGRESS;
915wait_connect: 912wait_connect:
@@ -920,17 +917,17 @@ wait_connect:
920 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 917 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
921 918
922 release_sock(sk); 919 release_sock(sk);
923 err = wait_event_interruptible_timeout(*sk_sleep(sk), 920 err = -ERESTARTSYS;
921 timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
924 sk->sk_state != CAIF_CONNECTING, 922 sk->sk_state != CAIF_CONNECTING,
925 timeo); 923 timeo);
926 lock_sock(sk); 924 lock_sock(sk);
927 if (err < 0) 925 if (timeo < 0)
928 goto out; /* -ERESTARTSYS */ 926 goto out; /* -ERESTARTSYS */
929 if (err == 0 && sk->sk_state != CAIF_CONNECTED) {
930 err = -ETIMEDOUT;
931 goto out;
932 }
933 927
928 err = -ETIMEDOUT;
929 if (timeo == 0 && sk->sk_state != CAIF_CONNECTED)
930 goto out;
934 if (sk->sk_state != CAIF_CONNECTED) { 931 if (sk->sk_state != CAIF_CONNECTED) {
935 sock->state = SS_UNCONNECTED; 932 sock->state = SS_UNCONNECTED;
936 err = sock_error(sk); 933 err = sock_error(sk);
@@ -945,7 +942,6 @@ out:
945 return err; 942 return err;
946} 943}
947 944
948
949/* 945/*
950 * caif_release() - Disconnect a CAIF Socket 946 * caif_release() - Disconnect a CAIF Socket
951 * Copied and modified af_irda.c:irda_release(). 947 * Copied and modified af_irda.c:irda_release().
@@ -1019,10 +1015,6 @@ static unsigned int caif_poll(struct file *file,
1019 (sk->sk_shutdown & RCV_SHUTDOWN)) 1015 (sk->sk_shutdown & RCV_SHUTDOWN))
1020 mask |= POLLIN | POLLRDNORM; 1016 mask |= POLLIN | POLLRDNORM;
1021 1017
1022 /* Connection-based need to check for termination and startup */
1023 if (sk->sk_state == CAIF_DISCONNECTED)
1024 mask |= POLLHUP;
1025
1026 /* 1018 /*
1027 * we set writable also when the other side has shut down the 1019 * we set writable also when the other side has shut down the
1028 * connection. This prevents stuck sockets. 1020 * connection. This prevents stuck sockets.
@@ -1194,7 +1186,7 @@ static struct net_proto_family caif_family_ops = {
1194 .owner = THIS_MODULE, 1186 .owner = THIS_MODULE,
1195}; 1187};
1196 1188
1197int af_caif_init(void) 1189static int af_caif_init(void)
1198{ 1190{
1199 int err = sock_register(&caif_family_ops); 1191 int err = sock_register(&caif_family_ops);
1200 if (!err) 1192 if (!err)
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index df43f264d9fb..1c29189b344d 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -6,6 +6,7 @@
6#include <linux/kernel.h> 6#include <linux/kernel.h>
7#include <linux/stddef.h> 7#include <linux/stddef.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/netdevice.h>
9#include <net/caif/caif_layer.h> 10#include <net/caif/caif_layer.h>
10#include <net/caif/cfpkt.h> 11#include <net/caif/cfpkt.h>
11#include <net/caif/cfcnfg.h> 12#include <net/caif/cfcnfg.h>
@@ -22,6 +23,7 @@
22#define PHY_NAME_LEN 20 23#define PHY_NAME_LEN 20
23 24
24#define container_obj(layr) container_of(layr, struct cfcnfg, layer) 25#define container_obj(layr) container_of(layr, struct cfcnfg, layer)
26#define RFM_FRAGMENT_SIZE 4030
25 27
26/* Information about CAIF physical interfaces held by Config Module in order 28/* Information about CAIF physical interfaces held by Config Module in order
27 * to manage physical interfaces 29 * to manage physical interfaces
@@ -41,6 +43,15 @@ struct cfcnfg_phyinfo {
41 43
42 /* Information about the physical device */ 44 /* Information about the physical device */
43 struct dev_info dev_info; 45 struct dev_info dev_info;
46
47 /* Interface index */
48 int ifindex;
49
50 /* Use Start of frame extension */
51 bool use_stx;
52
53 /* Use Start of frame checksum */
54 bool use_fcs;
44}; 55};
45 56
46struct cfcnfg { 57struct cfcnfg {
@@ -248,9 +259,20 @@ static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id)
248{ 259{
249} 260}
250 261
262int protohead[CFCTRL_SRV_MASK] = {
263 [CFCTRL_SRV_VEI] = 4,
264 [CFCTRL_SRV_DATAGRAM] = 7,
265 [CFCTRL_SRV_UTIL] = 4,
266 [CFCTRL_SRV_RFM] = 3,
267 [CFCTRL_SRV_DBG] = 3,
268};
269
251int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, 270int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
252 struct cfctrl_link_param *param, 271 struct cfctrl_link_param *param,
253 struct cflayer *adap_layer) 272 struct cflayer *adap_layer,
273 int *ifindex,
274 int *proto_head,
275 int *proto_tail)
254{ 276{
255 struct cflayer *frml; 277 struct cflayer *frml;
256 if (adap_layer == NULL) { 278 if (adap_layer == NULL) {
@@ -276,6 +298,14 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
276 param->phyid); 298 param->phyid);
277 caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id == 299 caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id ==
278 param->phyid); 300 param->phyid);
301
302 *ifindex = cnfg->phy_layers[param->phyid].ifindex;
303 *proto_head =
304 protohead[param->linktype]+
305 (cnfg->phy_layers[param->phyid].use_stx ? 1 : 0);
306
307 *proto_tail = 2;
308
279 /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */ 309 /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */
280 cfctrl_enum_req(cnfg->ctrl, param->phyid); 310 cfctrl_enum_req(cnfg->ctrl, param->phyid);
281 return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer); 311 return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer);
@@ -297,6 +327,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
297 struct cfcnfg *cnfg = container_obj(layer); 327 struct cfcnfg *cnfg = container_obj(layer);
298 struct cflayer *servicel = NULL; 328 struct cflayer *servicel = NULL;
299 struct cfcnfg_phyinfo *phyinfo; 329 struct cfcnfg_phyinfo *phyinfo;
330 struct net_device *netdev;
331
300 if (adapt_layer == NULL) { 332 if (adapt_layer == NULL) {
301 pr_debug("CAIF: %s(): link setup response " 333 pr_debug("CAIF: %s(): link setup response "
302 "but no client exist, send linkdown back\n", 334 "but no client exist, send linkdown back\n",
@@ -308,19 +340,15 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
308 caif_assert(cnfg != NULL); 340 caif_assert(cnfg != NULL);
309 caif_assert(phyid != 0); 341 caif_assert(phyid != 0);
310 phyinfo = &cnfg->phy_layers[phyid]; 342 phyinfo = &cnfg->phy_layers[phyid];
311 caif_assert(phyinfo != NULL);
312 caif_assert(phyinfo->id == phyid); 343 caif_assert(phyinfo->id == phyid);
313 caif_assert(phyinfo->phy_layer != NULL); 344 caif_assert(phyinfo->phy_layer != NULL);
314 caif_assert(phyinfo->phy_layer->id == phyid); 345 caif_assert(phyinfo->phy_layer->id == phyid);
315 346
316 if (phyinfo != NULL && 347 phyinfo->phy_ref_count++;
317 phyinfo->phy_ref_count++ == 0 && 348 if (phyinfo->phy_ref_count == 1 &&
318 phyinfo->phy_layer != NULL &&
319 phyinfo->phy_layer->modemcmd != NULL) { 349 phyinfo->phy_layer->modemcmd != NULL) {
320 caif_assert(phyinfo->phy_layer->id == phyid);
321 phyinfo->phy_layer->modemcmd(phyinfo->phy_layer, 350 phyinfo->phy_layer->modemcmd(phyinfo->phy_layer,
322 _CAIF_MODEMCMD_PHYIF_USEFULL); 351 _CAIF_MODEMCMD_PHYIF_USEFULL);
323
324 } 352 }
325 adapt_layer->id = channel_id; 353 adapt_layer->id = channel_id;
326 354
@@ -332,7 +360,9 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
332 servicel = cfdgml_create(channel_id, &phyinfo->dev_info); 360 servicel = cfdgml_create(channel_id, &phyinfo->dev_info);
333 break; 361 break;
334 case CFCTRL_SRV_RFM: 362 case CFCTRL_SRV_RFM:
335 servicel = cfrfml_create(channel_id, &phyinfo->dev_info); 363 netdev = phyinfo->dev_info.dev;
364 servicel = cfrfml_create(channel_id, &phyinfo->dev_info,
365 netdev->mtu);
336 break; 366 break;
337 case CFCTRL_SRV_UTIL: 367 case CFCTRL_SRV_UTIL:
338 servicel = cfutill_create(channel_id, &phyinfo->dev_info); 368 servicel = cfutill_create(channel_id, &phyinfo->dev_info);
@@ -363,8 +393,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
363 393
364void 394void
365cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type, 395cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
366 void *dev, struct cflayer *phy_layer, u16 *phyid, 396 struct net_device *dev, struct cflayer *phy_layer,
367 enum cfcnfg_phy_preference pref, 397 u16 *phyid, enum cfcnfg_phy_preference pref,
368 bool fcs, bool stx) 398 bool fcs, bool stx)
369{ 399{
370 struct cflayer *frml; 400 struct cflayer *frml;
@@ -418,6 +448,10 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
418 cnfg->phy_layers[*phyid].dev_info.dev = dev; 448 cnfg->phy_layers[*phyid].dev_info.dev = dev;
419 cnfg->phy_layers[*phyid].phy_layer = phy_layer; 449 cnfg->phy_layers[*phyid].phy_layer = phy_layer;
420 cnfg->phy_layers[*phyid].phy_ref_count = 0; 450 cnfg->phy_layers[*phyid].phy_ref_count = 0;
451 cnfg->phy_layers[*phyid].ifindex = dev->ifindex;
452 cnfg->phy_layers[*phyid].use_stx = stx;
453 cnfg->phy_layers[*phyid].use_fcs = fcs;
454
421 phy_layer->type = phy_type; 455 phy_layer->type = phy_type;
422 frml = cffrml_create(*phyid, fcs); 456 frml = cffrml_create(*phyid, fcs);
423 if (!frml) { 457 if (!frml) {
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 0ffe1e1ce901..563145fdc4c3 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -19,7 +19,7 @@
19#ifdef CAIF_NO_LOOP 19#ifdef CAIF_NO_LOOP
20static int handle_loop(struct cfctrl *ctrl, 20static int handle_loop(struct cfctrl *ctrl,
21 int cmd, struct cfpkt *pkt){ 21 int cmd, struct cfpkt *pkt){
22 return CAIF_FAILURE; 22 return -1;
23} 23}
24#else 24#else
25static int handle_loop(struct cfctrl *ctrl, 25static int handle_loop(struct cfctrl *ctrl,
@@ -43,14 +43,15 @@ struct cflayer *cfctrl_create(void)
43 memset(&dev_info, 0, sizeof(dev_info)); 43 memset(&dev_info, 0, sizeof(dev_info));
44 dev_info.id = 0xff; 44 dev_info.id = 0xff;
45 memset(this, 0, sizeof(*this)); 45 memset(this, 0, sizeof(*this));
46 cfsrvl_init(&this->serv, 0, &dev_info); 46 cfsrvl_init(&this->serv, 0, &dev_info, false);
47 spin_lock_init(&this->info_list_lock);
48 atomic_set(&this->req_seq_no, 1); 47 atomic_set(&this->req_seq_no, 1);
49 atomic_set(&this->rsp_seq_no, 1); 48 atomic_set(&this->rsp_seq_no, 1);
50 this->serv.layer.receive = cfctrl_recv; 49 this->serv.layer.receive = cfctrl_recv;
51 sprintf(this->serv.layer.name, "ctrl"); 50 sprintf(this->serv.layer.name, "ctrl");
52 this->serv.layer.ctrlcmd = cfctrl_ctrlcmd; 51 this->serv.layer.ctrlcmd = cfctrl_ctrlcmd;
53 spin_lock_init(&this->loop_linkid_lock); 52 spin_lock_init(&this->loop_linkid_lock);
53 spin_lock_init(&this->info_list_lock);
54 INIT_LIST_HEAD(&this->list);
54 this->loop_linkid = 1; 55 this->loop_linkid = 1;
55 return &this->serv.layer; 56 return &this->serv.layer;
56} 57}
@@ -112,20 +113,10 @@ bool cfctrl_req_eq(struct cfctrl_request_info *r1,
112void cfctrl_insert_req(struct cfctrl *ctrl, 113void cfctrl_insert_req(struct cfctrl *ctrl,
113 struct cfctrl_request_info *req) 114 struct cfctrl_request_info *req)
114{ 115{
115 struct cfctrl_request_info *p;
116 spin_lock(&ctrl->info_list_lock); 116 spin_lock(&ctrl->info_list_lock);
117 req->next = NULL;
118 atomic_inc(&ctrl->req_seq_no); 117 atomic_inc(&ctrl->req_seq_no);
119 req->sequence_no = atomic_read(&ctrl->req_seq_no); 118 req->sequence_no = atomic_read(&ctrl->req_seq_no);
120 if (ctrl->first_req == NULL) { 119 list_add_tail(&req->list, &ctrl->list);
121 ctrl->first_req = req;
122 spin_unlock(&ctrl->info_list_lock);
123 return;
124 }
125 p = ctrl->first_req;
126 while (p->next != NULL)
127 p = p->next;
128 p->next = req;
129 spin_unlock(&ctrl->info_list_lock); 120 spin_unlock(&ctrl->info_list_lock);
130} 121}
131 122
@@ -133,46 +124,28 @@ void cfctrl_insert_req(struct cfctrl *ctrl,
133struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl, 124struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
134 struct cfctrl_request_info *req) 125 struct cfctrl_request_info *req)
135{ 126{
136 struct cfctrl_request_info *p; 127 struct cfctrl_request_info *p, *tmp, *first;
137 struct cfctrl_request_info *ret;
138 128
139 spin_lock(&ctrl->info_list_lock); 129 spin_lock(&ctrl->info_list_lock);
140 if (ctrl->first_req == NULL) { 130 first = list_first_entry(&ctrl->list, struct cfctrl_request_info, list);
141 spin_unlock(&ctrl->info_list_lock);
142 return NULL;
143 }
144
145 if (cfctrl_req_eq(req, ctrl->first_req)) {
146 ret = ctrl->first_req;
147 caif_assert(ctrl->first_req);
148 atomic_set(&ctrl->rsp_seq_no,
149 ctrl->first_req->sequence_no);
150 ctrl->first_req = ctrl->first_req->next;
151 spin_unlock(&ctrl->info_list_lock);
152 return ret;
153 }
154 131
155 p = ctrl->first_req; 132 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
156 133 if (cfctrl_req_eq(req, p)) {
157 while (p->next != NULL) { 134 if (p != first)
158 if (cfctrl_req_eq(req, p->next)) { 135 pr_warning("CAIF: %s(): Requests are not "
159 pr_warning("CAIF: %s(): Requests are not "
160 "received in order\n", 136 "received in order\n",
161 __func__); 137 __func__);
162 ret = p->next; 138
163 atomic_set(&ctrl->rsp_seq_no, 139 atomic_set(&ctrl->rsp_seq_no,
164 p->next->sequence_no); 140 p->sequence_no);
165 p->next = p->next->next; 141 list_del(&p->list);
166 spin_unlock(&ctrl->info_list_lock); 142 goto out;
167 return ret;
168 } 143 }
169 p = p->next;
170 } 144 }
145 p = NULL;
146out:
171 spin_unlock(&ctrl->info_list_lock); 147 spin_unlock(&ctrl->info_list_lock);
172 148 return p;
173 pr_warning("CAIF: %s(): Request does not match\n",
174 __func__);
175 return NULL;
176} 149}
177 150
178struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer) 151struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer)
@@ -388,31 +361,18 @@ void cfctrl_getstartreason_req(struct cflayer *layer)
388 361
389void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer) 362void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
390{ 363{
391 struct cfctrl_request_info *p, *req; 364 struct cfctrl_request_info *p, *tmp;
392 struct cfctrl *ctrl = container_obj(layr); 365 struct cfctrl *ctrl = container_obj(layr);
393 spin_lock(&ctrl->info_list_lock); 366 spin_lock(&ctrl->info_list_lock);
394 367 pr_warning("CAIF: %s(): enter\n", __func__);
395 if (ctrl->first_req == NULL) { 368
396 spin_unlock(&ctrl->info_list_lock); 369 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
397 return; 370 if (p->client_layer == adap_layer) {
398 } 371 pr_warning("CAIF: %s(): cancel req :%d\n", __func__,
399 372 p->sequence_no);
400 if (ctrl->first_req->client_layer == adap_layer) { 373 list_del(&p->list);
401 374 kfree(p);
402 req = ctrl->first_req;
403 ctrl->first_req = ctrl->first_req->next;
404 kfree(req);
405 }
406
407 p = ctrl->first_req;
408 while (p != NULL && p->next != NULL) {
409 if (p->next->client_layer == adap_layer) {
410
411 req = p->next;
412 p->next = p->next->next;
413 kfree(p->next);
414 } 375 }
415 p = p->next;
416 } 376 }
417 377
418 spin_unlock(&ctrl->info_list_lock); 378 spin_unlock(&ctrl->info_list_lock);
@@ -435,7 +395,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
435 cmd = cmdrsp & CFCTRL_CMD_MASK; 395 cmd = cmdrsp & CFCTRL_CMD_MASK;
436 if (cmd != CFCTRL_CMD_LINK_ERR 396 if (cmd != CFCTRL_CMD_LINK_ERR
437 && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) { 397 && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) {
438 if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE) 398 if (handle_loop(cfctrl, cmd, pkt) != 0)
439 cmdrsp |= CFCTRL_ERR_BIT; 399 cmdrsp |= CFCTRL_ERR_BIT;
440 } 400 }
441 401
@@ -634,7 +594,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
634 case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND: 594 case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
635 case CAIF_CTRLCMD_FLOW_OFF_IND: 595 case CAIF_CTRLCMD_FLOW_OFF_IND:
636 spin_lock(&this->info_list_lock); 596 spin_lock(&this->info_list_lock);
637 if (this->first_req != NULL) { 597 if (!list_empty(&this->list)) {
638 pr_debug("CAIF: %s(): Received flow off in " 598 pr_debug("CAIF: %s(): Received flow off in "
639 "control layer", __func__); 599 "control layer", __func__);
640 } 600 }
@@ -687,6 +647,6 @@ found:
687 default: 647 default:
688 break; 648 break;
689 } 649 }
690 return CAIF_SUCCESS; 650 return 0;
691} 651}
692#endif 652#endif
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index ab6b6dc34cf8..676648cac8dd 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -22,7 +22,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
22 } 22 }
23 caif_assert(offsetof(struct cfsrvl, layer) == 0); 23 caif_assert(offsetof(struct cfsrvl, layer) == 0);
24 memset(dbg, 0, sizeof(struct cfsrvl)); 24 memset(dbg, 0, sizeof(struct cfsrvl));
25 cfsrvl_init(dbg, channel_id, dev_info); 25 cfsrvl_init(dbg, channel_id, dev_info, false);
26 dbg->layer.receive = cfdbgl_receive; 26 dbg->layer.receive = cfdbgl_receive;
27 dbg->layer.transmit = cfdbgl_transmit; 27 dbg->layer.transmit = cfdbgl_transmit;
28 snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id); 28 snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id);
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index 53194840ecb6..ed9d53aff280 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -17,6 +17,7 @@
17#define DGM_FLOW_OFF 0x81 17#define DGM_FLOW_OFF 0x81
18#define DGM_FLOW_ON 0x80 18#define DGM_FLOW_ON 0x80
19#define DGM_CTRL_PKT_SIZE 1 19#define DGM_CTRL_PKT_SIZE 1
20#define DGM_MTU 1500
20 21
21static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt); 22static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
22static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt); 23static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt);
@@ -30,7 +31,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
30 } 31 }
31 caif_assert(offsetof(struct cfsrvl, layer) == 0); 32 caif_assert(offsetof(struct cfsrvl, layer) == 0);
32 memset(dgm, 0, sizeof(struct cfsrvl)); 33 memset(dgm, 0, sizeof(struct cfsrvl));
33 cfsrvl_init(dgm, channel_id, dev_info); 34 cfsrvl_init(dgm, channel_id, dev_info, true);
34 dgm->layer.receive = cfdgml_receive; 35 dgm->layer.receive = cfdgml_receive;
35 dgm->layer.transmit = cfdgml_transmit; 36 dgm->layer.transmit = cfdgml_transmit;
36 snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id); 37 snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id);
@@ -89,6 +90,10 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
89 if (!cfsrvl_ready(service, &ret)) 90 if (!cfsrvl_ready(service, &ret))
90 return ret; 91 return ret;
91 92
93 /* STE Modem cannot handle more than 1500 bytes datagrams */
94 if (cfpkt_getlen(pkt) > DGM_MTU)
95 return -EMSGSIZE;
96
92 cfpkt_add_head(pkt, &zero, 4); 97 cfpkt_add_head(pkt, &zero, 4);
93 98
94 /* Add info for MUX-layer to route the packet out. */ 99 /* Add info for MUX-layer to route the packet out. */
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 7372f27f1d32..80c8d332b258 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -174,10 +174,11 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id)
174 spin_lock(&muxl->receive_lock); 174 spin_lock(&muxl->receive_lock);
175 up = get_up(muxl, id); 175 up = get_up(muxl, id);
176 if (up == NULL) 176 if (up == NULL)
177 return NULL; 177 goto out;
178 memset(muxl->up_cache, 0, sizeof(muxl->up_cache)); 178 memset(muxl->up_cache, 0, sizeof(muxl->up_cache));
179 list_del(&up->node); 179 list_del(&up->node);
180 cfsrvl_put(up); 180 cfsrvl_put(up);
181out:
181 spin_unlock(&muxl->receive_lock); 182 spin_unlock(&muxl->receive_lock);
182 return up; 183 return up;
183} 184}
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 83fff2ff6658..01f238ff2346 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -9,8 +9,8 @@
9#include <linux/hardirq.h> 9#include <linux/hardirq.h>
10#include <net/caif/cfpkt.h> 10#include <net/caif/cfpkt.h>
11 11
12#define PKT_PREFIX CAIF_NEEDED_HEADROOM 12#define PKT_PREFIX 16
13#define PKT_POSTFIX CAIF_NEEDED_TAILROOM 13#define PKT_POSTFIX 2
14#define PKT_LEN_WHEN_EXTENDING 128 14#define PKT_LEN_WHEN_EXTENDING 128
15#define PKT_ERROR(pkt, errmsg) do { \ 15#define PKT_ERROR(pkt, errmsg) do { \
16 cfpkt_priv(pkt)->erronous = true; \ 16 cfpkt_priv(pkt)->erronous = true; \
@@ -238,6 +238,7 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
238 struct sk_buff *lastskb; 238 struct sk_buff *lastskb;
239 u8 *to; 239 u8 *to;
240 const u8 *data = data2; 240 const u8 *data = data2;
241 int ret;
241 if (unlikely(is_erronous(pkt))) 242 if (unlikely(is_erronous(pkt)))
242 return -EPROTO; 243 return -EPROTO;
243 if (unlikely(skb_headroom(skb) < len)) { 244 if (unlikely(skb_headroom(skb) < len)) {
@@ -246,9 +247,10 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
246 } 247 }
247 248
248 /* Make sure data is writable */ 249 /* Make sure data is writable */
249 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) { 250 ret = skb_cow_data(skb, 0, &lastskb);
251 if (unlikely(ret < 0)) {
250 PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n"); 252 PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n");
251 return -EPROTO; 253 return ret;
252 } 254 }
253 255
254 to = skb_push(skb, len); 256 to = skb_push(skb, len);
@@ -316,6 +318,8 @@ EXPORT_SYMBOL(cfpkt_setlen);
316struct cfpkt *cfpkt_create_uplink(const unsigned char *data, unsigned int len) 318struct cfpkt *cfpkt_create_uplink(const unsigned char *data, unsigned int len)
317{ 319{
318 struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX); 320 struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX);
321 if (!pkt)
322 return NULL;
319 if (unlikely(data != NULL)) 323 if (unlikely(data != NULL))
320 cfpkt_add_body(pkt, data, len); 324 cfpkt_add_body(pkt, data, len);
321 return pkt; 325 return pkt;
@@ -334,7 +338,6 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
334 u16 dstlen; 338 u16 dstlen;
335 u16 createlen; 339 u16 createlen;
336 if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) { 340 if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) {
337 cfpkt_destroy(addpkt);
338 return dstpkt; 341 return dstpkt;
339 } 342 }
340 if (expectlen > addlen) 343 if (expectlen > addlen)
@@ -344,12 +347,13 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
344 347
345 if (dst->tail + neededtailspace > dst->end) { 348 if (dst->tail + neededtailspace > dst->end) {
346 /* Create a dumplicate of 'dst' with more tail space */ 349 /* Create a dumplicate of 'dst' with more tail space */
350 struct cfpkt *tmppkt;
347 dstlen = skb_headlen(dst); 351 dstlen = skb_headlen(dst);
348 createlen = dstlen + neededtailspace; 352 createlen = dstlen + neededtailspace;
349 tmp = pkt_to_skb( 353 tmppkt = cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX);
350 cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX)); 354 if (tmppkt == NULL)
351 if (!tmp)
352 return NULL; 355 return NULL;
356 tmp = pkt_to_skb(tmppkt);
353 skb_set_tail_pointer(tmp, dstlen); 357 skb_set_tail_pointer(tmp, dstlen);
354 tmp->len = dstlen; 358 tmp->len = dstlen;
355 memcpy(tmp->data, dst->data, dstlen); 359 memcpy(tmp->data, dst->data, dstlen);
@@ -368,6 +372,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
368{ 372{
369 struct sk_buff *skb2; 373 struct sk_buff *skb2;
370 struct sk_buff *skb = pkt_to_skb(pkt); 374 struct sk_buff *skb = pkt_to_skb(pkt);
375 struct cfpkt *tmppkt;
371 u8 *split = skb->data + pos; 376 u8 *split = skb->data + pos;
372 u16 len2nd = skb_tail_pointer(skb) - split; 377 u16 len2nd = skb_tail_pointer(skb) - split;
373 378
@@ -381,9 +386,12 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
381 } 386 }
382 387
383 /* Create a new packet for the second part of the data */ 388 /* Create a new packet for the second part of the data */
384 skb2 = pkt_to_skb( 389 tmppkt = cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX,
385 cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX, 390 PKT_PREFIX);
386 PKT_PREFIX)); 391 if (tmppkt == NULL)
392 return NULL;
393 skb2 = pkt_to_skb(tmppkt);
394
387 395
388 if (skb2 == NULL) 396 if (skb2 == NULL)
389 return NULL; 397 return NULL;
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index cd2830fec935..4b04d25b6a3f 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -7,102 +7,304 @@
7#include <linux/stddef.h> 7#include <linux/stddef.h>
8#include <linux/spinlock.h> 8#include <linux/spinlock.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/unaligned/le_byteshift.h>
10#include <net/caif/caif_layer.h> 11#include <net/caif/caif_layer.h>
11#include <net/caif/cfsrvl.h> 12#include <net/caif/cfsrvl.h>
12#include <net/caif/cfpkt.h> 13#include <net/caif/cfpkt.h>
13 14
14#define container_obj(layr) container_of(layr, struct cfsrvl, layer) 15#define container_obj(layr) container_of(layr, struct cfrfml, serv.layer)
15
16#define RFM_SEGMENTATION_BIT 0x01 16#define RFM_SEGMENTATION_BIT 0x01
17#define RFM_PAYLOAD 0x00 17#define RFM_HEAD_SIZE 7
18#define RFM_CMD_BIT 0x80
19#define RFM_FLOW_OFF 0x81
20#define RFM_FLOW_ON 0x80
21#define RFM_SET_PIN 0x82
22#define RFM_CTRL_PKT_SIZE 1
23 18
24static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt); 19static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt);
25static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt); 20static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt);
26static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl);
27 21
28struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info) 22struct cfrfml {
23 struct cfsrvl serv;
24 struct cfpkt *incomplete_frm;
25 int fragment_size;
26 u8 seghead[6];
27 u16 pdu_size;
28 /* Protects serialized processing of packets */
29 spinlock_t sync;
30};
31
32static void cfrfml_release(struct kref *kref)
33{
34 struct cfsrvl *srvl = container_of(kref, struct cfsrvl, ref);
35 struct cfrfml *rfml = container_obj(&srvl->layer);
36
37 if (rfml->incomplete_frm)
38 cfpkt_destroy(rfml->incomplete_frm);
39
40 kfree(srvl);
41}
42
43struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
44 int mtu_size)
29{ 45{
30 struct cfsrvl *rfm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 46 int tmp;
31 if (!rfm) { 47 struct cfrfml *this =
48 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
49
50 if (!this) {
32 pr_warning("CAIF: %s(): Out of memory\n", __func__); 51 pr_warning("CAIF: %s(): Out of memory\n", __func__);
33 return NULL; 52 return NULL;
34 } 53 }
35 caif_assert(offsetof(struct cfsrvl, layer) == 0); 54
36 memset(rfm, 0, sizeof(struct cfsrvl)); 55 cfsrvl_init(&this->serv, channel_id, dev_info, false);
37 cfsrvl_init(rfm, channel_id, dev_info); 56 this->serv.release = cfrfml_release;
38 rfm->layer.modemcmd = cfservl_modemcmd; 57 this->serv.layer.receive = cfrfml_receive;
39 rfm->layer.receive = cfrfml_receive; 58 this->serv.layer.transmit = cfrfml_transmit;
40 rfm->layer.transmit = cfrfml_transmit; 59
41 snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id); 60 /* Round down to closest multiple of 16 */
42 return &rfm->layer; 61 tmp = (mtu_size - RFM_HEAD_SIZE - 6) / 16;
62 tmp *= 16;
63
64 this->fragment_size = tmp;
65 spin_lock_init(&this->sync);
66 snprintf(this->serv.layer.name, CAIF_LAYER_NAME_SZ,
67 "rfm%d", channel_id);
68
69 return &this->serv.layer;
43} 70}
44 71
45static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) 72static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead,
73 struct cfpkt *pkt, int *err)
46{ 74{
47 return -EPROTO; 75 struct cfpkt *tmppkt;
76 *err = -EPROTO;
77 /* n-th but not last segment */
78
79 if (cfpkt_extr_head(pkt, seghead, 6) < 0)
80 return NULL;
81
82 /* Verify correct header */
83 if (memcmp(seghead, rfml->seghead, 6) != 0)
84 return NULL;
85
86 tmppkt = cfpkt_append(rfml->incomplete_frm, pkt,
87 rfml->pdu_size + RFM_HEAD_SIZE);
88
89 /* If cfpkt_append failes input pkts are not freed */
90 *err = -ENOMEM;
91 if (tmppkt == NULL)
92 return NULL;
93
94 *err = 0;
95 return tmppkt;
48} 96}
49 97
50static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt) 98static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
51{ 99{
52 u8 tmp; 100 u8 tmp;
53 bool segmented; 101 bool segmented;
54 int ret; 102 int err;
103 u8 seghead[6];
104 struct cfrfml *rfml;
105 struct cfpkt *tmppkt = NULL;
106
55 caif_assert(layr->up != NULL); 107 caif_assert(layr->up != NULL);
56 caif_assert(layr->receive != NULL); 108 caif_assert(layr->receive != NULL);
109 rfml = container_obj(layr);
110 spin_lock(&rfml->sync);
111
112 err = -EPROTO;
113 if (cfpkt_extr_head(pkt, &tmp, 1) < 0)
114 goto out;
115 segmented = tmp & RFM_SEGMENTATION_BIT;
116
117 if (segmented) {
118 if (rfml->incomplete_frm == NULL) {
119 /* Initial Segment */
120 if (cfpkt_peek_head(pkt, rfml->seghead, 6) < 0)
121 goto out;
122
123 rfml->pdu_size = get_unaligned_le16(rfml->seghead+4);
124
125 if (cfpkt_erroneous(pkt))
126 goto out;
127 rfml->incomplete_frm = pkt;
128 pkt = NULL;
129 } else {
130
131 tmppkt = rfm_append(rfml, seghead, pkt, &err);
132 if (tmppkt == NULL)
133 goto out;
134
135 if (cfpkt_erroneous(tmppkt))
136 goto out;
137
138 rfml->incomplete_frm = tmppkt;
139
140
141 if (cfpkt_erroneous(tmppkt))
142 goto out;
143 }
144 err = 0;
145 goto out;
146 }
147
148 if (rfml->incomplete_frm) {
149
150 /* Last Segment */
151 tmppkt = rfm_append(rfml, seghead, pkt, &err);
152 if (tmppkt == NULL)
153 goto out;
154
155 if (cfpkt_erroneous(tmppkt))
156 goto out;
157
158 rfml->incomplete_frm = NULL;
159 pkt = tmppkt;
160 tmppkt = NULL;
161
162 /* Verify that length is correct */
163 err = EPROTO;
164 if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
165 goto out;
166 }
167
168 err = rfml->serv.layer.up->receive(rfml->serv.layer.up, pkt);
169
170out:
171
172 if (err != 0) {
173 if (tmppkt)
174 cfpkt_destroy(tmppkt);
175 if (pkt)
176 cfpkt_destroy(pkt);
177 if (rfml->incomplete_frm)
178 cfpkt_destroy(rfml->incomplete_frm);
179 rfml->incomplete_frm = NULL;
180
181 pr_info("CAIF: %s(): "
182 "Connection error %d triggered on RFM link\n",
183 __func__, err);
184
185 /* Trigger connection error upon failure.*/
186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
187 rfml->serv.dev_info.id);
188 }
189 spin_unlock(&rfml->sync);
190 return err;
191}
192
193
194static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
195{
196 caif_assert(!cfpkt_getlen(pkt) < rfml->fragment_size);
197
198 /* Add info for MUX-layer to route the packet out. */
199 cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
57 200
58 /* 201 /*
59 * RFM is taking care of segmentation and stripping of 202 * To optimize alignment, we add up the size of CAIF header before
60 * segmentation bit. 203 * payload.
61 */ 204 */
62 if (cfpkt_extr_head(pkt, &tmp, 1) < 0) { 205 cfpkt_info(pkt)->hdr_len = RFM_HEAD_SIZE;
63 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 206 cfpkt_info(pkt)->dev_info = &rfml->serv.dev_info;
64 cfpkt_destroy(pkt);
65 return -EPROTO;
66 }
67 segmented = tmp & RFM_SEGMENTATION_BIT;
68 caif_assert(!segmented);
69 207
70 ret = layr->up->receive(layr->up, pkt); 208 return rfml->serv.layer.dn->transmit(rfml->serv.layer.dn, pkt);
71 return ret;
72} 209}
73 210
74static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt) 211static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
75{ 212{
76 u8 tmp = 0; 213 int err;
77 int ret; 214 u8 seg;
78 struct cfsrvl *service = container_obj(layr); 215 u8 head[6];
216 struct cfpkt *rearpkt = NULL;
217 struct cfpkt *frontpkt = pkt;
218 struct cfrfml *rfml = container_obj(layr);
79 219
80 caif_assert(layr->dn != NULL); 220 caif_assert(layr->dn != NULL);
81 caif_assert(layr->dn->transmit != NULL); 221 caif_assert(layr->dn->transmit != NULL);
82 222
83 if (!cfsrvl_ready(service, &ret)) 223 if (!cfsrvl_ready(&rfml->serv, &err))
84 return ret; 224 return err;
225
226 err = -EPROTO;
227 if (cfpkt_getlen(pkt) <= RFM_HEAD_SIZE-1)
228 goto out;
229
230 err = 0;
231 if (cfpkt_getlen(pkt) > rfml->fragment_size + RFM_HEAD_SIZE)
232 err = cfpkt_peek_head(pkt, head, 6);
233
234 if (err < 0)
235 goto out;
236
237 while (cfpkt_getlen(frontpkt) > rfml->fragment_size + RFM_HEAD_SIZE) {
238
239 seg = 1;
240 err = -EPROTO;
241
242 if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
243 goto out;
244 /*
245 * On OOM error cfpkt_split returns NULL.
246 *
247 * NOTE: Segmented pdu is not correctly aligned.
248 * This has negative performance impact.
249 */
250
251 rearpkt = cfpkt_split(frontpkt, rfml->fragment_size);
252 if (rearpkt == NULL)
253 goto out;
254
255 err = cfrfml_transmit_segment(rfml, frontpkt);
256
257 if (err != 0)
258 goto out;
259 frontpkt = rearpkt;
260 rearpkt = NULL;
261
262 err = -ENOMEM;
263 if (frontpkt == NULL)
264 goto out;
265 err = -EPROTO;
266 if (cfpkt_add_head(frontpkt, head, 6) < 0)
267 goto out;
85 268
86 if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
87 pr_err("CAIF: %s():Packet too large - size=%d\n",
88 __func__, cfpkt_getlen(pkt));
89 return -EOVERFLOW;
90 } 269 }
91 if (cfpkt_add_head(pkt, &tmp, 1) < 0) { 270
92 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 271 seg = 0;
93 return -EPROTO; 272 err = -EPROTO;
273
274 if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
275 goto out;
276
277 err = cfrfml_transmit_segment(rfml, frontpkt);
278
279 frontpkt = NULL;
280out:
281
282 if (err != 0) {
283 pr_info("CAIF: %s(): "
284 "Connection error %d triggered on RFM link\n",
285 __func__, err);
286 /* Trigger connection error upon failure.*/
287
288 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
289 rfml->serv.dev_info.id);
290
291 if (rearpkt)
292 cfpkt_destroy(rearpkt);
293
294 if (frontpkt && frontpkt != pkt) {
295
296 cfpkt_destroy(frontpkt);
297 /*
298 * Socket layer will free the original packet,
299 * but this packet may already be sent and
300 * freed. So we have to return 0 in this case
301 * to avoid socket layer to re-free this packet.
302 * The return of shutdown indication will
303 * cause connection to be invalidated anyhow.
304 */
305 err = 0;
306 }
94 } 307 }
95 308
96 /* Add info for MUX-layer to route the packet out. */ 309 return err;
97 cfpkt_info(pkt)->channel_id = service->layer.id;
98 /*
99 * To optimize alignment, we add up the size of CAIF header before
100 * payload.
101 */
102 cfpkt_info(pkt)->hdr_len = 1;
103 cfpkt_info(pkt)->dev_info = &service->dev_info;
104 ret = layr->dn->transmit(layr->dn, pkt);
105 if (ret < 0)
106 cfpkt_extr_head(pkt, &tmp, 1);
107 return ret;
108} 310}
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 06029ea2da2f..a11fbd68a13d 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -14,7 +14,8 @@
14#define container_obj(layr) ((struct cfserl *) layr) 14#define container_obj(layr) ((struct cfserl *) layr)
15 15
16#define CFSERL_STX 0x02 16#define CFSERL_STX 0x02
17#define CAIF_MINIUM_PACKET_SIZE 4 17#define SERIAL_MINIUM_PACKET_SIZE 4
18#define SERIAL_MAX_FRAMESIZE 4096
18struct cfserl { 19struct cfserl {
19 struct cflayer layer; 20 struct cflayer layer;
20 struct cfpkt *incomplete_frm; 21 struct cfpkt *incomplete_frm;
@@ -59,14 +60,18 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
59 u8 stx = CFSERL_STX; 60 u8 stx = CFSERL_STX;
60 int ret; 61 int ret;
61 u16 expectlen = 0; 62 u16 expectlen = 0;
63
62 caif_assert(newpkt != NULL); 64 caif_assert(newpkt != NULL);
63 spin_lock(&layr->sync); 65 spin_lock(&layr->sync);
64 66
65 if (layr->incomplete_frm != NULL) { 67 if (layr->incomplete_frm != NULL) {
66
67 layr->incomplete_frm = 68 layr->incomplete_frm =
68 cfpkt_append(layr->incomplete_frm, newpkt, expectlen); 69 cfpkt_append(layr->incomplete_frm, newpkt, expectlen);
69 pkt = layr->incomplete_frm; 70 pkt = layr->incomplete_frm;
71 if (pkt == NULL) {
72 spin_unlock(&layr->sync);
73 return -ENOMEM;
74 }
70 } else { 75 } else {
71 pkt = newpkt; 76 pkt = newpkt;
72 } 77 }
@@ -115,8 +120,8 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
115 /* 120 /*
116 * Frame error handling 121 * Frame error handling
117 */ 122 */
118 if (expectlen < CAIF_MINIUM_PACKET_SIZE 123 if (expectlen < SERIAL_MINIUM_PACKET_SIZE
119 || expectlen > CAIF_MAX_FRAMESIZE) { 124 || expectlen > SERIAL_MAX_FRAMESIZE) {
120 if (!layr->usestx) { 125 if (!layr->usestx) {
121 if (pkt != NULL) 126 if (pkt != NULL)
122 cfpkt_destroy(pkt); 127 cfpkt_destroy(pkt);
@@ -154,7 +159,6 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
154 if (layr->usestx) { 159 if (layr->usestx) {
155 if (tail_pkt != NULL) 160 if (tail_pkt != NULL)
156 pkt = cfpkt_append(pkt, tail_pkt, 0); 161 pkt = cfpkt_append(pkt, tail_pkt, 0);
157
158 /* Start search for next STX if frame failed */ 162 /* Start search for next STX if frame failed */
159 continue; 163 continue;
160 } else { 164 } else {
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index aff31f34528f..f40939a91211 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -24,8 +24,10 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
24 int phyid) 24 int phyid)
25{ 25{
26 struct cfsrvl *service = container_obj(layr); 26 struct cfsrvl *service = container_obj(layr);
27
27 caif_assert(layr->up != NULL); 28 caif_assert(layr->up != NULL);
28 caif_assert(layr->up->ctrlcmd != NULL); 29 caif_assert(layr->up->ctrlcmd != NULL);
30
29 switch (ctrl) { 31 switch (ctrl) {
30 case CAIF_CTRLCMD_INIT_RSP: 32 case CAIF_CTRLCMD_INIT_RSP:
31 service->open = true; 33 service->open = true;
@@ -89,9 +91,14 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
89static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) 91static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
90{ 92{
91 struct cfsrvl *service = container_obj(layr); 93 struct cfsrvl *service = container_obj(layr);
94
92 caif_assert(layr != NULL); 95 caif_assert(layr != NULL);
93 caif_assert(layr->dn != NULL); 96 caif_assert(layr->dn != NULL);
94 caif_assert(layr->dn->transmit != NULL); 97 caif_assert(layr->dn->transmit != NULL);
98
99 if (!service->supports_flowctrl)
100 return 0;
101
95 switch (ctrl) { 102 switch (ctrl) {
96 case CAIF_MODEMCMD_FLOW_ON_REQ: 103 case CAIF_MODEMCMD_FLOW_ON_REQ:
97 { 104 {
@@ -123,6 +130,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
123 struct caif_payload_info *info; 130 struct caif_payload_info *info;
124 u8 flow_off = SRVL_FLOW_OFF; 131 u8 flow_off = SRVL_FLOW_OFF;
125 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 132 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
133 if (!pkt) {
134 pr_warning("CAIF: %s(): Out of memory\n",
135 __func__);
136 return -ENOMEM;
137 }
138
126 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { 139 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
127 pr_err("CAIF: %s(): Packet is erroneous!\n", 140 pr_err("CAIF: %s(): Packet is erroneous!\n",
128 __func__); 141 __func__);
@@ -146,9 +159,17 @@ void cfservl_destroy(struct cflayer *layer)
146 kfree(layer); 159 kfree(layer);
147} 160}
148 161
162void cfsrvl_release(struct kref *kref)
163{
164 struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
165 kfree(service);
166}
167
149void cfsrvl_init(struct cfsrvl *service, 168void cfsrvl_init(struct cfsrvl *service,
150 u8 channel_id, 169 u8 channel_id,
151 struct dev_info *dev_info) 170 struct dev_info *dev_info,
171 bool supports_flowctrl
172 )
152{ 173{
153 caif_assert(offsetof(struct cfsrvl, layer) == 0); 174 caif_assert(offsetof(struct cfsrvl, layer) == 0);
154 service->open = false; 175 service->open = false;
@@ -158,14 +179,11 @@ void cfsrvl_init(struct cfsrvl *service,
158 service->layer.ctrlcmd = cfservl_ctrlcmd; 179 service->layer.ctrlcmd = cfservl_ctrlcmd;
159 service->layer.modemcmd = cfservl_modemcmd; 180 service->layer.modemcmd = cfservl_modemcmd;
160 service->dev_info = *dev_info; 181 service->dev_info = *dev_info;
182 service->supports_flowctrl = supports_flowctrl;
183 service->release = cfsrvl_release;
161 kref_init(&service->ref); 184 kref_init(&service->ref);
162} 185}
163 186
164void cfsrvl_release(struct kref *kref)
165{
166 struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
167 kfree(service);
168}
169 187
170bool cfsrvl_ready(struct cfsrvl *service, int *err) 188bool cfsrvl_ready(struct cfsrvl *service, int *err)
171{ 189{
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 5fd2c9ea8b42..02795aff57a4 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -31,7 +31,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
31 } 31 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0); 32 caif_assert(offsetof(struct cfsrvl, layer) == 0);
33 memset(util, 0, sizeof(struct cfsrvl)); 33 memset(util, 0, sizeof(struct cfsrvl));
34 cfsrvl_init(util, channel_id, dev_info); 34 cfsrvl_init(util, channel_id, dev_info, true);
35 util->layer.receive = cfutill_receive; 35 util->layer.receive = cfutill_receive;
36 util->layer.transmit = cfutill_transmit; 36 util->layer.transmit = cfutill_transmit;
37 snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1"); 37 snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1");
@@ -90,12 +90,6 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt)
90 if (!cfsrvl_ready(service, &ret)) 90 if (!cfsrvl_ready(service, &ret))
91 return ret; 91 return ret;
92 92
93 if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
94 pr_err("CAIF: %s(): packet too large size=%d\n",
95 __func__, cfpkt_getlen(pkt));
96 return -EOVERFLOW;
97 }
98
99 cfpkt_add_head(pkt, &zero, 1); 93 cfpkt_add_head(pkt, &zero, 1);
100 /* Add info for MUX-layer to route the packet out. */ 94 /* Add info for MUX-layer to route the packet out. */
101 info = cfpkt_info(pkt); 95 info = cfpkt_info(pkt);
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 0fd827f49491..77cc09faac9a 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -30,7 +30,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
30 } 30 }
31 caif_assert(offsetof(struct cfsrvl, layer) == 0); 31 caif_assert(offsetof(struct cfsrvl, layer) == 0);
32 memset(vei, 0, sizeof(struct cfsrvl)); 32 memset(vei, 0, sizeof(struct cfsrvl));
33 cfsrvl_init(vei, channel_id, dev_info); 33 cfsrvl_init(vei, channel_id, dev_info, true);
34 vei->layer.receive = cfvei_receive; 34 vei->layer.receive = cfvei_receive;
35 vei->layer.transmit = cfvei_transmit; 35 vei->layer.transmit = cfvei_transmit;
36 snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id); 36 snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id);
@@ -84,11 +84,6 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
84 return ret; 84 return ret;
85 caif_assert(layr->dn != NULL); 85 caif_assert(layr->dn != NULL);
86 caif_assert(layr->dn->transmit != NULL); 86 caif_assert(layr->dn->transmit != NULL);
87 if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
88 pr_warning("CAIF: %s(): Packet too large - size=%d\n",
89 __func__, cfpkt_getlen(pkt));
90 return -EOVERFLOW;
91 }
92 87
93 if (cfpkt_add_head(pkt, &tmp, 1) < 0) { 88 if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
94 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 89 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index 89ad4ea239f1..ada6ee2d48f5 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -27,7 +27,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
27 caif_assert(offsetof(struct cfsrvl, layer) == 0); 27 caif_assert(offsetof(struct cfsrvl, layer) == 0);
28 28
29 memset(vid, 0, sizeof(struct cfsrvl)); 29 memset(vid, 0, sizeof(struct cfsrvl));
30 cfsrvl_init(vid, channel_id, dev_info); 30 cfsrvl_init(vid, channel_id, dev_info, false);
31 vid->layer.receive = cfvidl_receive; 31 vid->layer.receive = cfvidl_receive;
32 vid->layer.transmit = cfvidl_transmit; 32 vid->layer.transmit = cfvidl_transmit;
33 snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1"); 33 snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1");
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 610966abe2dc..4293e190ec53 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -23,7 +23,7 @@
23#include <net/caif/caif_dev.h> 23#include <net/caif/caif_dev.h>
24 24
25/* GPRS PDP connection has MTU to 1500 */ 25/* GPRS PDP connection has MTU to 1500 */
26#define SIZE_MTU 1500 26#define GPRS_PDP_MTU 1500
27/* 5 sec. connect timeout */ 27/* 5 sec. connect timeout */
28#define CONNECT_TIMEOUT (5 * HZ) 28#define CONNECT_TIMEOUT (5 * HZ)
29#define CAIF_NET_DEFAULT_QUEUE_LEN 500 29#define CAIF_NET_DEFAULT_QUEUE_LEN 500
@@ -232,6 +232,8 @@ static int chnl_net_open(struct net_device *dev)
232{ 232{
233 struct chnl_net *priv = NULL; 233 struct chnl_net *priv = NULL;
234 int result = -1; 234 int result = -1;
235 int llifindex, headroom, tailroom, mtu;
236 struct net_device *lldev;
235 ASSERT_RTNL(); 237 ASSERT_RTNL();
236 priv = netdev_priv(dev); 238 priv = netdev_priv(dev);
237 if (!priv) { 239 if (!priv) {
@@ -241,41 +243,88 @@ static int chnl_net_open(struct net_device *dev)
241 243
242 if (priv->state != CAIF_CONNECTING) { 244 if (priv->state != CAIF_CONNECTING) {
243 priv->state = CAIF_CONNECTING; 245 priv->state = CAIF_CONNECTING;
244 result = caif_connect_client(&priv->conn_req, &priv->chnl); 246 result = caif_connect_client(&priv->conn_req, &priv->chnl,
247 &llifindex, &headroom, &tailroom);
245 if (result != 0) { 248 if (result != 0) {
246 priv->state = CAIF_DISCONNECTED;
247 pr_debug("CAIF: %s(): err: " 249 pr_debug("CAIF: %s(): err: "
248 "Unable to register and open device," 250 "Unable to register and open device,"
249 " Err:%d\n", 251 " Err:%d\n",
250 __func__, 252 __func__,
251 result); 253 result);
252 return result; 254 goto error;
255 }
256
257 lldev = dev_get_by_index(dev_net(dev), llifindex);
258
259 if (lldev == NULL) {
260 pr_debug("CAIF: %s(): no interface?\n", __func__);
261 result = -ENODEV;
262 goto error;
263 }
264
265 dev->needed_tailroom = tailroom + lldev->needed_tailroom;
266 dev->hard_header_len = headroom + lldev->hard_header_len +
267 lldev->needed_tailroom;
268
269 /*
270 * MTU, head-room etc is not know before we have a
271 * CAIF link layer device available. MTU calculation may
272 * override initial RTNL configuration.
273 * MTU is minimum of current mtu, link layer mtu pluss
274 * CAIF head and tail, and PDP GPRS contexts max MTU.
275 */
276 mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom));
277 mtu = min_t(int, GPRS_PDP_MTU, mtu);
278 dev_set_mtu(dev, mtu);
279 dev_put(lldev);
280
281 if (mtu < 100) {
282 pr_warning("CAIF: %s(): "
283 "CAIF Interface MTU too small (%d)\n",
284 __func__, mtu);
285 result = -ENODEV;
286 goto error;
253 } 287 }
254 } 288 }
255 289
290 rtnl_unlock(); /* Release RTNL lock during connect wait */
291
256 result = wait_event_interruptible_timeout(priv->netmgmt_wq, 292 result = wait_event_interruptible_timeout(priv->netmgmt_wq,
257 priv->state != CAIF_CONNECTING, 293 priv->state != CAIF_CONNECTING,
258 CONNECT_TIMEOUT); 294 CONNECT_TIMEOUT);
259 295
296 rtnl_lock();
297
260 if (result == -ERESTARTSYS) { 298 if (result == -ERESTARTSYS) {
261 pr_debug("CAIF: %s(): wait_event_interruptible" 299 pr_debug("CAIF: %s(): wait_event_interruptible"
262 " woken by a signal\n", __func__); 300 " woken by a signal\n", __func__);
263 return -ERESTARTSYS; 301 result = -ERESTARTSYS;
302 goto error;
264 } 303 }
304
265 if (result == 0) { 305 if (result == 0) {
266 pr_debug("CAIF: %s(): connect timeout\n", __func__); 306 pr_debug("CAIF: %s(): connect timeout\n", __func__);
267 caif_disconnect_client(&priv->chnl); 307 caif_disconnect_client(&priv->chnl);
268 priv->state = CAIF_DISCONNECTED; 308 priv->state = CAIF_DISCONNECTED;
269 pr_debug("CAIF: %s(): state disconnected\n", __func__); 309 pr_debug("CAIF: %s(): state disconnected\n", __func__);
270 return -ETIMEDOUT; 310 result = -ETIMEDOUT;
311 goto error;
271 } 312 }
272 313
273 if (priv->state != CAIF_CONNECTED) { 314 if (priv->state != CAIF_CONNECTED) {
274 pr_debug("CAIF: %s(): connect failed\n", __func__); 315 pr_debug("CAIF: %s(): connect failed\n", __func__);
275 return -ECONNREFUSED; 316 result = -ECONNREFUSED;
317 goto error;
276 } 318 }
277 pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__); 319 pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__);
278 return 0; 320 return 0;
321
322error:
323 caif_disconnect_client(&priv->chnl);
324 priv->state = CAIF_DISCONNECTED;
325 pr_debug("CAIF: %s(): state disconnected\n", __func__);
326 return result;
327
279} 328}
280 329
281static int chnl_net_stop(struct net_device *dev) 330static int chnl_net_stop(struct net_device *dev)
@@ -321,9 +370,7 @@ static void ipcaif_net_setup(struct net_device *dev)
321 dev->destructor = free_netdev; 370 dev->destructor = free_netdev;
322 dev->flags |= IFF_NOARP; 371 dev->flags |= IFF_NOARP;
323 dev->flags |= IFF_POINTOPOINT; 372 dev->flags |= IFF_POINTOPOINT;
324 dev->needed_headroom = CAIF_NEEDED_HEADROOM; 373 dev->mtu = GPRS_PDP_MTU;
325 dev->needed_tailroom = CAIF_NEEDED_TAILROOM;
326 dev->mtu = SIZE_MTU;
327 dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN; 374 dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN;
328 375
329 priv = netdev_priv(dev); 376 priv = netdev_priv(dev);
diff --git a/net/can/raw.c b/net/can/raw.c
index da99cf153b33..ccfe633eec8e 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -436,14 +436,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
436 436
437 if (count > 1) { 437 if (count > 1) {
438 /* filter does not fit into dfilter => alloc space */ 438 /* filter does not fit into dfilter => alloc space */
439 filter = kmalloc(optlen, GFP_KERNEL); 439 filter = memdup_user(optval, optlen);
440 if (!filter) 440 if (IS_ERR(filter))
441 return -ENOMEM; 441 return PTR_ERR(filter);
442
443 if (copy_from_user(filter, optval, optlen)) {
444 kfree(filter);
445 return -EFAULT;
446 }
447 } else if (count == 1) { 442 } else if (count == 1) {
448 if (copy_from_user(&sfilter, optval, sizeof(sfilter))) 443 if (copy_from_user(&sfilter, optval, sizeof(sfilter)))
449 return -EFAULT; 444 return -EFAULT;
diff --git a/net/compat.c b/net/compat.c
index ec24d9edb025..63d260e81472 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -81,7 +81,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
81 int tot_len; 81 int tot_len;
82 82
83 if (kern_msg->msg_namelen) { 83 if (kern_msg->msg_namelen) {
84 if (mode==VERIFY_READ) { 84 if (mode == VERIFY_READ) {
85 int err = move_addr_to_kernel(kern_msg->msg_name, 85 int err = move_addr_to_kernel(kern_msg->msg_name,
86 kern_msg->msg_namelen, 86 kern_msg->msg_namelen,
87 kern_address); 87 kern_address);
@@ -354,7 +354,7 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname,
354static int do_set_sock_timeout(struct socket *sock, int level, 354static int do_set_sock_timeout(struct socket *sock, int level,
355 int optname, char __user *optval, unsigned int optlen) 355 int optname, char __user *optval, unsigned int optlen)
356{ 356{
357 struct compat_timeval __user *up = (struct compat_timeval __user *) optval; 357 struct compat_timeval __user *up = (struct compat_timeval __user *)optval;
358 struct timeval ktime; 358 struct timeval ktime;
359 mm_segment_t old_fs; 359 mm_segment_t old_fs;
360 int err; 360 int err;
@@ -367,7 +367,7 @@ static int do_set_sock_timeout(struct socket *sock, int level,
367 return -EFAULT; 367 return -EFAULT;
368 old_fs = get_fs(); 368 old_fs = get_fs();
369 set_fs(KERNEL_DS); 369 set_fs(KERNEL_DS);
370 err = sock_setsockopt(sock, level, optname, (char *) &ktime, sizeof(ktime)); 370 err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime));
371 set_fs(old_fs); 371 set_fs(old_fs);
372 372
373 return err; 373 return err;
@@ -389,11 +389,10 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
389 char __user *optval, unsigned int optlen) 389 char __user *optval, unsigned int optlen)
390{ 390{
391 int err; 391 int err;
392 struct socket *sock; 392 struct socket *sock = sockfd_lookup(fd, &err);
393 393
394 if ((sock = sockfd_lookup(fd, &err))!=NULL) 394 if (sock) {
395 { 395 err = security_socket_setsockopt(sock, level, optname);
396 err = security_socket_setsockopt(sock,level,optname);
397 if (err) { 396 if (err) {
398 sockfd_put(sock); 397 sockfd_put(sock);
399 return err; 398 return err;
@@ -453,7 +452,7 @@ static int compat_sock_getsockopt(struct socket *sock, int level, int optname,
453int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) 452int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
454{ 453{
455 struct compat_timeval __user *ctv = 454 struct compat_timeval __user *ctv =
456 (struct compat_timeval __user*) userstamp; 455 (struct compat_timeval __user *) userstamp;
457 int err = -ENOENT; 456 int err = -ENOENT;
458 struct timeval tv; 457 struct timeval tv;
459 458
@@ -477,7 +476,7 @@ EXPORT_SYMBOL(compat_sock_get_timestamp);
477int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) 476int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
478{ 477{
479 struct compat_timespec __user *ctv = 478 struct compat_timespec __user *ctv =
480 (struct compat_timespec __user*) userstamp; 479 (struct compat_timespec __user *) userstamp;
481 int err = -ENOENT; 480 int err = -ENOENT;
482 struct timespec ts; 481 struct timespec ts;
483 482
@@ -502,12 +501,10 @@ asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
502 char __user *optval, int __user *optlen) 501 char __user *optval, int __user *optlen)
503{ 502{
504 int err; 503 int err;
505 struct socket *sock; 504 struct socket *sock = sockfd_lookup(fd, &err);
506 505
507 if ((sock = sockfd_lookup(fd, &err))!=NULL) 506 if (sock) {
508 { 507 err = security_socket_getsockopt(sock, level, optname);
509 err = security_socket_getsockopt(sock, level,
510 optname);
511 if (err) { 508 if (err) {
512 sockfd_put(sock); 509 sockfd_put(sock);
513 return err; 510 return err;
@@ -531,7 +528,7 @@ struct compat_group_req {
531 __u32 gr_interface; 528 __u32 gr_interface;
532 struct __kernel_sockaddr_storage gr_group 529 struct __kernel_sockaddr_storage gr_group
533 __attribute__ ((aligned(4))); 530 __attribute__ ((aligned(4)));
534} __attribute__ ((packed)); 531} __packed;
535 532
536struct compat_group_source_req { 533struct compat_group_source_req {
537 __u32 gsr_interface; 534 __u32 gsr_interface;
@@ -539,7 +536,7 @@ struct compat_group_source_req {
539 __attribute__ ((aligned(4))); 536 __attribute__ ((aligned(4)));
540 struct __kernel_sockaddr_storage gsr_source 537 struct __kernel_sockaddr_storage gsr_source
541 __attribute__ ((aligned(4))); 538 __attribute__ ((aligned(4)));
542} __attribute__ ((packed)); 539} __packed;
543 540
544struct compat_group_filter { 541struct compat_group_filter {
545 __u32 gf_interface; 542 __u32 gf_interface;
@@ -549,7 +546,7 @@ struct compat_group_filter {
549 __u32 gf_numsrc; 546 __u32 gf_numsrc;
550 struct __kernel_sockaddr_storage gf_slist[1] 547 struct __kernel_sockaddr_storage gf_slist[1]
551 __attribute__ ((aligned(4))); 548 __attribute__ ((aligned(4)));
552} __attribute__ ((packed)); 549} __packed;
553 550
554#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \ 551#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \
555 sizeof(struct __kernel_sockaddr_storage)) 552 sizeof(struct __kernel_sockaddr_storage))
@@ -557,7 +554,7 @@ struct compat_group_filter {
557 554
558int compat_mc_setsockopt(struct sock *sock, int level, int optname, 555int compat_mc_setsockopt(struct sock *sock, int level, int optname,
559 char __user *optval, unsigned int optlen, 556 char __user *optval, unsigned int optlen,
560 int (*setsockopt)(struct sock *,int,int,char __user *,unsigned int)) 557 int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int))
561{ 558{
562 char __user *koptval = optval; 559 char __user *koptval = optval;
563 int koptlen = optlen; 560 int koptlen = optlen;
@@ -640,12 +637,11 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname,
640 } 637 }
641 return setsockopt(sock, level, optname, koptval, koptlen); 638 return setsockopt(sock, level, optname, koptval, koptlen);
642} 639}
643
644EXPORT_SYMBOL(compat_mc_setsockopt); 640EXPORT_SYMBOL(compat_mc_setsockopt);
645 641
646int compat_mc_getsockopt(struct sock *sock, int level, int optname, 642int compat_mc_getsockopt(struct sock *sock, int level, int optname,
647 char __user *optval, int __user *optlen, 643 char __user *optval, int __user *optlen,
648 int (*getsockopt)(struct sock *,int,int,char __user *,int __user *)) 644 int (*getsockopt)(struct sock *, int, int, char __user *, int __user *))
649{ 645{
650 struct compat_group_filter __user *gf32 = (void *)optval; 646 struct compat_group_filter __user *gf32 = (void *)optval;
651 struct group_filter __user *kgf; 647 struct group_filter __user *kgf;
@@ -681,7 +677,7 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname,
681 __put_user(interface, &kgf->gf_interface) || 677 __put_user(interface, &kgf->gf_interface) ||
682 __put_user(fmode, &kgf->gf_fmode) || 678 __put_user(fmode, &kgf->gf_fmode) ||
683 __put_user(numsrc, &kgf->gf_numsrc) || 679 __put_user(numsrc, &kgf->gf_numsrc) ||
684 copy_in_user(&kgf->gf_group,&gf32->gf_group,sizeof(kgf->gf_group))) 680 copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group)))
685 return -EFAULT; 681 return -EFAULT;
686 682
687 err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen); 683 err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen);
@@ -714,21 +710,22 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname,
714 copylen = numsrc * sizeof(gf32->gf_slist[0]); 710 copylen = numsrc * sizeof(gf32->gf_slist[0]);
715 if (copylen > klen) 711 if (copylen > klen)
716 copylen = klen; 712 copylen = klen;
717 if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) 713 if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen))
718 return -EFAULT; 714 return -EFAULT;
719 } 715 }
720 return err; 716 return err;
721} 717}
722
723EXPORT_SYMBOL(compat_mc_getsockopt); 718EXPORT_SYMBOL(compat_mc_getsockopt);
724 719
725 720
726/* Argument list sizes for compat_sys_socketcall */ 721/* Argument list sizes for compat_sys_socketcall */
727#define AL(x) ((x) * sizeof(u32)) 722#define AL(x) ((x) * sizeof(u32))
728static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 723static unsigned char nas[20] = {
729 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 724 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
730 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 725 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
731 AL(4),AL(5)}; 726 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
727 AL(4), AL(5)
728};
732#undef AL 729#undef AL
733 730
734asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) 731asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -827,7 +824,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
827 compat_ptr(a[4]), compat_ptr(a[5])); 824 compat_ptr(a[4]), compat_ptr(a[5]));
828 break; 825 break;
829 case SYS_SHUTDOWN: 826 case SYS_SHUTDOWN:
830 ret = sys_shutdown(a0,a1); 827 ret = sys_shutdown(a0, a1);
831 break; 828 break;
832 case SYS_SETSOCKOPT: 829 case SYS_SETSOCKOPT:
833 ret = compat_sys_setsockopt(a0, a1, a[2], 830 ret = compat_sys_setsockopt(a0, a1, a[2],
diff --git a/net/core/Makefile b/net/core/Makefile
index 51c3eec850ef..8a04dd22cf77 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,4 +18,4 @@ obj-$(CONFIG_NET_DMA) += user_dma.o
18obj-$(CONFIG_FIB_RULES) += fib_rules.o 18obj-$(CONFIG_FIB_RULES) += fib_rules.o
19obj-$(CONFIG_TRACEPOINTS) += net-traces.o 19obj-$(CONFIG_TRACEPOINTS) += net-traces.o
20obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o 20obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
21 21obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e0097531417a..251997a95483 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -219,6 +219,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
219 return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), 219 return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
220 &peeked, err); 220 &peeked, err);
221} 221}
222EXPORT_SYMBOL(skb_recv_datagram);
222 223
223void skb_free_datagram(struct sock *sk, struct sk_buff *skb) 224void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
224{ 225{
@@ -229,15 +230,17 @@ EXPORT_SYMBOL(skb_free_datagram);
229 230
230void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) 231void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
231{ 232{
233 bool slow;
234
232 if (likely(atomic_read(&skb->users) == 1)) 235 if (likely(atomic_read(&skb->users) == 1))
233 smp_rmb(); 236 smp_rmb();
234 else if (likely(!atomic_dec_and_test(&skb->users))) 237 else if (likely(!atomic_dec_and_test(&skb->users)))
235 return; 238 return;
236 239
237 lock_sock_bh(sk); 240 slow = lock_sock_fast(sk);
238 skb_orphan(skb); 241 skb_orphan(skb);
239 sk_mem_reclaim_partial(sk); 242 sk_mem_reclaim_partial(sk);
240 unlock_sock_bh(sk); 243 unlock_sock_fast(sk, slow);
241 244
242 /* skb is now orphaned, can be freed outside of locked section */ 245 /* skb is now orphaned, can be freed outside of locked section */
243 __kfree_skb(skb); 246 __kfree_skb(skb);
@@ -286,7 +289,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
286 289
287 return err; 290 return err;
288} 291}
289
290EXPORT_SYMBOL(skb_kill_datagram); 292EXPORT_SYMBOL(skb_kill_datagram);
291 293
292/** 294/**
@@ -371,6 +373,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
371fault: 373fault:
372 return -EFAULT; 374 return -EFAULT;
373} 375}
376EXPORT_SYMBOL(skb_copy_datagram_iovec);
374 377
375/** 378/**
376 * skb_copy_datagram_const_iovec - Copy a datagram to an iovec. 379 * skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
@@ -714,6 +717,7 @@ csum_error:
714fault: 717fault:
715 return -EFAULT; 718 return -EFAULT;
716} 719}
720EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
717 721
718/** 722/**
719 * datagram_poll - generic datagram poll 723 * datagram_poll - generic datagram poll
@@ -768,8 +772,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
768 772
769 return mask; 773 return mask;
770} 774}
771
772EXPORT_SYMBOL(datagram_poll); 775EXPORT_SYMBOL(datagram_poll);
773EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
774EXPORT_SYMBOL(skb_copy_datagram_iovec);
775EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0aab66d68b19..6e1b4370781c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -803,35 +803,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
803EXPORT_SYMBOL(dev_getfirstbyhwtype); 803EXPORT_SYMBOL(dev_getfirstbyhwtype);
804 804
805/** 805/**
806 * dev_get_by_flags - find any device with given flags 806 * dev_get_by_flags_rcu - find any device with given flags
807 * @net: the applicable net namespace 807 * @net: the applicable net namespace
808 * @if_flags: IFF_* values 808 * @if_flags: IFF_* values
809 * @mask: bitmask of bits in if_flags to check 809 * @mask: bitmask of bits in if_flags to check
810 * 810 *
811 * Search for any interface with the given flags. Returns NULL if a device 811 * Search for any interface with the given flags. Returns NULL if a device
812 * is not found or a pointer to the device. The device returned has 812 * is not found or a pointer to the device. Must be called inside
813 * had a reference added and the pointer is safe until the user calls 813 * rcu_read_lock(), and result refcount is unchanged.
814 * dev_put to indicate they have finished with it.
815 */ 814 */
816 815
817struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, 816struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
818 unsigned short mask) 817 unsigned short mask)
819{ 818{
820 struct net_device *dev, *ret; 819 struct net_device *dev, *ret;
821 820
822 ret = NULL; 821 ret = NULL;
823 rcu_read_lock();
824 for_each_netdev_rcu(net, dev) { 822 for_each_netdev_rcu(net, dev) {
825 if (((dev->flags ^ if_flags) & mask) == 0) { 823 if (((dev->flags ^ if_flags) & mask) == 0) {
826 dev_hold(dev);
827 ret = dev; 824 ret = dev;
828 break; 825 break;
829 } 826 }
830 } 827 }
831 rcu_read_unlock();
832 return ret; 828 return ret;
833} 829}
834EXPORT_SYMBOL(dev_get_by_flags); 830EXPORT_SYMBOL(dev_get_by_flags_rcu);
835 831
836/** 832/**
837 * dev_valid_name - check if name is okay for network device 833 * dev_valid_name - check if name is okay for network device
@@ -954,18 +950,22 @@ int dev_alloc_name(struct net_device *dev, const char *name)
954} 950}
955EXPORT_SYMBOL(dev_alloc_name); 951EXPORT_SYMBOL(dev_alloc_name);
956 952
957static int dev_get_valid_name(struct net *net, const char *name, char *buf, 953static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt)
958 bool fmt)
959{ 954{
955 struct net *net;
956
957 BUG_ON(!dev_net(dev));
958 net = dev_net(dev);
959
960 if (!dev_valid_name(name)) 960 if (!dev_valid_name(name))
961 return -EINVAL; 961 return -EINVAL;
962 962
963 if (fmt && strchr(name, '%')) 963 if (fmt && strchr(name, '%'))
964 return __dev_alloc_name(net, name, buf); 964 return dev_alloc_name(dev, name);
965 else if (__dev_get_by_name(net, name)) 965 else if (__dev_get_by_name(net, name))
966 return -EEXIST; 966 return -EEXIST;
967 else if (buf != name) 967 else if (dev->name != name)
968 strlcpy(buf, name, IFNAMSIZ); 968 strlcpy(dev->name, name, IFNAMSIZ);
969 969
970 return 0; 970 return 0;
971} 971}
@@ -997,20 +997,15 @@ int dev_change_name(struct net_device *dev, const char *newname)
997 997
998 memcpy(oldname, dev->name, IFNAMSIZ); 998 memcpy(oldname, dev->name, IFNAMSIZ);
999 999
1000 err = dev_get_valid_name(net, newname, dev->name, 1); 1000 err = dev_get_valid_name(dev, newname, 1);
1001 if (err < 0) 1001 if (err < 0)
1002 return err; 1002 return err;
1003 1003
1004rollback: 1004rollback:
1005 /* For now only devices in the initial network namespace 1005 ret = device_rename(&dev->dev, dev->name);
1006 * are in sysfs. 1006 if (ret) {
1007 */ 1007 memcpy(dev->name, oldname, IFNAMSIZ);
1008 if (net_eq(net, &init_net)) { 1008 return ret;
1009 ret = device_rename(&dev->dev, dev->name);
1010 if (ret) {
1011 memcpy(dev->name, oldname, IFNAMSIZ);
1012 return ret;
1013 }
1014 } 1009 }
1015 1010
1016 write_lock_bh(&dev_base_lock); 1011 write_lock_bh(&dev_base_lock);
@@ -1542,7 +1537,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1542 if (net_ratelimit()) 1537 if (net_ratelimit())
1543 printk(KERN_CRIT "protocol %04x is " 1538 printk(KERN_CRIT "protocol %04x is "
1544 "buggy, dev %s\n", 1539 "buggy, dev %s\n",
1545 skb2->protocol, dev->name); 1540 ntohs(skb2->protocol),
1541 dev->name);
1546 skb_reset_network_header(skb2); 1542 skb_reset_network_header(skb2);
1547 } 1543 }
1548 1544
@@ -1554,6 +1550,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1554 rcu_read_unlock(); 1550 rcu_read_unlock();
1555} 1551}
1556 1552
1553/*
1554 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1555 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
1556 */
1557void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1558{
1559 unsigned int real_num = dev->real_num_tx_queues;
1560
1561 if (unlikely(txq > dev->num_tx_queues))
1562 ;
1563 else if (txq > real_num)
1564 dev->real_num_tx_queues = txq;
1565 else if (txq < real_num) {
1566 dev->real_num_tx_queues = txq;
1567 qdisc_reset_all_tx_gt(dev, txq);
1568 }
1569}
1570EXPORT_SYMBOL(netif_set_real_num_tx_queues);
1557 1571
1558static inline void __netif_reschedule(struct Qdisc *q) 1572static inline void __netif_reschedule(struct Qdisc *q)
1559{ 1573{
@@ -1578,7 +1592,9 @@ EXPORT_SYMBOL(__netif_schedule);
1578 1592
1579void dev_kfree_skb_irq(struct sk_buff *skb) 1593void dev_kfree_skb_irq(struct sk_buff *skb)
1580{ 1594{
1581 if (atomic_dec_and_test(&skb->users)) { 1595 if (!skb->destructor)
1596 dev_kfree_skb(skb);
1597 else if (atomic_dec_and_test(&skb->users)) {
1582 struct softnet_data *sd; 1598 struct softnet_data *sd;
1583 unsigned long flags; 1599 unsigned long flags;
1584 1600
@@ -1894,8 +1910,32 @@ static int dev_gso_segment(struct sk_buff *skb)
1894 */ 1910 */
1895static inline void skb_orphan_try(struct sk_buff *skb) 1911static inline void skb_orphan_try(struct sk_buff *skb)
1896{ 1912{
1897 if (!skb_tx(skb)->flags) 1913 struct sock *sk = skb->sk;
1914
1915 if (sk && !skb_tx(skb)->flags) {
1916 /* skb_tx_hash() wont be able to get sk.
1917 * We copy sk_hash into skb->rxhash
1918 */
1919 if (!skb->rxhash)
1920 skb->rxhash = sk->sk_hash;
1898 skb_orphan(skb); 1921 skb_orphan(skb);
1922 }
1923}
1924
1925/*
1926 * Returns true if either:
1927 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
1928 * 2. skb is fragmented and the device does not support SG, or if
1929 * at least one of fragments is in highmem and device does not
1930 * support DMA from it.
1931 */
1932static inline int skb_needs_linearize(struct sk_buff *skb,
1933 struct net_device *dev)
1934{
1935 return skb_is_nonlinear(skb) &&
1936 ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
1937 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
1938 illegal_highdma(dev, skb))));
1899} 1939}
1900 1940
1901int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 1941int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -1922,6 +1962,22 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1922 goto out_kfree_skb; 1962 goto out_kfree_skb;
1923 if (skb->next) 1963 if (skb->next)
1924 goto gso; 1964 goto gso;
1965 } else {
1966 if (skb_needs_linearize(skb, dev) &&
1967 __skb_linearize(skb))
1968 goto out_kfree_skb;
1969
1970 /* If packet is not checksummed and device does not
1971 * support checksumming for this protocol, complete
1972 * checksumming here.
1973 */
1974 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1975 skb_set_transport_header(skb, skb->csum_start -
1976 skb_headroom(skb));
1977 if (!dev_can_checksum(dev, skb) &&
1978 skb_checksum_help(skb))
1979 goto out_kfree_skb;
1980 }
1925 } 1981 }
1926 1982
1927 rc = ops->ndo_start_xmit(skb, dev); 1983 rc = ops->ndo_start_xmit(skb, dev);
@@ -1981,8 +2037,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1981 if (skb->sk && skb->sk->sk_hash) 2037 if (skb->sk && skb->sk->sk_hash)
1982 hash = skb->sk->sk_hash; 2038 hash = skb->sk->sk_hash;
1983 else 2039 else
1984 hash = (__force u16) skb->protocol; 2040 hash = (__force u16) skb->protocol ^ skb->rxhash;
1985
1986 hash = jhash_1word(hash, hashrnd); 2041 hash = jhash_1word(hash, hashrnd);
1987 2042
1988 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); 2043 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
@@ -2005,12 +2060,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
2005static struct netdev_queue *dev_pick_tx(struct net_device *dev, 2060static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2006 struct sk_buff *skb) 2061 struct sk_buff *skb)
2007{ 2062{
2008 u16 queue_index; 2063 int queue_index;
2009 struct sock *sk = skb->sk; 2064 struct sock *sk = skb->sk;
2010 2065
2011 if (sk_tx_queue_recorded(sk)) { 2066 queue_index = sk_tx_queue_get(sk);
2012 queue_index = sk_tx_queue_get(sk); 2067 if (queue_index < 0) {
2013 } else {
2014 const struct net_device_ops *ops = dev->netdev_ops; 2068 const struct net_device_ops *ops = dev->netdev_ops;
2015 2069
2016 if (ops->ndo_select_queue) { 2070 if (ops->ndo_select_queue) {
@@ -2039,14 +2093,24 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2039 struct netdev_queue *txq) 2093 struct netdev_queue *txq)
2040{ 2094{
2041 spinlock_t *root_lock = qdisc_lock(q); 2095 spinlock_t *root_lock = qdisc_lock(q);
2096 bool contended = qdisc_is_running(q);
2042 int rc; 2097 int rc;
2043 2098
2099 /*
2100 * Heuristic to force contended enqueues to serialize on a
2101 * separate lock before trying to get qdisc main lock.
2102 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
2103 * and dequeue packets faster.
2104 */
2105 if (unlikely(contended))
2106 spin_lock(&q->busylock);
2107
2044 spin_lock(root_lock); 2108 spin_lock(root_lock);
2045 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { 2109 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2046 kfree_skb(skb); 2110 kfree_skb(skb);
2047 rc = NET_XMIT_DROP; 2111 rc = NET_XMIT_DROP;
2048 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && 2112 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2049 !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { 2113 qdisc_run_begin(q)) {
2050 /* 2114 /*
2051 * This is a work-conserving queue; there are no old skbs 2115 * This is a work-conserving queue; there are no old skbs
2052 * waiting to be sent out; and the qdisc is not running - 2116 * waiting to be sent out; and the qdisc is not running -
@@ -2055,37 +2119,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2055 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) 2119 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2056 skb_dst_force(skb); 2120 skb_dst_force(skb);
2057 __qdisc_update_bstats(q, skb->len); 2121 __qdisc_update_bstats(q, skb->len);
2058 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) 2122 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2123 if (unlikely(contended)) {
2124 spin_unlock(&q->busylock);
2125 contended = false;
2126 }
2059 __qdisc_run(q); 2127 __qdisc_run(q);
2060 else 2128 } else
2061 clear_bit(__QDISC_STATE_RUNNING, &q->state); 2129 qdisc_run_end(q);
2062 2130
2063 rc = NET_XMIT_SUCCESS; 2131 rc = NET_XMIT_SUCCESS;
2064 } else { 2132 } else {
2065 skb_dst_force(skb); 2133 skb_dst_force(skb);
2066 rc = qdisc_enqueue_root(skb, q); 2134 rc = qdisc_enqueue_root(skb, q);
2067 qdisc_run(q); 2135 if (qdisc_run_begin(q)) {
2136 if (unlikely(contended)) {
2137 spin_unlock(&q->busylock);
2138 contended = false;
2139 }
2140 __qdisc_run(q);
2141 }
2068 } 2142 }
2069 spin_unlock(root_lock); 2143 spin_unlock(root_lock);
2070 2144 if (unlikely(contended))
2145 spin_unlock(&q->busylock);
2071 return rc; 2146 return rc;
2072} 2147}
2073 2148
2074/*
2075 * Returns true if either:
2076 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2077 * 2. skb is fragmented and the device does not support SG, or if
2078 * at least one of fragments is in highmem and device does not
2079 * support DMA from it.
2080 */
2081static inline int skb_needs_linearize(struct sk_buff *skb,
2082 struct net_device *dev)
2083{
2084 return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
2085 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
2086 illegal_highdma(dev, skb)));
2087}
2088
2089/** 2149/**
2090 * dev_queue_xmit - transmit a buffer 2150 * dev_queue_xmit - transmit a buffer
2091 * @skb: buffer to transmit 2151 * @skb: buffer to transmit
@@ -2118,25 +2178,6 @@ int dev_queue_xmit(struct sk_buff *skb)
2118 struct Qdisc *q; 2178 struct Qdisc *q;
2119 int rc = -ENOMEM; 2179 int rc = -ENOMEM;
2120 2180
2121 /* GSO will handle the following emulations directly. */
2122 if (netif_needs_gso(dev, skb))
2123 goto gso;
2124
2125 /* Convert a paged skb to linear, if required */
2126 if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
2127 goto out_kfree_skb;
2128
2129 /* If packet is not checksummed and device does not support
2130 * checksumming for this protocol, complete checksumming here.
2131 */
2132 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2133 skb_set_transport_header(skb, skb->csum_start -
2134 skb_headroom(skb));
2135 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
2136 goto out_kfree_skb;
2137 }
2138
2139gso:
2140 /* Disable soft irqs for various locks below. Also 2181 /* Disable soft irqs for various locks below. Also
2141 * stops preemption for RCU. 2182 * stops preemption for RCU.
2142 */ 2183 */
@@ -2195,7 +2236,6 @@ gso:
2195 rc = -ENETDOWN; 2236 rc = -ENETDOWN;
2196 rcu_read_unlock_bh(); 2237 rcu_read_unlock_bh();
2197 2238
2198out_kfree_skb:
2199 kfree_skb(skb); 2239 kfree_skb(skb);
2200 return rc; 2240 return rc;
2201out: 2241out:
@@ -2254,11 +2294,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2254 if (skb_rx_queue_recorded(skb)) { 2294 if (skb_rx_queue_recorded(skb)) {
2255 u16 index = skb_get_rx_queue(skb); 2295 u16 index = skb_get_rx_queue(skb);
2256 if (unlikely(index >= dev->num_rx_queues)) { 2296 if (unlikely(index >= dev->num_rx_queues)) {
2257 if (net_ratelimit()) { 2297 WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2258 pr_warning("%s received packet on queue " 2298 "on queue %u, but number of RX queues is %u\n",
2259 "%u, but number of RX queues is %u\n", 2299 dev->name, index, dev->num_rx_queues);
2260 dev->name, index, dev->num_rx_queues);
2261 }
2262 goto done; 2300 goto done;
2263 } 2301 }
2264 rxqueue = dev->_rx + index; 2302 rxqueue = dev->_rx + index;
@@ -2582,70 +2620,14 @@ static inline int deliver_skb(struct sk_buff *skb,
2582 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 2620 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2583} 2621}
2584 2622
2585#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) 2623#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
2586 2624 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
2587#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2588/* This hook is defined here for ATM LANE */ 2625/* This hook is defined here for ATM LANE */
2589int (*br_fdb_test_addr_hook)(struct net_device *dev, 2626int (*br_fdb_test_addr_hook)(struct net_device *dev,
2590 unsigned char *addr) __read_mostly; 2627 unsigned char *addr) __read_mostly;
2591EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); 2628EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2592#endif 2629#endif
2593 2630
2594/*
2595 * If bridge module is loaded call bridging hook.
2596 * returns NULL if packet was consumed.
2597 */
2598struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2599 struct sk_buff *skb) __read_mostly;
2600EXPORT_SYMBOL_GPL(br_handle_frame_hook);
2601
2602static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2603 struct packet_type **pt_prev, int *ret,
2604 struct net_device *orig_dev)
2605{
2606 struct net_bridge_port *port;
2607
2608 if (skb->pkt_type == PACKET_LOOPBACK ||
2609 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2610 return skb;
2611
2612 if (*pt_prev) {
2613 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2614 *pt_prev = NULL;
2615 }
2616
2617 return br_handle_frame_hook(port, skb);
2618}
2619#else
2620#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
2621#endif
2622
2623#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2624struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
2625 struct sk_buff *skb) __read_mostly;
2626EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2627
2628static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2629 struct packet_type **pt_prev,
2630 int *ret,
2631 struct net_device *orig_dev)
2632{
2633 struct macvlan_port *port;
2634
2635 port = rcu_dereference(skb->dev->macvlan_port);
2636 if (!port)
2637 return skb;
2638
2639 if (*pt_prev) {
2640 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2641 *pt_prev = NULL;
2642 }
2643 return macvlan_handle_frame_hook(port, skb);
2644}
2645#else
2646#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2647#endif
2648
2649#ifdef CONFIG_NET_CLS_ACT 2631#ifdef CONFIG_NET_CLS_ACT
2650/* TODO: Maybe we should just force sch_ingress to be compiled in 2632/* TODO: Maybe we should just force sch_ingress to be compiled in
2651 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions 2633 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2696,9 +2678,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2696 if (*pt_prev) { 2678 if (*pt_prev) {
2697 *ret = deliver_skb(skb, *pt_prev, orig_dev); 2679 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2698 *pt_prev = NULL; 2680 *pt_prev = NULL;
2699 } else {
2700 /* Huh? Why does turning on AF_PACKET affect this? */
2701 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2702 } 2681 }
2703 2682
2704 switch (ing_filter(skb)) { 2683 switch (ing_filter(skb)) {
@@ -2741,6 +2720,51 @@ void netif_nit_deliver(struct sk_buff *skb)
2741 rcu_read_unlock(); 2720 rcu_read_unlock();
2742} 2721}
2743 2722
2723/**
2724 * netdev_rx_handler_register - register receive handler
2725 * @dev: device to register a handler for
2726 * @rx_handler: receive handler to register
2727 * @rx_handler_data: data pointer that is used by rx handler
2728 *
2729 * Register a receive hander for a device. This handler will then be
2730 * called from __netif_receive_skb. A negative errno code is returned
2731 * on a failure.
2732 *
2733 * The caller must hold the rtnl_mutex.
2734 */
2735int netdev_rx_handler_register(struct net_device *dev,
2736 rx_handler_func_t *rx_handler,
2737 void *rx_handler_data)
2738{
2739 ASSERT_RTNL();
2740
2741 if (dev->rx_handler)
2742 return -EBUSY;
2743
2744 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
2745 rcu_assign_pointer(dev->rx_handler, rx_handler);
2746
2747 return 0;
2748}
2749EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
2750
2751/**
2752 * netdev_rx_handler_unregister - unregister receive handler
2753 * @dev: device to unregister a handler from
2754 *
2755 * Unregister a receive hander from a device.
2756 *
2757 * The caller must hold the rtnl_mutex.
2758 */
2759void netdev_rx_handler_unregister(struct net_device *dev)
2760{
2761
2762 ASSERT_RTNL();
2763 rcu_assign_pointer(dev->rx_handler, NULL);
2764 rcu_assign_pointer(dev->rx_handler_data, NULL);
2765}
2766EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
2767
2744static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, 2768static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
2745 struct net_device *master) 2769 struct net_device *master)
2746{ 2770{
@@ -2762,7 +2786,8 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
2762 if (master->priv_flags & IFF_MASTER_ARPMON) 2786 if (master->priv_flags & IFF_MASTER_ARPMON)
2763 dev->last_rx = jiffies; 2787 dev->last_rx = jiffies;
2764 2788
2765 if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) { 2789 if ((master->priv_flags & IFF_MASTER_ALB) &&
2790 (master->priv_flags & IFF_BRIDGE_PORT)) {
2766 /* Do address unmangle. The local destination address 2791 /* Do address unmangle. The local destination address
2767 * will be always the one master has. Provides the right 2792 * will be always the one master has. Provides the right
2768 * functionality in a bridge. 2793 * functionality in a bridge.
@@ -2793,10 +2818,11 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
2793static int __netif_receive_skb(struct sk_buff *skb) 2818static int __netif_receive_skb(struct sk_buff *skb)
2794{ 2819{
2795 struct packet_type *ptype, *pt_prev; 2820 struct packet_type *ptype, *pt_prev;
2821 rx_handler_func_t *rx_handler;
2796 struct net_device *orig_dev; 2822 struct net_device *orig_dev;
2797 struct net_device *master; 2823 struct net_device *master;
2798 struct net_device *null_or_orig; 2824 struct net_device *null_or_orig;
2799 struct net_device *null_or_bond; 2825 struct net_device *orig_or_bond;
2800 int ret = NET_RX_DROP; 2826 int ret = NET_RX_DROP;
2801 __be16 type; 2827 __be16 type;
2802 2828
@@ -2813,18 +2839,28 @@ static int __netif_receive_skb(struct sk_buff *skb)
2813 if (!skb->skb_iif) 2839 if (!skb->skb_iif)
2814 skb->skb_iif = skb->dev->ifindex; 2840 skb->skb_iif = skb->dev->ifindex;
2815 2841
2842 /*
2843 * bonding note: skbs received on inactive slaves should only
2844 * be delivered to pkt handlers that are exact matches. Also
2845 * the deliver_no_wcard flag will be set. If packet handlers
2846 * are sensitive to duplicate packets these skbs will need to
2847 * be dropped at the handler. The vlan accel path may have
2848 * already set the deliver_no_wcard flag.
2849 */
2816 null_or_orig = NULL; 2850 null_or_orig = NULL;
2817 orig_dev = skb->dev; 2851 orig_dev = skb->dev;
2818 master = ACCESS_ONCE(orig_dev->master); 2852 master = ACCESS_ONCE(orig_dev->master);
2819 if (master) { 2853 if (skb->deliver_no_wcard)
2820 if (skb_bond_should_drop(skb, master)) 2854 null_or_orig = orig_dev;
2855 else if (master) {
2856 if (skb_bond_should_drop(skb, master)) {
2857 skb->deliver_no_wcard = 1;
2821 null_or_orig = orig_dev; /* deliver only exact match */ 2858 null_or_orig = orig_dev; /* deliver only exact match */
2822 else 2859 } else
2823 skb->dev = master; 2860 skb->dev = master;
2824 } 2861 }
2825 2862
2826 __get_cpu_var(softnet_data).processed++; 2863 __this_cpu_inc(softnet_data.processed);
2827
2828 skb_reset_network_header(skb); 2864 skb_reset_network_header(skb);
2829 skb_reset_transport_header(skb); 2865 skb_reset_transport_header(skb);
2830 skb->mac_len = skb->network_header - skb->mac_header; 2866 skb->mac_len = skb->network_header - skb->mac_header;
@@ -2856,12 +2892,17 @@ static int __netif_receive_skb(struct sk_buff *skb)
2856ncls: 2892ncls:
2857#endif 2893#endif
2858 2894
2859 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); 2895 /* Handle special case of bridge or macvlan */
2860 if (!skb) 2896 rx_handler = rcu_dereference(skb->dev->rx_handler);
2861 goto out; 2897 if (rx_handler) {
2862 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); 2898 if (pt_prev) {
2863 if (!skb) 2899 ret = deliver_skb(skb, pt_prev, orig_dev);
2864 goto out; 2900 pt_prev = NULL;
2901 }
2902 skb = rx_handler(skb);
2903 if (!skb)
2904 goto out;
2905 }
2865 2906
2866 /* 2907 /*
2867 * Make sure frames received on VLAN interfaces stacked on 2908 * Make sure frames received on VLAN interfaces stacked on
@@ -2869,10 +2910,10 @@ ncls:
2869 * device that may have registered for a specific ptype. The 2910 * device that may have registered for a specific ptype. The
2870 * handler may have to adjust skb->dev and orig_dev. 2911 * handler may have to adjust skb->dev and orig_dev.
2871 */ 2912 */
2872 null_or_bond = NULL; 2913 orig_or_bond = orig_dev;
2873 if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && 2914 if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
2874 (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { 2915 (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
2875 null_or_bond = vlan_dev_real_dev(skb->dev); 2916 orig_or_bond = vlan_dev_real_dev(skb->dev);
2876 } 2917 }
2877 2918
2878 type = skb->protocol; 2919 type = skb->protocol;
@@ -2880,7 +2921,7 @@ ncls:
2880 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 2921 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2881 if (ptype->type == type && (ptype->dev == null_or_orig || 2922 if (ptype->type == type && (ptype->dev == null_or_orig ||
2882 ptype->dev == skb->dev || ptype->dev == orig_dev || 2923 ptype->dev == skb->dev || ptype->dev == orig_dev ||
2883 ptype->dev == null_or_bond)) { 2924 ptype->dev == orig_or_bond)) {
2884 if (pt_prev) 2925 if (pt_prev)
2885 ret = deliver_skb(skb, pt_prev, orig_dev); 2926 ret = deliver_skb(skb, pt_prev, orig_dev);
2886 pt_prev = ptype; 2927 pt_prev = ptype;
@@ -2922,6 +2963,9 @@ int netif_receive_skb(struct sk_buff *skb)
2922 if (netdev_tstamp_prequeue) 2963 if (netdev_tstamp_prequeue)
2923 net_timestamp_check(skb); 2964 net_timestamp_check(skb);
2924 2965
2966 if (skb_defer_rx_timestamp(skb))
2967 return NET_RX_SUCCESS;
2968
2925#ifdef CONFIG_RPS 2969#ifdef CONFIG_RPS
2926 { 2970 {
2927 struct rps_dev_flow voidflow, *rflow = &voidflow; 2971 struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -3686,10 +3730,11 @@ void dev_seq_stop(struct seq_file *seq, void *v)
3686 3730
3687static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) 3731static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3688{ 3732{
3689 const struct net_device_stats *stats = dev_get_stats(dev); 3733 struct rtnl_link_stats64 temp;
3734 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
3690 3735
3691 seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " 3736 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
3692 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", 3737 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
3693 dev->name, stats->rx_bytes, stats->rx_packets, 3738 dev->name, stats->rx_bytes, stats->rx_packets,
3694 stats->rx_errors, 3739 stats->rx_errors,
3695 stats->rx_dropped + stats->rx_missed_errors, 3740 stats->rx_dropped + stats->rx_missed_errors,
@@ -4965,7 +5010,7 @@ int register_netdevice(struct net_device *dev)
4965 } 5010 }
4966 } 5011 }
4967 5012
4968 ret = dev_get_valid_name(net, dev->name, dev->name, 0); 5013 ret = dev_get_valid_name(dev, dev->name, 0);
4969 if (ret) 5014 if (ret)
4970 goto err_uninit; 5015 goto err_uninit;
4971 5016
@@ -4994,8 +5039,6 @@ int register_netdevice(struct net_device *dev)
4994 if (dev->features & NETIF_F_SG) 5039 if (dev->features & NETIF_F_SG)
4995 dev->features |= NETIF_F_GSO; 5040 dev->features |= NETIF_F_GSO;
4996 5041
4997 netdev_initialize_kobject(dev);
4998
4999 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5042 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5000 ret = notifier_to_errno(ret); 5043 ret = notifier_to_errno(ret);
5001 if (ret) 5044 if (ret)
@@ -5240,20 +5283,22 @@ void netdev_run_todo(void)
5240/** 5283/**
5241 * dev_txq_stats_fold - fold tx_queues stats 5284 * dev_txq_stats_fold - fold tx_queues stats
5242 * @dev: device to get statistics from 5285 * @dev: device to get statistics from
5243 * @stats: struct net_device_stats to hold results 5286 * @stats: struct rtnl_link_stats64 to hold results
5244 */ 5287 */
5245void dev_txq_stats_fold(const struct net_device *dev, 5288void dev_txq_stats_fold(const struct net_device *dev,
5246 struct net_device_stats *stats) 5289 struct rtnl_link_stats64 *stats)
5247{ 5290{
5248 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; 5291 u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5249 unsigned int i; 5292 unsigned int i;
5250 struct netdev_queue *txq; 5293 struct netdev_queue *txq;
5251 5294
5252 for (i = 0; i < dev->num_tx_queues; i++) { 5295 for (i = 0; i < dev->num_tx_queues; i++) {
5253 txq = netdev_get_tx_queue(dev, i); 5296 txq = netdev_get_tx_queue(dev, i);
5297 spin_lock_bh(&txq->_xmit_lock);
5254 tx_bytes += txq->tx_bytes; 5298 tx_bytes += txq->tx_bytes;
5255 tx_packets += txq->tx_packets; 5299 tx_packets += txq->tx_packets;
5256 tx_dropped += txq->tx_dropped; 5300 tx_dropped += txq->tx_dropped;
5301 spin_unlock_bh(&txq->_xmit_lock);
5257 } 5302 }
5258 if (tx_bytes || tx_packets || tx_dropped) { 5303 if (tx_bytes || tx_packets || tx_dropped) {
5259 stats->tx_bytes = tx_bytes; 5304 stats->tx_bytes = tx_bytes;
@@ -5263,23 +5308,53 @@ void dev_txq_stats_fold(const struct net_device *dev,
5263} 5308}
5264EXPORT_SYMBOL(dev_txq_stats_fold); 5309EXPORT_SYMBOL(dev_txq_stats_fold);
5265 5310
5311/* Convert net_device_stats to rtnl_link_stats64. They have the same
5312 * fields in the same order, with only the type differing.
5313 */
5314static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
5315 const struct net_device_stats *netdev_stats)
5316{
5317#if BITS_PER_LONG == 64
5318 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
5319 memcpy(stats64, netdev_stats, sizeof(*stats64));
5320#else
5321 size_t i, n = sizeof(*stats64) / sizeof(u64);
5322 const unsigned long *src = (const unsigned long *)netdev_stats;
5323 u64 *dst = (u64 *)stats64;
5324
5325 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
5326 sizeof(*stats64) / sizeof(u64));
5327 for (i = 0; i < n; i++)
5328 dst[i] = src[i];
5329#endif
5330}
5331
5266/** 5332/**
5267 * dev_get_stats - get network device statistics 5333 * dev_get_stats - get network device statistics
5268 * @dev: device to get statistics from 5334 * @dev: device to get statistics from
5335 * @storage: place to store stats
5269 * 5336 *
5270 * Get network statistics from device. The device driver may provide 5337 * Get network statistics from device. Return @storage.
5271 * its own method by setting dev->netdev_ops->get_stats; otherwise 5338 * The device driver may provide its own method by setting
5272 * the internal statistics structure is used. 5339 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
5340 * otherwise the internal statistics structure is used.
5273 */ 5341 */
5274const struct net_device_stats *dev_get_stats(struct net_device *dev) 5342struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
5343 struct rtnl_link_stats64 *storage)
5275{ 5344{
5276 const struct net_device_ops *ops = dev->netdev_ops; 5345 const struct net_device_ops *ops = dev->netdev_ops;
5277 5346
5278 if (ops->ndo_get_stats) 5347 if (ops->ndo_get_stats64) {
5279 return ops->ndo_get_stats(dev); 5348 memset(storage, 0, sizeof(*storage));
5280 5349 return ops->ndo_get_stats64(dev, storage);
5281 dev_txq_stats_fold(dev, &dev->stats); 5350 }
5282 return &dev->stats; 5351 if (ops->ndo_get_stats) {
5352 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
5353 return storage;
5354 }
5355 netdev_stats_to_stats64(storage, &dev->stats);
5356 dev_txq_stats_fold(dev, storage);
5357 return storage;
5283} 5358}
5284EXPORT_SYMBOL(dev_get_stats); 5359EXPORT_SYMBOL(dev_get_stats);
5285 5360
@@ -5547,15 +5622,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5547 if (dev->features & NETIF_F_NETNS_LOCAL) 5622 if (dev->features & NETIF_F_NETNS_LOCAL)
5548 goto out; 5623 goto out;
5549 5624
5550#ifdef CONFIG_SYSFS
5551 /* Don't allow real devices to be moved when sysfs
5552 * is enabled.
5553 */
5554 err = -EINVAL;
5555 if (dev->dev.parent)
5556 goto out;
5557#endif
5558
5559 /* Ensure the device has been registrered */ 5625 /* Ensure the device has been registrered */
5560 err = -EINVAL; 5626 err = -EINVAL;
5561 if (dev->reg_state != NETREG_REGISTERED) 5627 if (dev->reg_state != NETREG_REGISTERED)
@@ -5574,7 +5640,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5574 /* We get here if we can't use the current device name */ 5640 /* We get here if we can't use the current device name */
5575 if (!pat) 5641 if (!pat)
5576 goto out; 5642 goto out;
5577 if (dev_get_valid_name(net, pat, dev->name, 1)) 5643 if (dev_get_valid_name(dev, pat, 1))
5578 goto out; 5644 goto out;
5579 } 5645 }
5580 5646
@@ -5606,8 +5672,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5606 dev_uc_flush(dev); 5672 dev_uc_flush(dev);
5607 dev_mc_flush(dev); 5673 dev_mc_flush(dev);
5608 5674
5609 netdev_unregister_kobject(dev);
5610
5611 /* Actually switch the network namespace */ 5675 /* Actually switch the network namespace */
5612 dev_net_set(dev, net); 5676 dev_net_set(dev, net);
5613 5677
@@ -5620,7 +5684,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5620 } 5684 }
5621 5685
5622 /* Fixup kobjects */ 5686 /* Fixup kobjects */
5623 err = netdev_register_kobject(dev); 5687 err = device_rename(&dev->dev, dev->name);
5624 WARN_ON(err); 5688 WARN_ON(err);
5625 5689
5626 /* Add the device back in the hashes */ 5690 /* Add the device back in the hashes */
@@ -5795,6 +5859,68 @@ char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5795 return buffer; 5859 return buffer;
5796} 5860}
5797 5861
5862static int __netdev_printk(const char *level, const struct net_device *dev,
5863 struct va_format *vaf)
5864{
5865 int r;
5866
5867 if (dev && dev->dev.parent)
5868 r = dev_printk(level, dev->dev.parent, "%s: %pV",
5869 netdev_name(dev), vaf);
5870 else if (dev)
5871 r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
5872 else
5873 r = printk("%s(NULL net_device): %pV", level, vaf);
5874
5875 return r;
5876}
5877
5878int netdev_printk(const char *level, const struct net_device *dev,
5879 const char *format, ...)
5880{
5881 struct va_format vaf;
5882 va_list args;
5883 int r;
5884
5885 va_start(args, format);
5886
5887 vaf.fmt = format;
5888 vaf.va = &args;
5889
5890 r = __netdev_printk(level, dev, &vaf);
5891 va_end(args);
5892
5893 return r;
5894}
5895EXPORT_SYMBOL(netdev_printk);
5896
5897#define define_netdev_printk_level(func, level) \
5898int func(const struct net_device *dev, const char *fmt, ...) \
5899{ \
5900 int r; \
5901 struct va_format vaf; \
5902 va_list args; \
5903 \
5904 va_start(args, fmt); \
5905 \
5906 vaf.fmt = fmt; \
5907 vaf.va = &args; \
5908 \
5909 r = __netdev_printk(level, dev, &vaf); \
5910 va_end(args); \
5911 \
5912 return r; \
5913} \
5914EXPORT_SYMBOL(func);
5915
5916define_netdev_printk_level(netdev_emerg, KERN_EMERG);
5917define_netdev_printk_level(netdev_alert, KERN_ALERT);
5918define_netdev_printk_level(netdev_crit, KERN_CRIT);
5919define_netdev_printk_level(netdev_err, KERN_ERR);
5920define_netdev_printk_level(netdev_warn, KERN_WARNING);
5921define_netdev_printk_level(netdev_notice, KERN_NOTICE);
5922define_netdev_printk_level(netdev_info, KERN_INFO);
5923
5798static void __net_exit netdev_exit(struct net *net) 5924static void __net_exit netdev_exit(struct net *net)
5799{ 5925{
5800 kfree(net->dev_name_head); 5926 kfree(net->dev_name_head);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index cf208d8042b1..646ef3bc7200 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -172,12 +172,12 @@ out:
172 return; 172 return;
173} 173}
174 174
175static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) 175static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
176{ 176{
177 trace_drop_common(skb, location); 177 trace_drop_common(skb, location);
178} 178}
179 179
180static void trace_napi_poll_hit(struct napi_struct *napi) 180static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
181{ 181{
182 struct dm_hw_stat_delta *new_stat; 182 struct dm_hw_stat_delta *new_stat;
183 183
@@ -223,14 +223,19 @@ static int set_all_monitor_traces(int state)
223 223
224 spin_lock(&trace_state_lock); 224 spin_lock(&trace_state_lock);
225 225
226 if (state == trace_state) {
227 rc = -EAGAIN;
228 goto out_unlock;
229 }
230
226 switch (state) { 231 switch (state) {
227 case TRACE_ON: 232 case TRACE_ON:
228 rc |= register_trace_kfree_skb(trace_kfree_skb_hit); 233 rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
229 rc |= register_trace_napi_poll(trace_napi_poll_hit); 234 rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
230 break; 235 break;
231 case TRACE_OFF: 236 case TRACE_OFF:
232 rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); 237 rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
233 rc |= unregister_trace_napi_poll(trace_napi_poll_hit); 238 rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
234 239
235 tracepoint_synchronize_unregister(); 240 tracepoint_synchronize_unregister();
236 241
@@ -251,11 +256,12 @@ static int set_all_monitor_traces(int state)
251 256
252 if (!rc) 257 if (!rc)
253 trace_state = state; 258 trace_state = state;
259 else
260 rc = -EINPROGRESS;
254 261
262out_unlock:
255 spin_unlock(&trace_state_lock); 263 spin_unlock(&trace_state_lock);
256 264
257 if (rc)
258 return -EINPROGRESS;
259 return rc; 265 return rc;
260} 266}
261 267
diff --git a/net/core/dst.c b/net/core/dst.c
index 9920722cc82b..6c41b1fac3db 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -197,7 +197,6 @@ static void ___dst_free(struct dst_entry *dst)
197 dst->input = dst->output = dst_discard; 197 dst->input = dst->output = dst_discard;
198 dst->obsolete = 2; 198 dst->obsolete = 2;
199} 199}
200EXPORT_SYMBOL(__dst_free);
201 200
202void __dst_free(struct dst_entry *dst) 201void __dst_free(struct dst_entry *dst)
203{ 202{
@@ -213,6 +212,7 @@ void __dst_free(struct dst_entry *dst)
213 } 212 }
214 spin_unlock_bh(&dst_garbage.lock); 213 spin_unlock_bh(&dst_garbage.lock);
215} 214}
215EXPORT_SYMBOL(__dst_free);
216 216
217struct dst_entry *dst_destroy(struct dst_entry * dst) 217struct dst_entry *dst_destroy(struct dst_entry * dst)
218{ 218{
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a0f4964033d2..7a85367b3c2f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -144,31 +144,13 @@ u32 ethtool_op_get_flags(struct net_device *dev)
144} 144}
145EXPORT_SYMBOL(ethtool_op_get_flags); 145EXPORT_SYMBOL(ethtool_op_get_flags);
146 146
147int ethtool_op_set_flags(struct net_device *dev, u32 data) 147int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
148{ 148{
149 const struct ethtool_ops *ops = dev->ethtool_ops; 149 if (data & ~supported)
150 unsigned long features = dev->features; 150 return -EINVAL;
151
152 if (data & ETH_FLAG_LRO)
153 features |= NETIF_F_LRO;
154 else
155 features &= ~NETIF_F_LRO;
156
157 if (data & ETH_FLAG_NTUPLE) {
158 if (!ops->set_rx_ntuple)
159 return -EOPNOTSUPP;
160 features |= NETIF_F_NTUPLE;
161 } else {
162 /* safe to clear regardless */
163 features &= ~NETIF_F_NTUPLE;
164 }
165
166 if (data & ETH_FLAG_RXHASH)
167 features |= NETIF_F_RXHASH;
168 else
169 features &= ~NETIF_F_RXHASH;
170 151
171 dev->features = features; 152 dev->features = ((dev->features & ~flags_dup_features) |
153 (data & flags_dup_features));
172 return 0; 154 return 0;
173} 155}
174EXPORT_SYMBOL(ethtool_op_set_flags); 156EXPORT_SYMBOL(ethtool_op_set_flags);
@@ -318,23 +300,33 @@ out:
318} 300}
319 301
320static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, 302static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
321 void __user *useraddr) 303 u32 cmd, void __user *useraddr)
322{ 304{
323 struct ethtool_rxnfc cmd; 305 struct ethtool_rxnfc info;
306 size_t info_size = sizeof(info);
324 307
325 if (!dev->ethtool_ops->set_rxnfc) 308 if (!dev->ethtool_ops->set_rxnfc)
326 return -EOPNOTSUPP; 309 return -EOPNOTSUPP;
327 310
328 if (copy_from_user(&cmd, useraddr, sizeof(cmd))) 311 /* struct ethtool_rxnfc was originally defined for
312 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
313 * members. User-space might still be using that
314 * definition. */
315 if (cmd == ETHTOOL_SRXFH)
316 info_size = (offsetof(struct ethtool_rxnfc, data) +
317 sizeof(info.data));
318
319 if (copy_from_user(&info, useraddr, info_size))
329 return -EFAULT; 320 return -EFAULT;
330 321
331 return dev->ethtool_ops->set_rxnfc(dev, &cmd); 322 return dev->ethtool_ops->set_rxnfc(dev, &info);
332} 323}
333 324
334static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, 325static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
335 void __user *useraddr) 326 u32 cmd, void __user *useraddr)
336{ 327{
337 struct ethtool_rxnfc info; 328 struct ethtool_rxnfc info;
329 size_t info_size = sizeof(info);
338 const struct ethtool_ops *ops = dev->ethtool_ops; 330 const struct ethtool_ops *ops = dev->ethtool_ops;
339 int ret; 331 int ret;
340 void *rule_buf = NULL; 332 void *rule_buf = NULL;
@@ -342,13 +334,22 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
342 if (!ops->get_rxnfc) 334 if (!ops->get_rxnfc)
343 return -EOPNOTSUPP; 335 return -EOPNOTSUPP;
344 336
345 if (copy_from_user(&info, useraddr, sizeof(info))) 337 /* struct ethtool_rxnfc was originally defined for
338 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
339 * members. User-space might still be using that
340 * definition. */
341 if (cmd == ETHTOOL_GRXFH)
342 info_size = (offsetof(struct ethtool_rxnfc, data) +
343 sizeof(info.data));
344
345 if (copy_from_user(&info, useraddr, info_size))
346 return -EFAULT; 346 return -EFAULT;
347 347
348 if (info.cmd == ETHTOOL_GRXCLSRLALL) { 348 if (info.cmd == ETHTOOL_GRXCLSRLALL) {
349 if (info.rule_cnt > 0) { 349 if (info.rule_cnt > 0) {
350 rule_buf = kmalloc(info.rule_cnt * sizeof(u32), 350 if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
351 GFP_USER); 351 rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
352 GFP_USER);
352 if (!rule_buf) 353 if (!rule_buf)
353 return -ENOMEM; 354 return -ENOMEM;
354 } 355 }
@@ -359,7 +360,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
359 goto err_out; 360 goto err_out;
360 361
361 ret = -EFAULT; 362 ret = -EFAULT;
362 if (copy_to_user(useraddr, &info, sizeof(info))) 363 if (copy_to_user(useraddr, &info, info_size))
363 goto err_out; 364 goto err_out;
364 365
365 if (rule_buf) { 366 if (rule_buf) {
@@ -376,6 +377,80 @@ err_out:
376 return ret; 377 return ret;
377} 378}
378 379
380static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
381 void __user *useraddr)
382{
383 struct ethtool_rxfh_indir *indir;
384 u32 table_size;
385 size_t full_size;
386 int ret;
387
388 if (!dev->ethtool_ops->get_rxfh_indir)
389 return -EOPNOTSUPP;
390
391 if (copy_from_user(&table_size,
392 useraddr + offsetof(struct ethtool_rxfh_indir, size),
393 sizeof(table_size)))
394 return -EFAULT;
395
396 if (table_size >
397 (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
398 return -ENOMEM;
399 full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
400 indir = kmalloc(full_size, GFP_USER);
401 if (!indir)
402 return -ENOMEM;
403
404 indir->cmd = ETHTOOL_GRXFHINDIR;
405 indir->size = table_size;
406 ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
407 if (ret)
408 goto out;
409
410 if (copy_to_user(useraddr, indir, full_size))
411 ret = -EFAULT;
412
413out:
414 kfree(indir);
415 return ret;
416}
417
418static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
419 void __user *useraddr)
420{
421 struct ethtool_rxfh_indir *indir;
422 u32 table_size;
423 size_t full_size;
424 int ret;
425
426 if (!dev->ethtool_ops->set_rxfh_indir)
427 return -EOPNOTSUPP;
428
429 if (copy_from_user(&table_size,
430 useraddr + offsetof(struct ethtool_rxfh_indir, size),
431 sizeof(table_size)))
432 return -EFAULT;
433
434 if (table_size >
435 (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
436 return -ENOMEM;
437 full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
438 indir = kmalloc(full_size, GFP_USER);
439 if (!indir)
440 return -ENOMEM;
441
442 if (copy_from_user(indir, useraddr, full_size)) {
443 ret = -EFAULT;
444 goto out;
445 }
446
447 ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
448
449out:
450 kfree(indir);
451 return ret;
452}
453
379static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, 454static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
380 struct ethtool_rx_ntuple_flow_spec *spec, 455 struct ethtool_rx_ntuple_flow_spec *spec,
381 struct ethtool_rx_ntuple_flow_spec_container *fsc) 456 struct ethtool_rx_ntuple_flow_spec_container *fsc)
@@ -1516,12 +1591,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1516 case ETHTOOL_GRXCLSRLCNT: 1591 case ETHTOOL_GRXCLSRLCNT:
1517 case ETHTOOL_GRXCLSRULE: 1592 case ETHTOOL_GRXCLSRULE:
1518 case ETHTOOL_GRXCLSRLALL: 1593 case ETHTOOL_GRXCLSRLALL:
1519 rc = ethtool_get_rxnfc(dev, useraddr); 1594 rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
1520 break; 1595 break;
1521 case ETHTOOL_SRXFH: 1596 case ETHTOOL_SRXFH:
1522 case ETHTOOL_SRXCLSRLDEL: 1597 case ETHTOOL_SRXCLSRLDEL:
1523 case ETHTOOL_SRXCLSRLINS: 1598 case ETHTOOL_SRXCLSRLINS:
1524 rc = ethtool_set_rxnfc(dev, useraddr); 1599 rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
1525 break; 1600 break;
1526 case ETHTOOL_GGRO: 1601 case ETHTOOL_GGRO:
1527 rc = ethtool_get_gro(dev, useraddr); 1602 rc = ethtool_get_gro(dev, useraddr);
@@ -1544,6 +1619,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1544 case ETHTOOL_GSSET_INFO: 1619 case ETHTOOL_GSSET_INFO:
1545 rc = ethtool_get_sset_info(dev, useraddr); 1620 rc = ethtool_get_sset_info(dev, useraddr);
1546 break; 1621 break;
1622 case ETHTOOL_GRXFHINDIR:
1623 rc = ethtool_get_rxfh_indir(dev, useraddr);
1624 break;
1625 case ETHTOOL_SRXFHINDIR:
1626 rc = ethtool_set_rxfh_indir(dev, useraddr);
1627 break;
1547 default: 1628 default:
1548 rc = -EOPNOTSUPP; 1629 rc = -EOPNOTSUPP;
1549 } 1630 }
diff --git a/net/core/filter.c b/net/core/filter.c
index da69fb728d32..52b051f82a01 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
128 fentry = &filter[pc]; 128 fentry = &filter[pc];
129 129
130 switch (fentry->code) { 130 switch (fentry->code) {
131 case BPF_ALU|BPF_ADD|BPF_X: 131 case BPF_S_ALU_ADD_X:
132 A += X; 132 A += X;
133 continue; 133 continue;
134 case BPF_ALU|BPF_ADD|BPF_K: 134 case BPF_S_ALU_ADD_K:
135 A += fentry->k; 135 A += fentry->k;
136 continue; 136 continue;
137 case BPF_ALU|BPF_SUB|BPF_X: 137 case BPF_S_ALU_SUB_X:
138 A -= X; 138 A -= X;
139 continue; 139 continue;
140 case BPF_ALU|BPF_SUB|BPF_K: 140 case BPF_S_ALU_SUB_K:
141 A -= fentry->k; 141 A -= fentry->k;
142 continue; 142 continue;
143 case BPF_ALU|BPF_MUL|BPF_X: 143 case BPF_S_ALU_MUL_X:
144 A *= X; 144 A *= X;
145 continue; 145 continue;
146 case BPF_ALU|BPF_MUL|BPF_K: 146 case BPF_S_ALU_MUL_K:
147 A *= fentry->k; 147 A *= fentry->k;
148 continue; 148 continue;
149 case BPF_ALU|BPF_DIV|BPF_X: 149 case BPF_S_ALU_DIV_X:
150 if (X == 0) 150 if (X == 0)
151 return 0; 151 return 0;
152 A /= X; 152 A /= X;
153 continue; 153 continue;
154 case BPF_ALU|BPF_DIV|BPF_K: 154 case BPF_S_ALU_DIV_K:
155 A /= fentry->k; 155 A /= fentry->k;
156 continue; 156 continue;
157 case BPF_ALU|BPF_AND|BPF_X: 157 case BPF_S_ALU_AND_X:
158 A &= X; 158 A &= X;
159 continue; 159 continue;
160 case BPF_ALU|BPF_AND|BPF_K: 160 case BPF_S_ALU_AND_K:
161 A &= fentry->k; 161 A &= fentry->k;
162 continue; 162 continue;
163 case BPF_ALU|BPF_OR|BPF_X: 163 case BPF_S_ALU_OR_X:
164 A |= X; 164 A |= X;
165 continue; 165 continue;
166 case BPF_ALU|BPF_OR|BPF_K: 166 case BPF_S_ALU_OR_K:
167 A |= fentry->k; 167 A |= fentry->k;
168 continue; 168 continue;
169 case BPF_ALU|BPF_LSH|BPF_X: 169 case BPF_S_ALU_LSH_X:
170 A <<= X; 170 A <<= X;
171 continue; 171 continue;
172 case BPF_ALU|BPF_LSH|BPF_K: 172 case BPF_S_ALU_LSH_K:
173 A <<= fentry->k; 173 A <<= fentry->k;
174 continue; 174 continue;
175 case BPF_ALU|BPF_RSH|BPF_X: 175 case BPF_S_ALU_RSH_X:
176 A >>= X; 176 A >>= X;
177 continue; 177 continue;
178 case BPF_ALU|BPF_RSH|BPF_K: 178 case BPF_S_ALU_RSH_K:
179 A >>= fentry->k; 179 A >>= fentry->k;
180 continue; 180 continue;
181 case BPF_ALU|BPF_NEG: 181 case BPF_S_ALU_NEG:
182 A = -A; 182 A = -A;
183 continue; 183 continue;
184 case BPF_JMP|BPF_JA: 184 case BPF_S_JMP_JA:
185 pc += fentry->k; 185 pc += fentry->k;
186 continue; 186 continue;
187 case BPF_JMP|BPF_JGT|BPF_K: 187 case BPF_S_JMP_JGT_K:
188 pc += (A > fentry->k) ? fentry->jt : fentry->jf; 188 pc += (A > fentry->k) ? fentry->jt : fentry->jf;
189 continue; 189 continue;
190 case BPF_JMP|BPF_JGE|BPF_K: 190 case BPF_S_JMP_JGE_K:
191 pc += (A >= fentry->k) ? fentry->jt : fentry->jf; 191 pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
192 continue; 192 continue;
193 case BPF_JMP|BPF_JEQ|BPF_K: 193 case BPF_S_JMP_JEQ_K:
194 pc += (A == fentry->k) ? fentry->jt : fentry->jf; 194 pc += (A == fentry->k) ? fentry->jt : fentry->jf;
195 continue; 195 continue;
196 case BPF_JMP|BPF_JSET|BPF_K: 196 case BPF_S_JMP_JSET_K:
197 pc += (A & fentry->k) ? fentry->jt : fentry->jf; 197 pc += (A & fentry->k) ? fentry->jt : fentry->jf;
198 continue; 198 continue;
199 case BPF_JMP|BPF_JGT|BPF_X: 199 case BPF_S_JMP_JGT_X:
200 pc += (A > X) ? fentry->jt : fentry->jf; 200 pc += (A > X) ? fentry->jt : fentry->jf;
201 continue; 201 continue;
202 case BPF_JMP|BPF_JGE|BPF_X: 202 case BPF_S_JMP_JGE_X:
203 pc += (A >= X) ? fentry->jt : fentry->jf; 203 pc += (A >= X) ? fentry->jt : fentry->jf;
204 continue; 204 continue;
205 case BPF_JMP|BPF_JEQ|BPF_X: 205 case BPF_S_JMP_JEQ_X:
206 pc += (A == X) ? fentry->jt : fentry->jf; 206 pc += (A == X) ? fentry->jt : fentry->jf;
207 continue; 207 continue;
208 case BPF_JMP|BPF_JSET|BPF_X: 208 case BPF_S_JMP_JSET_X:
209 pc += (A & X) ? fentry->jt : fentry->jf; 209 pc += (A & X) ? fentry->jt : fentry->jf;
210 continue; 210 continue;
211 case BPF_LD|BPF_W|BPF_ABS: 211 case BPF_S_LD_W_ABS:
212 k = fentry->k; 212 k = fentry->k;
213load_w: 213load_w:
214 ptr = load_pointer(skb, k, 4, &tmp); 214 ptr = load_pointer(skb, k, 4, &tmp);
@@ -217,7 +217,7 @@ load_w:
217 continue; 217 continue;
218 } 218 }
219 break; 219 break;
220 case BPF_LD|BPF_H|BPF_ABS: 220 case BPF_S_LD_H_ABS:
221 k = fentry->k; 221 k = fentry->k;
222load_h: 222load_h:
223 ptr = load_pointer(skb, k, 2, &tmp); 223 ptr = load_pointer(skb, k, 2, &tmp);
@@ -226,7 +226,7 @@ load_h:
226 continue; 226 continue;
227 } 227 }
228 break; 228 break;
229 case BPF_LD|BPF_B|BPF_ABS: 229 case BPF_S_LD_B_ABS:
230 k = fentry->k; 230 k = fentry->k;
231load_b: 231load_b:
232 ptr = load_pointer(skb, k, 1, &tmp); 232 ptr = load_pointer(skb, k, 1, &tmp);
@@ -235,54 +235,54 @@ load_b:
235 continue; 235 continue;
236 } 236 }
237 break; 237 break;
238 case BPF_LD|BPF_W|BPF_LEN: 238 case BPF_S_LD_W_LEN:
239 A = skb->len; 239 A = skb->len;
240 continue; 240 continue;
241 case BPF_LDX|BPF_W|BPF_LEN: 241 case BPF_S_LDX_W_LEN:
242 X = skb->len; 242 X = skb->len;
243 continue; 243 continue;
244 case BPF_LD|BPF_W|BPF_IND: 244 case BPF_S_LD_W_IND:
245 k = X + fentry->k; 245 k = X + fentry->k;
246 goto load_w; 246 goto load_w;
247 case BPF_LD|BPF_H|BPF_IND: 247 case BPF_S_LD_H_IND:
248 k = X + fentry->k; 248 k = X + fentry->k;
249 goto load_h; 249 goto load_h;
250 case BPF_LD|BPF_B|BPF_IND: 250 case BPF_S_LD_B_IND:
251 k = X + fentry->k; 251 k = X + fentry->k;
252 goto load_b; 252 goto load_b;
253 case BPF_LDX|BPF_B|BPF_MSH: 253 case BPF_S_LDX_B_MSH:
254 ptr = load_pointer(skb, fentry->k, 1, &tmp); 254 ptr = load_pointer(skb, fentry->k, 1, &tmp);
255 if (ptr != NULL) { 255 if (ptr != NULL) {
256 X = (*(u8 *)ptr & 0xf) << 2; 256 X = (*(u8 *)ptr & 0xf) << 2;
257 continue; 257 continue;
258 } 258 }
259 return 0; 259 return 0;
260 case BPF_LD|BPF_IMM: 260 case BPF_S_LD_IMM:
261 A = fentry->k; 261 A = fentry->k;
262 continue; 262 continue;
263 case BPF_LDX|BPF_IMM: 263 case BPF_S_LDX_IMM:
264 X = fentry->k; 264 X = fentry->k;
265 continue; 265 continue;
266 case BPF_LD|BPF_MEM: 266 case BPF_S_LD_MEM:
267 A = mem[fentry->k]; 267 A = mem[fentry->k];
268 continue; 268 continue;
269 case BPF_LDX|BPF_MEM: 269 case BPF_S_LDX_MEM:
270 X = mem[fentry->k]; 270 X = mem[fentry->k];
271 continue; 271 continue;
272 case BPF_MISC|BPF_TAX: 272 case BPF_S_MISC_TAX:
273 X = A; 273 X = A;
274 continue; 274 continue;
275 case BPF_MISC|BPF_TXA: 275 case BPF_S_MISC_TXA:
276 A = X; 276 A = X;
277 continue; 277 continue;
278 case BPF_RET|BPF_K: 278 case BPF_S_RET_K:
279 return fentry->k; 279 return fentry->k;
280 case BPF_RET|BPF_A: 280 case BPF_S_RET_A:
281 return A; 281 return A;
282 case BPF_ST: 282 case BPF_S_ST:
283 mem[fentry->k] = A; 283 mem[fentry->k] = A;
284 continue; 284 continue;
285 case BPF_STX: 285 case BPF_S_STX:
286 mem[fentry->k] = X; 286 mem[fentry->k] = X;
287 continue; 287 continue;
288 default: 288 default:
@@ -390,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
390 /* Only allow valid instructions */ 390 /* Only allow valid instructions */
391 switch (ftest->code) { 391 switch (ftest->code) {
392 case BPF_ALU|BPF_ADD|BPF_K: 392 case BPF_ALU|BPF_ADD|BPF_K:
393 ftest->code = BPF_S_ALU_ADD_K;
394 break;
393 case BPF_ALU|BPF_ADD|BPF_X: 395 case BPF_ALU|BPF_ADD|BPF_X:
396 ftest->code = BPF_S_ALU_ADD_X;
397 break;
394 case BPF_ALU|BPF_SUB|BPF_K: 398 case BPF_ALU|BPF_SUB|BPF_K:
399 ftest->code = BPF_S_ALU_SUB_K;
400 break;
395 case BPF_ALU|BPF_SUB|BPF_X: 401 case BPF_ALU|BPF_SUB|BPF_X:
402 ftest->code = BPF_S_ALU_SUB_X;
403 break;
396 case BPF_ALU|BPF_MUL|BPF_K: 404 case BPF_ALU|BPF_MUL|BPF_K:
405 ftest->code = BPF_S_ALU_MUL_K;
406 break;
397 case BPF_ALU|BPF_MUL|BPF_X: 407 case BPF_ALU|BPF_MUL|BPF_X:
408 ftest->code = BPF_S_ALU_MUL_X;
409 break;
398 case BPF_ALU|BPF_DIV|BPF_X: 410 case BPF_ALU|BPF_DIV|BPF_X:
411 ftest->code = BPF_S_ALU_DIV_X;
412 break;
399 case BPF_ALU|BPF_AND|BPF_K: 413 case BPF_ALU|BPF_AND|BPF_K:
414 ftest->code = BPF_S_ALU_AND_K;
415 break;
400 case BPF_ALU|BPF_AND|BPF_X: 416 case BPF_ALU|BPF_AND|BPF_X:
417 ftest->code = BPF_S_ALU_AND_X;
418 break;
401 case BPF_ALU|BPF_OR|BPF_K: 419 case BPF_ALU|BPF_OR|BPF_K:
420 ftest->code = BPF_S_ALU_OR_K;
421 break;
402 case BPF_ALU|BPF_OR|BPF_X: 422 case BPF_ALU|BPF_OR|BPF_X:
423 ftest->code = BPF_S_ALU_OR_X;
424 break;
403 case BPF_ALU|BPF_LSH|BPF_K: 425 case BPF_ALU|BPF_LSH|BPF_K:
426 ftest->code = BPF_S_ALU_LSH_K;
427 break;
404 case BPF_ALU|BPF_LSH|BPF_X: 428 case BPF_ALU|BPF_LSH|BPF_X:
429 ftest->code = BPF_S_ALU_LSH_X;
430 break;
405 case BPF_ALU|BPF_RSH|BPF_K: 431 case BPF_ALU|BPF_RSH|BPF_K:
432 ftest->code = BPF_S_ALU_RSH_K;
433 break;
406 case BPF_ALU|BPF_RSH|BPF_X: 434 case BPF_ALU|BPF_RSH|BPF_X:
435 ftest->code = BPF_S_ALU_RSH_X;
436 break;
407 case BPF_ALU|BPF_NEG: 437 case BPF_ALU|BPF_NEG:
438 ftest->code = BPF_S_ALU_NEG;
439 break;
408 case BPF_LD|BPF_W|BPF_ABS: 440 case BPF_LD|BPF_W|BPF_ABS:
441 ftest->code = BPF_S_LD_W_ABS;
442 break;
409 case BPF_LD|BPF_H|BPF_ABS: 443 case BPF_LD|BPF_H|BPF_ABS:
444 ftest->code = BPF_S_LD_H_ABS;
445 break;
410 case BPF_LD|BPF_B|BPF_ABS: 446 case BPF_LD|BPF_B|BPF_ABS:
447 ftest->code = BPF_S_LD_B_ABS;
448 break;
411 case BPF_LD|BPF_W|BPF_LEN: 449 case BPF_LD|BPF_W|BPF_LEN:
450 ftest->code = BPF_S_LD_W_LEN;
451 break;
412 case BPF_LD|BPF_W|BPF_IND: 452 case BPF_LD|BPF_W|BPF_IND:
453 ftest->code = BPF_S_LD_W_IND;
454 break;
413 case BPF_LD|BPF_H|BPF_IND: 455 case BPF_LD|BPF_H|BPF_IND:
456 ftest->code = BPF_S_LD_H_IND;
457 break;
414 case BPF_LD|BPF_B|BPF_IND: 458 case BPF_LD|BPF_B|BPF_IND:
459 ftest->code = BPF_S_LD_B_IND;
460 break;
415 case BPF_LD|BPF_IMM: 461 case BPF_LD|BPF_IMM:
462 ftest->code = BPF_S_LD_IMM;
463 break;
416 case BPF_LDX|BPF_W|BPF_LEN: 464 case BPF_LDX|BPF_W|BPF_LEN:
465 ftest->code = BPF_S_LDX_W_LEN;
466 break;
417 case BPF_LDX|BPF_B|BPF_MSH: 467 case BPF_LDX|BPF_B|BPF_MSH:
468 ftest->code = BPF_S_LDX_B_MSH;
469 break;
418 case BPF_LDX|BPF_IMM: 470 case BPF_LDX|BPF_IMM:
471 ftest->code = BPF_S_LDX_IMM;
472 break;
419 case BPF_MISC|BPF_TAX: 473 case BPF_MISC|BPF_TAX:
474 ftest->code = BPF_S_MISC_TAX;
475 break;
420 case BPF_MISC|BPF_TXA: 476 case BPF_MISC|BPF_TXA:
477 ftest->code = BPF_S_MISC_TXA;
478 break;
421 case BPF_RET|BPF_K: 479 case BPF_RET|BPF_K:
480 ftest->code = BPF_S_RET_K;
481 break;
422 case BPF_RET|BPF_A: 482 case BPF_RET|BPF_A:
483 ftest->code = BPF_S_RET_A;
423 break; 484 break;
424 485
425 /* Some instructions need special checks */ 486 /* Some instructions need special checks */
426 487
427 case BPF_ALU|BPF_DIV|BPF_K:
428 /* check for division by zero */ 488 /* check for division by zero */
489 case BPF_ALU|BPF_DIV|BPF_K:
429 if (ftest->k == 0) 490 if (ftest->k == 0)
430 return -EINVAL; 491 return -EINVAL;
492 ftest->code = BPF_S_ALU_DIV_K;
431 break; 493 break;
432 494
495 /* check for invalid memory addresses */
433 case BPF_LD|BPF_MEM: 496 case BPF_LD|BPF_MEM:
497 if (ftest->k >= BPF_MEMWORDS)
498 return -EINVAL;
499 ftest->code = BPF_S_LD_MEM;
500 break;
434 case BPF_LDX|BPF_MEM: 501 case BPF_LDX|BPF_MEM:
502 if (ftest->k >= BPF_MEMWORDS)
503 return -EINVAL;
504 ftest->code = BPF_S_LDX_MEM;
505 break;
435 case BPF_ST: 506 case BPF_ST:
507 if (ftest->k >= BPF_MEMWORDS)
508 return -EINVAL;
509 ftest->code = BPF_S_ST;
510 break;
436 case BPF_STX: 511 case BPF_STX:
437 /* check for invalid memory addresses */
438 if (ftest->k >= BPF_MEMWORDS) 512 if (ftest->k >= BPF_MEMWORDS)
439 return -EINVAL; 513 return -EINVAL;
514 ftest->code = BPF_S_STX;
440 break; 515 break;
441 516
442 case BPF_JMP|BPF_JA: 517 case BPF_JMP|BPF_JA:
@@ -447,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
447 */ 522 */
448 if (ftest->k >= (unsigned)(flen-pc-1)) 523 if (ftest->k >= (unsigned)(flen-pc-1))
449 return -EINVAL; 524 return -EINVAL;
525 ftest->code = BPF_S_JMP_JA;
450 break; 526 break;
451 527
452 case BPF_JMP|BPF_JEQ|BPF_K: 528 case BPF_JMP|BPF_JEQ|BPF_K:
529 ftest->code = BPF_S_JMP_JEQ_K;
530 break;
453 case BPF_JMP|BPF_JEQ|BPF_X: 531 case BPF_JMP|BPF_JEQ|BPF_X:
532 ftest->code = BPF_S_JMP_JEQ_X;
533 break;
454 case BPF_JMP|BPF_JGE|BPF_K: 534 case BPF_JMP|BPF_JGE|BPF_K:
535 ftest->code = BPF_S_JMP_JGE_K;
536 break;
455 case BPF_JMP|BPF_JGE|BPF_X: 537 case BPF_JMP|BPF_JGE|BPF_X:
538 ftest->code = BPF_S_JMP_JGE_X;
539 break;
456 case BPF_JMP|BPF_JGT|BPF_K: 540 case BPF_JMP|BPF_JGT|BPF_K:
541 ftest->code = BPF_S_JMP_JGT_K;
542 break;
457 case BPF_JMP|BPF_JGT|BPF_X: 543 case BPF_JMP|BPF_JGT|BPF_X:
544 ftest->code = BPF_S_JMP_JGT_X;
545 break;
458 case BPF_JMP|BPF_JSET|BPF_K: 546 case BPF_JMP|BPF_JSET|BPF_K:
547 ftest->code = BPF_S_JMP_JSET_K;
548 break;
459 case BPF_JMP|BPF_JSET|BPF_X: 549 case BPF_JMP|BPF_JSET|BPF_X:
550 ftest->code = BPF_S_JMP_JSET_X;
551 break;
552
553 default:
554 return -EINVAL;
555 }
556
460 /* for conditionals both must be safe */ 557 /* for conditionals both must be safe */
558 switch (ftest->code) {
559 case BPF_S_JMP_JEQ_K:
560 case BPF_S_JMP_JEQ_X:
561 case BPF_S_JMP_JGE_K:
562 case BPF_S_JMP_JGE_X:
563 case BPF_S_JMP_JGT_K:
564 case BPF_S_JMP_JGT_X:
565 case BPF_S_JMP_JSET_X:
566 case BPF_S_JMP_JSET_K:
461 if (pc + ftest->jt + 1 >= flen || 567 if (pc + ftest->jt + 1 >= flen ||
462 pc + ftest->jf + 1 >= flen) 568 pc + ftest->jf + 1 >= flen)
463 return -EINVAL; 569 return -EINVAL;
464 break; 570 }
571 }
465 572
573 /* last instruction must be a RET code */
574 switch (filter[flen - 1].code) {
575 case BPF_S_RET_K:
576 case BPF_S_RET_A:
577 return 0;
578 break;
466 default: 579 default:
467 return -EINVAL; 580 return -EINVAL;
468 } 581 }
469 }
470
471 return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
472} 582}
473EXPORT_SYMBOL(sk_chk_filter); 583EXPORT_SYMBOL(sk_chk_filter);
474 584
diff --git a/net/core/flow.c b/net/core/flow.c
index 161900674009..f67dcbfe54ef 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -62,6 +62,7 @@ struct flow_cache {
62}; 62};
63 63
64atomic_t flow_cache_genid = ATOMIC_INIT(0); 64atomic_t flow_cache_genid = ATOMIC_INIT(0);
65EXPORT_SYMBOL(flow_cache_genid);
65static struct flow_cache flow_cache_global; 66static struct flow_cache flow_cache_global;
66static struct kmem_cache *flow_cachep; 67static struct kmem_cache *flow_cachep;
67 68
@@ -222,7 +223,7 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
222 unsigned int hash; 223 unsigned int hash;
223 224
224 local_bh_disable(); 225 local_bh_disable();
225 fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); 226 fcp = this_cpu_ptr(fc->percpu);
226 227
227 fle = NULL; 228 fle = NULL;
228 flo = NULL; 229 flo = NULL;
@@ -291,6 +292,7 @@ ret_object:
291 local_bh_enable(); 292 local_bh_enable();
292 return flo; 293 return flo;
293} 294}
295EXPORT_SYMBOL(flow_cache_lookup);
294 296
295static void flow_cache_flush_tasklet(unsigned long data) 297static void flow_cache_flush_tasklet(unsigned long data)
296{ 298{
@@ -302,7 +304,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
302 LIST_HEAD(gc_list); 304 LIST_HEAD(gc_list);
303 int i, deleted = 0; 305 int i, deleted = 0;
304 306
305 fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); 307 fcp = this_cpu_ptr(fc->percpu);
306 for (i = 0; i < flow_cache_hash_size(fc); i++) { 308 for (i = 0; i < flow_cache_hash_size(fc); i++) {
307 hlist_for_each_entry_safe(fle, entry, tmp, 309 hlist_for_each_entry_safe(fle, entry, tmp,
308 &fcp->hash_table[i], u.hlist) { 310 &fcp->hash_table[i], u.hlist) {
@@ -424,6 +426,3 @@ static int __init flow_cache_init_global(void)
424} 426}
425 427
426module_init(flow_cache_init_global); 428module_init(flow_cache_init_global);
427
428EXPORT_SYMBOL(flow_cache_genid);
429EXPORT_SYMBOL(flow_cache_lookup);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index cf8e70392fe0..9fbe7f7429b0 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -107,6 +107,7 @@ static DEFINE_RWLOCK(est_lock);
107 107
108/* Protects against soft lockup during large deletion */ 108/* Protects against soft lockup during large deletion */
109static struct rb_root est_root = RB_ROOT; 109static struct rb_root est_root = RB_ROOT;
110static DEFINE_SPINLOCK(est_tree_lock);
110 111
111static void est_timer(unsigned long arg) 112static void est_timer(unsigned long arg)
112{ 113{
@@ -201,7 +202,6 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
201 * 202 *
202 * Returns 0 on success or a negative error code. 203 * Returns 0 on success or a negative error code.
203 * 204 *
204 * NOTE: Called under rtnl_mutex
205 */ 205 */
206int gen_new_estimator(struct gnet_stats_basic_packed *bstats, 206int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
207 struct gnet_stats_rate_est *rate_est, 207 struct gnet_stats_rate_est *rate_est,
@@ -232,6 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
232 est->last_packets = bstats->packets; 232 est->last_packets = bstats->packets;
233 est->avpps = rate_est->pps<<10; 233 est->avpps = rate_est->pps<<10;
234 234
235 spin_lock(&est_tree_lock);
235 if (!elist[idx].timer.function) { 236 if (!elist[idx].timer.function) {
236 INIT_LIST_HEAD(&elist[idx].list); 237 INIT_LIST_HEAD(&elist[idx].list);
237 setup_timer(&elist[idx].timer, est_timer, idx); 238 setup_timer(&elist[idx].timer, est_timer, idx);
@@ -242,6 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
242 243
243 list_add_rcu(&est->list, &elist[idx].list); 244 list_add_rcu(&est->list, &elist[idx].list);
244 gen_add_node(est); 245 gen_add_node(est);
246 spin_unlock(&est_tree_lock);
245 247
246 return 0; 248 return 0;
247} 249}
@@ -261,13 +263,14 @@ static void __gen_kill_estimator(struct rcu_head *head)
261 * 263 *
262 * Removes the rate estimator specified by &bstats and &rate_est. 264 * Removes the rate estimator specified by &bstats and &rate_est.
263 * 265 *
264 * NOTE: Called under rtnl_mutex 266 * Note : Caller should respect an RCU grace period before freeing stats_lock
265 */ 267 */
266void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, 268void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
267 struct gnet_stats_rate_est *rate_est) 269 struct gnet_stats_rate_est *rate_est)
268{ 270{
269 struct gen_estimator *e; 271 struct gen_estimator *e;
270 272
273 spin_lock(&est_tree_lock);
271 while ((e = gen_find_node(bstats, rate_est))) { 274 while ((e = gen_find_node(bstats, rate_est))) {
272 rb_erase(&e->node, &est_root); 275 rb_erase(&e->node, &est_root);
273 276
@@ -278,6 +281,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
278 list_del_rcu(&e->list); 281 list_del_rcu(&e->list);
279 call_rcu(&e->e_rcu, __gen_kill_estimator); 282 call_rcu(&e->e_rcu, __gen_kill_estimator);
280 } 283 }
284 spin_unlock(&est_tree_lock);
281} 285}
282EXPORT_SYMBOL(gen_kill_estimator); 286EXPORT_SYMBOL(gen_kill_estimator);
283 287
@@ -312,8 +316,14 @@ EXPORT_SYMBOL(gen_replace_estimator);
312bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, 316bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
313 const struct gnet_stats_rate_est *rate_est) 317 const struct gnet_stats_rate_est *rate_est)
314{ 318{
319 bool res;
320
315 ASSERT_RTNL(); 321 ASSERT_RTNL();
316 322
317 return gen_find_node(bstats, rate_est) != NULL; 323 spin_lock(&est_tree_lock);
324 res = gen_find_node(bstats, rate_est) != NULL;
325 spin_unlock(&est_tree_lock);
326
327 return res;
318} 328}
319EXPORT_SYMBOL(gen_estimator_active); 329EXPORT_SYMBOL(gen_estimator_active);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 393b1d8618e2..0452eb27a272 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -73,6 +73,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
73 73
74 return 0; 74 return 0;
75} 75}
76EXPORT_SYMBOL(gnet_stats_start_copy_compat);
76 77
77/** 78/**
78 * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode 79 * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
@@ -93,6 +94,7 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
93{ 94{
94 return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d); 95 return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
95} 96}
97EXPORT_SYMBOL(gnet_stats_start_copy);
96 98
97/** 99/**
98 * gnet_stats_copy_basic - copy basic statistics into statistic TLV 100 * gnet_stats_copy_basic - copy basic statistics into statistic TLV
@@ -123,6 +125,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
123 } 125 }
124 return 0; 126 return 0;
125} 127}
128EXPORT_SYMBOL(gnet_stats_copy_basic);
126 129
127/** 130/**
128 * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV 131 * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
@@ -154,6 +157,7 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
154 157
155 return 0; 158 return 0;
156} 159}
160EXPORT_SYMBOL(gnet_stats_copy_rate_est);
157 161
158/** 162/**
159 * gnet_stats_copy_queue - copy queue statistics into statistics TLV 163 * gnet_stats_copy_queue - copy queue statistics into statistics TLV
@@ -181,6 +185,7 @@ gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
181 185
182 return 0; 186 return 0;
183} 187}
188EXPORT_SYMBOL(gnet_stats_copy_queue);
184 189
185/** 190/**
186 * gnet_stats_copy_app - copy application specific statistics into statistics TLV 191 * gnet_stats_copy_app - copy application specific statistics into statistics TLV
@@ -208,6 +213,7 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
208 213
209 return 0; 214 return 0;
210} 215}
216EXPORT_SYMBOL(gnet_stats_copy_app);
211 217
212/** 218/**
213 * gnet_stats_finish_copy - finish dumping procedure 219 * gnet_stats_finish_copy - finish dumping procedure
@@ -241,12 +247,4 @@ gnet_stats_finish_copy(struct gnet_dump *d)
241 spin_unlock_bh(d->lock); 247 spin_unlock_bh(d->lock);
242 return 0; 248 return 0;
243} 249}
244
245
246EXPORT_SYMBOL(gnet_stats_start_copy);
247EXPORT_SYMBOL(gnet_stats_start_copy_compat);
248EXPORT_SYMBOL(gnet_stats_copy_basic);
249EXPORT_SYMBOL(gnet_stats_copy_rate_est);
250EXPORT_SYMBOL(gnet_stats_copy_queue);
251EXPORT_SYMBOL(gnet_stats_copy_app);
252EXPORT_SYMBOL(gnet_stats_finish_copy); 250EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 1e7f4e91a935..1cd98df412df 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -95,6 +95,7 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
95 95
96 return 0; 96 return 0;
97} 97}
98EXPORT_SYMBOL(memcpy_toiovec);
98 99
99/* 100/*
100 * Copy kernel to iovec. Returns -EFAULT on error. 101 * Copy kernel to iovec. Returns -EFAULT on error.
@@ -120,6 +121,7 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
120 121
121 return 0; 122 return 0;
122} 123}
124EXPORT_SYMBOL(memcpy_toiovecend);
123 125
124/* 126/*
125 * Copy iovec to kernel. Returns -EFAULT on error. 127 * Copy iovec to kernel. Returns -EFAULT on error.
@@ -144,6 +146,7 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
144 146
145 return 0; 147 return 0;
146} 148}
149EXPORT_SYMBOL(memcpy_fromiovec);
147 150
148/* 151/*
149 * Copy iovec from kernel. Returns -EFAULT on error. 152 * Copy iovec from kernel. Returns -EFAULT on error.
@@ -172,6 +175,7 @@ int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
172 175
173 return 0; 176 return 0;
174} 177}
178EXPORT_SYMBOL(memcpy_fromiovecend);
175 179
176/* 180/*
177 * And now for the all-in-one: copy and checksum from a user iovec 181 * And now for the all-in-one: copy and checksum from a user iovec
@@ -256,9 +260,4 @@ out_fault:
256 err = -EFAULT; 260 err = -EFAULT;
257 goto out; 261 goto out;
258} 262}
259
260EXPORT_SYMBOL(csum_partial_copy_fromiovecend); 263EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
261EXPORT_SYMBOL(memcpy_fromiovec);
262EXPORT_SYMBOL(memcpy_fromiovecend);
263EXPORT_SYMBOL(memcpy_toiovec);
264EXPORT_SYMBOL(memcpy_toiovecend);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index bdbce2f5875b..01a1101b5936 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -243,5 +243,4 @@ void linkwatch_fire_event(struct net_device *dev)
243 243
244 linkwatch_schedule_work(urgent); 244 linkwatch_schedule_work(urgent);
245} 245}
246
247EXPORT_SYMBOL(linkwatch_fire_event); 246EXPORT_SYMBOL(linkwatch_fire_event);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bff37908bd55..a4e0a7482c2b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -934,6 +934,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
934 kfree_skb(buff); 934 kfree_skb(buff);
935 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); 935 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
936 } 936 }
937 skb_dst_force(skb);
937 __skb_queue_tail(&neigh->arp_queue, skb); 938 __skb_queue_tail(&neigh->arp_queue, skb);
938 } 939 }
939 rc = 1; 940 rc = 1;
@@ -948,7 +949,10 @@ static void neigh_update_hhs(struct neighbour *neigh)
948{ 949{
949 struct hh_cache *hh; 950 struct hh_cache *hh;
950 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) 951 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
951 = neigh->dev->header_ops->cache_update; 952 = NULL;
953
954 if (neigh->dev->header_ops)
955 update = neigh->dev->header_ops->cache_update;
952 956
953 if (update) { 957 if (update) {
954 for (hh = neigh->hh; hh; hh = hh->hh_next) { 958 for (hh = neigh->hh; hh; hh = hh->hh_next) {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c57c4b228bb5..d2b596537d41 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -14,7 +14,9 @@
14#include <linux/netdevice.h> 14#include <linux/netdevice.h>
15#include <linux/if_arp.h> 15#include <linux/if_arp.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/nsproxy.h>
17#include <net/sock.h> 18#include <net/sock.h>
19#include <net/net_namespace.h>
18#include <linux/rtnetlink.h> 20#include <linux/rtnetlink.h>
19#include <linux/wireless.h> 21#include <linux/wireless.h>
20#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
@@ -27,6 +29,7 @@ static const char fmt_hex[] = "%#x\n";
27static const char fmt_long_hex[] = "%#lx\n"; 29static const char fmt_long_hex[] = "%#lx\n";
28static const char fmt_dec[] = "%d\n"; 30static const char fmt_dec[] = "%d\n";
29static const char fmt_ulong[] = "%lu\n"; 31static const char fmt_ulong[] = "%lu\n";
32static const char fmt_u64[] = "%llu\n";
30 33
31static inline int dev_isalive(const struct net_device *dev) 34static inline int dev_isalive(const struct net_device *dev)
32{ 35{
@@ -322,14 +325,15 @@ static ssize_t netstat_show(const struct device *d,
322 struct net_device *dev = to_net_dev(d); 325 struct net_device *dev = to_net_dev(d);
323 ssize_t ret = -EINVAL; 326 ssize_t ret = -EINVAL;
324 327
325 WARN_ON(offset > sizeof(struct net_device_stats) || 328 WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
326 offset % sizeof(unsigned long) != 0); 329 offset % sizeof(u64) != 0);
327 330
328 read_lock(&dev_base_lock); 331 read_lock(&dev_base_lock);
329 if (dev_isalive(dev)) { 332 if (dev_isalive(dev)) {
330 const struct net_device_stats *stats = dev_get_stats(dev); 333 struct rtnl_link_stats64 temp;
331 ret = sprintf(buf, fmt_ulong, 334 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
332 *(unsigned long *)(((u8 *) stats) + offset)); 335
336 ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
333 } 337 }
334 read_unlock(&dev_base_lock); 338 read_unlock(&dev_base_lock);
335 return ret; 339 return ret;
@@ -341,7 +345,7 @@ static ssize_t show_##name(struct device *d, \
341 struct device_attribute *attr, char *buf) \ 345 struct device_attribute *attr, char *buf) \
342{ \ 346{ \
343 return netstat_show(d, attr, buf, \ 347 return netstat_show(d, attr, buf, \
344 offsetof(struct net_device_stats, name)); \ 348 offsetof(struct rtnl_link_stats64, name)); \
345} \ 349} \
346static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) 350static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
347 351
@@ -467,6 +471,7 @@ static struct attribute_group wireless_group = {
467 .attrs = wireless_attrs, 471 .attrs = wireless_attrs,
468}; 472};
469#endif 473#endif
474#endif /* CONFIG_SYSFS */
470 475
471#ifdef CONFIG_RPS 476#ifdef CONFIG_RPS
472/* 477/*
@@ -766,7 +771,38 @@ static void rx_queue_remove_kobjects(struct net_device *net)
766 kset_unregister(net->queues_kset); 771 kset_unregister(net->queues_kset);
767} 772}
768#endif /* CONFIG_RPS */ 773#endif /* CONFIG_RPS */
769#endif /* CONFIG_SYSFS */ 774
775static const void *net_current_ns(void)
776{
777 return current->nsproxy->net_ns;
778}
779
780static const void *net_initial_ns(void)
781{
782 return &init_net;
783}
784
785static const void *net_netlink_ns(struct sock *sk)
786{
787 return sock_net(sk);
788}
789
790static struct kobj_ns_type_operations net_ns_type_operations = {
791 .type = KOBJ_NS_TYPE_NET,
792 .current_ns = net_current_ns,
793 .netlink_ns = net_netlink_ns,
794 .initial_ns = net_initial_ns,
795};
796
797static void net_kobj_ns_exit(struct net *net)
798{
799 kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
800}
801
802static struct pernet_operations kobj_net_ops = {
803 .exit = net_kobj_ns_exit,
804};
805
770 806
771#ifdef CONFIG_HOTPLUG 807#ifdef CONFIG_HOTPLUG
772static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) 808static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
@@ -774,9 +810,6 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
774 struct net_device *dev = to_net_dev(d); 810 struct net_device *dev = to_net_dev(d);
775 int retval; 811 int retval;
776 812
777 if (!net_eq(dev_net(dev), &init_net))
778 return 0;
779
780 /* pass interface to uevent. */ 813 /* pass interface to uevent. */
781 retval = add_uevent_var(env, "INTERFACE=%s", dev->name); 814 retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
782 if (retval) 815 if (retval)
@@ -806,6 +839,13 @@ static void netdev_release(struct device *d)
806 kfree((char *)dev - dev->padded); 839 kfree((char *)dev - dev->padded);
807} 840}
808 841
842static const void *net_namespace(struct device *d)
843{
844 struct net_device *dev;
845 dev = container_of(d, struct net_device, dev);
846 return dev_net(dev);
847}
848
809static struct class net_class = { 849static struct class net_class = {
810 .name = "net", 850 .name = "net",
811 .dev_release = netdev_release, 851 .dev_release = netdev_release,
@@ -815,6 +855,8 @@ static struct class net_class = {
815#ifdef CONFIG_HOTPLUG 855#ifdef CONFIG_HOTPLUG
816 .dev_uevent = netdev_uevent, 856 .dev_uevent = netdev_uevent,
817#endif 857#endif
858 .ns_type = &net_ns_type_operations,
859 .namespace = net_namespace,
818}; 860};
819 861
820/* Delete sysfs entries but hold kobject reference until after all 862/* Delete sysfs entries but hold kobject reference until after all
@@ -826,9 +868,6 @@ void netdev_unregister_kobject(struct net_device * net)
826 868
827 kobject_get(&dev->kobj); 869 kobject_get(&dev->kobj);
828 870
829 if (!net_eq(dev_net(net), &init_net))
830 return;
831
832#ifdef CONFIG_RPS 871#ifdef CONFIG_RPS
833 rx_queue_remove_kobjects(net); 872 rx_queue_remove_kobjects(net);
834#endif 873#endif
@@ -843,6 +882,7 @@ int netdev_register_kobject(struct net_device *net)
843 const struct attribute_group **groups = net->sysfs_groups; 882 const struct attribute_group **groups = net->sysfs_groups;
844 int error = 0; 883 int error = 0;
845 884
885 device_initialize(dev);
846 dev->class = &net_class; 886 dev->class = &net_class;
847 dev->platform_data = net; 887 dev->platform_data = net;
848 dev->groups = groups; 888 dev->groups = groups;
@@ -865,9 +905,6 @@ int netdev_register_kobject(struct net_device *net)
865#endif 905#endif
866#endif /* CONFIG_SYSFS */ 906#endif /* CONFIG_SYSFS */
867 907
868 if (!net_eq(dev_net(net), &init_net))
869 return 0;
870
871 error = device_add(dev); 908 error = device_add(dev);
872 if (error) 909 if (error)
873 return error; 910 return error;
@@ -887,22 +924,17 @@ int netdev_class_create_file(struct class_attribute *class_attr)
887{ 924{
888 return class_create_file(&net_class, class_attr); 925 return class_create_file(&net_class, class_attr);
889} 926}
927EXPORT_SYMBOL(netdev_class_create_file);
890 928
891void netdev_class_remove_file(struct class_attribute *class_attr) 929void netdev_class_remove_file(struct class_attribute *class_attr)
892{ 930{
893 class_remove_file(&net_class, class_attr); 931 class_remove_file(&net_class, class_attr);
894} 932}
895
896EXPORT_SYMBOL(netdev_class_create_file);
897EXPORT_SYMBOL(netdev_class_remove_file); 933EXPORT_SYMBOL(netdev_class_remove_file);
898 934
899void netdev_initialize_kobject(struct net_device *net)
900{
901 struct device *device = &(net->dev);
902 device_initialize(device);
903}
904
905int netdev_kobject_init(void) 935int netdev_kobject_init(void)
906{ 936{
937 kobj_ns_type_register(&net_ns_type_operations);
938 register_pernet_subsys(&kobj_net_ops);
907 return class_register(&net_class); 939 return class_register(&net_class);
908} 940}
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 14e7524260b3..805555e8b187 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,5 +4,4 @@
4int netdev_kobject_init(void); 4int netdev_kobject_init(void);
5int netdev_register_kobject(struct net_device *); 5int netdev_register_kobject(struct net_device *);
6void netdev_unregister_kobject(struct net_device *); 6void netdev_unregister_kobject(struct net_device *);
7void netdev_initialize_kobject(struct net_device *);
8#endif 7#endif
diff --git a/net/core/netevent.c b/net/core/netevent.c
index 95f81de87502..865f0ceb81fb 100644
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -35,6 +35,7 @@ int register_netevent_notifier(struct notifier_block *nb)
35 err = atomic_notifier_chain_register(&netevent_notif_chain, nb); 35 err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
36 return err; 36 return err;
37} 37}
38EXPORT_SYMBOL_GPL(register_netevent_notifier);
38 39
39/** 40/**
40 * netevent_unregister_notifier - unregister a netevent notifier block 41 * netevent_unregister_notifier - unregister a netevent notifier block
@@ -50,6 +51,7 @@ int unregister_netevent_notifier(struct notifier_block *nb)
50{ 51{
51 return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); 52 return atomic_notifier_chain_unregister(&netevent_notif_chain, nb);
52} 53}
54EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
53 55
54/** 56/**
55 * call_netevent_notifiers - call all netevent notifier blocks 57 * call_netevent_notifiers - call all netevent notifier blocks
@@ -64,7 +66,4 @@ int call_netevent_notifiers(unsigned long val, void *v)
64{ 66{
65 return atomic_notifier_call_chain(&netevent_notif_chain, val, v); 67 return atomic_notifier_call_chain(&netevent_notif_chain, val, v);
66} 68}
67
68EXPORT_SYMBOL_GPL(register_netevent_notifier);
69EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
70EXPORT_SYMBOL_GPL(call_netevent_notifiers); 69EXPORT_SYMBOL_GPL(call_netevent_notifiers);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 94825b109551..c2b7a8bed8f6 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -49,7 +49,6 @@ static atomic_t trapped;
49 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ 49 (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
50 sizeof(struct iphdr) + sizeof(struct ethhdr)) 50 sizeof(struct iphdr) + sizeof(struct ethhdr))
51 51
52static void zap_completion_queue(void);
53static void arp_reply(struct sk_buff *skb); 52static void arp_reply(struct sk_buff *skb);
54 53
55static unsigned int carrier_timeout = 4; 54static unsigned int carrier_timeout = 4;
@@ -197,13 +196,14 @@ void netpoll_poll_dev(struct net_device *dev)
197 196
198 service_arp_queue(dev->npinfo); 197 service_arp_queue(dev->npinfo);
199 198
200 zap_completion_queue();
201} 199}
200EXPORT_SYMBOL(netpoll_poll_dev);
202 201
203void netpoll_poll(struct netpoll *np) 202void netpoll_poll(struct netpoll *np)
204{ 203{
205 netpoll_poll_dev(np->dev); 204 netpoll_poll_dev(np->dev);
206} 205}
206EXPORT_SYMBOL(netpoll_poll);
207 207
208static void refill_skbs(void) 208static void refill_skbs(void)
209{ 209{
@@ -221,40 +221,11 @@ static void refill_skbs(void)
221 spin_unlock_irqrestore(&skb_pool.lock, flags); 221 spin_unlock_irqrestore(&skb_pool.lock, flags);
222} 222}
223 223
224static void zap_completion_queue(void)
225{
226 unsigned long flags;
227 struct softnet_data *sd = &get_cpu_var(softnet_data);
228
229 if (sd->completion_queue) {
230 struct sk_buff *clist;
231
232 local_irq_save(flags);
233 clist = sd->completion_queue;
234 sd->completion_queue = NULL;
235 local_irq_restore(flags);
236
237 while (clist != NULL) {
238 struct sk_buff *skb = clist;
239 clist = clist->next;
240 if (skb->destructor) {
241 atomic_inc(&skb->users);
242 dev_kfree_skb_any(skb); /* put this one back */
243 } else {
244 __kfree_skb(skb);
245 }
246 }
247 }
248
249 put_cpu_var(softnet_data);
250}
251
252static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) 224static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
253{ 225{
254 int count = 0; 226 int count = 0;
255 struct sk_buff *skb; 227 struct sk_buff *skb;
256 228
257 zap_completion_queue();
258 refill_skbs(); 229 refill_skbs();
259repeat: 230repeat:
260 231
@@ -292,6 +263,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
292 unsigned long tries; 263 unsigned long tries;
293 struct net_device *dev = np->dev; 264 struct net_device *dev = np->dev;
294 const struct net_device_ops *ops = dev->netdev_ops; 265 const struct net_device_ops *ops = dev->netdev_ops;
266 /* It is up to the caller to keep npinfo alive. */
295 struct netpoll_info *npinfo = np->dev->npinfo; 267 struct netpoll_info *npinfo = np->dev->npinfo;
296 268
297 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { 269 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
@@ -343,6 +315,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
343 schedule_delayed_work(&npinfo->tx_work,0); 315 schedule_delayed_work(&npinfo->tx_work,0);
344 } 316 }
345} 317}
318EXPORT_SYMBOL(netpoll_send_skb);
346 319
347void netpoll_send_udp(struct netpoll *np, const char *msg, int len) 320void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
348{ 321{
@@ -404,6 +377,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
404 377
405 netpoll_send_skb(np, skb); 378 netpoll_send_skb(np, skb);
406} 379}
380EXPORT_SYMBOL(netpoll_send_udp);
407 381
408static void arp_reply(struct sk_buff *skb) 382static void arp_reply(struct sk_buff *skb)
409{ 383{
@@ -630,6 +604,7 @@ void netpoll_print_options(struct netpoll *np)
630 printk(KERN_INFO "%s: remote ethernet address %pM\n", 604 printk(KERN_INFO "%s: remote ethernet address %pM\n",
631 np->name, np->remote_mac); 605 np->name, np->remote_mac);
632} 606}
607EXPORT_SYMBOL(netpoll_print_options);
633 608
634int netpoll_parse_options(struct netpoll *np, char *opt) 609int netpoll_parse_options(struct netpoll *np, char *opt)
635{ 610{
@@ -722,30 +697,29 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
722 np->name, cur); 697 np->name, cur);
723 return -1; 698 return -1;
724} 699}
700EXPORT_SYMBOL(netpoll_parse_options);
725 701
726int netpoll_setup(struct netpoll *np) 702int __netpoll_setup(struct netpoll *np)
727{ 703{
728 struct net_device *ndev = NULL; 704 struct net_device *ndev = np->dev;
729 struct in_device *in_dev;
730 struct netpoll_info *npinfo; 705 struct netpoll_info *npinfo;
731 struct netpoll *npe, *tmp; 706 const struct net_device_ops *ops;
732 unsigned long flags; 707 unsigned long flags;
733 int err; 708 int err;
734 709
735 if (np->dev_name) 710 if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
736 ndev = dev_get_by_name(&init_net, np->dev_name); 711 !ndev->netdev_ops->ndo_poll_controller) {
737 if (!ndev) { 712 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
738 printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
739 np->name, np->dev_name); 713 np->name, np->dev_name);
740 return -ENODEV; 714 err = -ENOTSUPP;
715 goto out;
741 } 716 }
742 717
743 np->dev = ndev;
744 if (!ndev->npinfo) { 718 if (!ndev->npinfo) {
745 npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); 719 npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
746 if (!npinfo) { 720 if (!npinfo) {
747 err = -ENOMEM; 721 err = -ENOMEM;
748 goto put; 722 goto out;
749 } 723 }
750 724
751 npinfo->rx_flags = 0; 725 npinfo->rx_flags = 0;
@@ -757,6 +731,13 @@ int netpoll_setup(struct netpoll *np)
757 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); 731 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
758 732
759 atomic_set(&npinfo->refcnt, 1); 733 atomic_set(&npinfo->refcnt, 1);
734
735 ops = np->dev->netdev_ops;
736 if (ops->ndo_netpoll_setup) {
737 err = ops->ndo_netpoll_setup(ndev, npinfo);
738 if (err)
739 goto free_npinfo;
740 }
760 } else { 741 } else {
761 npinfo = ndev->npinfo; 742 npinfo = ndev->npinfo;
762 atomic_inc(&npinfo->refcnt); 743 atomic_inc(&npinfo->refcnt);
@@ -764,12 +745,37 @@ int netpoll_setup(struct netpoll *np)
764 745
765 npinfo->netpoll = np; 746 npinfo->netpoll = np;
766 747
767 if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || 748 if (np->rx_hook) {
768 !ndev->netdev_ops->ndo_poll_controller) { 749 spin_lock_irqsave(&npinfo->rx_lock, flags);
769 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", 750 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
751 list_add_tail(&np->rx, &npinfo->rx_np);
752 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
753 }
754
755 /* last thing to do is link it to the net device structure */
756 rcu_assign_pointer(ndev->npinfo, npinfo);
757
758 return 0;
759
760free_npinfo:
761 kfree(npinfo);
762out:
763 return err;
764}
765EXPORT_SYMBOL_GPL(__netpoll_setup);
766
767int netpoll_setup(struct netpoll *np)
768{
769 struct net_device *ndev = NULL;
770 struct in_device *in_dev;
771 int err;
772
773 if (np->dev_name)
774 ndev = dev_get_by_name(&init_net, np->dev_name);
775 if (!ndev) {
776 printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
770 np->name, np->dev_name); 777 np->name, np->dev_name);
771 err = -ENOTSUPP; 778 return -ENODEV;
772 goto release;
773 } 779 }
774 780
775 if (!netif_running(ndev)) { 781 if (!netif_running(ndev)) {
@@ -785,7 +791,7 @@ int netpoll_setup(struct netpoll *np)
785 if (err) { 791 if (err) {
786 printk(KERN_ERR "%s: failed to open %s\n", 792 printk(KERN_ERR "%s: failed to open %s\n",
787 np->name, ndev->name); 793 np->name, ndev->name);
788 goto release; 794 goto put;
789 } 795 }
790 796
791 atleast = jiffies + HZ/10; 797 atleast = jiffies + HZ/10;
@@ -822,7 +828,7 @@ int netpoll_setup(struct netpoll *np)
822 printk(KERN_ERR "%s: no IP address for %s, aborting\n", 828 printk(KERN_ERR "%s: no IP address for %s, aborting\n",
823 np->name, np->dev_name); 829 np->name, np->dev_name);
824 err = -EDESTADDRREQ; 830 err = -EDESTADDRREQ;
825 goto release; 831 goto put;
826 } 832 }
827 833
828 np->local_ip = in_dev->ifa_list->ifa_local; 834 np->local_ip = in_dev->ifa_list->ifa_local;
@@ -830,38 +836,25 @@ int netpoll_setup(struct netpoll *np)
830 printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip); 836 printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip);
831 } 837 }
832 838
833 if (np->rx_hook) { 839 np->dev = ndev;
834 spin_lock_irqsave(&npinfo->rx_lock, flags);
835 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
836 list_add_tail(&np->rx, &npinfo->rx_np);
837 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
838 }
839 840
840 /* fill up the skb queue */ 841 /* fill up the skb queue */
841 refill_skbs(); 842 refill_skbs();
842 843
843 /* last thing to do is link it to the net device structure */ 844 rtnl_lock();
844 ndev->npinfo = npinfo; 845 err = __netpoll_setup(np);
846 rtnl_unlock();
845 847
846 /* avoid racing with NAPI reading npinfo */ 848 if (err)
847 synchronize_rcu(); 849 goto put;
848 850
849 return 0; 851 return 0;
850 852
851 release:
852 if (!ndev->npinfo) {
853 spin_lock_irqsave(&npinfo->rx_lock, flags);
854 list_for_each_entry_safe(npe, tmp, &npinfo->rx_np, rx) {
855 npe->dev = NULL;
856 }
857 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
858
859 kfree(npinfo);
860 }
861put: 853put:
862 dev_put(ndev); 854 dev_put(ndev);
863 return err; 855 return err;
864} 856}
857EXPORT_SYMBOL(netpoll_setup);
865 858
866static int __init netpoll_init(void) 859static int __init netpoll_init(void)
867{ 860{
@@ -870,49 +863,65 @@ static int __init netpoll_init(void)
870} 863}
871core_initcall(netpoll_init); 864core_initcall(netpoll_init);
872 865
873void netpoll_cleanup(struct netpoll *np) 866void __netpoll_cleanup(struct netpoll *np)
874{ 867{
875 struct netpoll_info *npinfo; 868 struct netpoll_info *npinfo;
876 unsigned long flags; 869 unsigned long flags;
877 870
878 if (np->dev) { 871 npinfo = np->dev->npinfo;
879 npinfo = np->dev->npinfo; 872 if (!npinfo)
880 if (npinfo) { 873 return;
881 if (!list_empty(&npinfo->rx_np)) {
882 spin_lock_irqsave(&npinfo->rx_lock, flags);
883 list_del(&np->rx);
884 if (list_empty(&npinfo->rx_np))
885 npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
886 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
887 }
888 874
889 if (atomic_dec_and_test(&npinfo->refcnt)) { 875 if (!list_empty(&npinfo->rx_np)) {
890 const struct net_device_ops *ops; 876 spin_lock_irqsave(&npinfo->rx_lock, flags);
891 skb_queue_purge(&npinfo->arp_tx); 877 list_del(&np->rx);
892 skb_queue_purge(&npinfo->txq); 878 if (list_empty(&npinfo->rx_np))
893 cancel_rearming_delayed_work(&npinfo->tx_work); 879 npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
894 880 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
895 /* clean after last, unfinished work */ 881 }
896 __skb_queue_purge(&npinfo->txq); 882
897 kfree(npinfo); 883 if (atomic_dec_and_test(&npinfo->refcnt)) {
898 ops = np->dev->netdev_ops; 884 const struct net_device_ops *ops;
899 if (ops->ndo_netpoll_cleanup) 885
900 ops->ndo_netpoll_cleanup(np->dev); 886 ops = np->dev->netdev_ops;
901 else 887 if (ops->ndo_netpoll_cleanup)
902 np->dev->npinfo = NULL; 888 ops->ndo_netpoll_cleanup(np->dev);
903 } 889
904 } 890 rcu_assign_pointer(np->dev->npinfo, NULL);
905 891
906 dev_put(np->dev); 892 /* avoid racing with NAPI reading npinfo */
893 synchronize_rcu_bh();
894
895 skb_queue_purge(&npinfo->arp_tx);
896 skb_queue_purge(&npinfo->txq);
897 cancel_rearming_delayed_work(&npinfo->tx_work);
898
899 /* clean after last, unfinished work */
900 __skb_queue_purge(&npinfo->txq);
901 kfree(npinfo);
907 } 902 }
903}
904EXPORT_SYMBOL_GPL(__netpoll_cleanup);
908 905
906void netpoll_cleanup(struct netpoll *np)
907{
908 if (!np->dev)
909 return;
910
911 rtnl_lock();
912 __netpoll_cleanup(np);
913 rtnl_unlock();
914
915 dev_put(np->dev);
909 np->dev = NULL; 916 np->dev = NULL;
910} 917}
918EXPORT_SYMBOL(netpoll_cleanup);
911 919
912int netpoll_trap(void) 920int netpoll_trap(void)
913{ 921{
914 return atomic_read(&trapped); 922 return atomic_read(&trapped);
915} 923}
924EXPORT_SYMBOL(netpoll_trap);
916 925
917void netpoll_set_trap(int trap) 926void netpoll_set_trap(int trap)
918{ 927{
@@ -921,14 +930,4 @@ void netpoll_set_trap(int trap)
921 else 930 else
922 atomic_dec(&trapped); 931 atomic_dec(&trapped);
923} 932}
924
925EXPORT_SYMBOL(netpoll_send_skb);
926EXPORT_SYMBOL(netpoll_set_trap); 933EXPORT_SYMBOL(netpoll_set_trap);
927EXPORT_SYMBOL(netpoll_trap);
928EXPORT_SYMBOL(netpoll_print_options);
929EXPORT_SYMBOL(netpoll_parse_options);
930EXPORT_SYMBOL(netpoll_setup);
931EXPORT_SYMBOL(netpoll_cleanup);
932EXPORT_SYMBOL(netpoll_send_udp);
933EXPORT_SYMBOL(netpoll_poll_dev);
934EXPORT_SYMBOL(netpoll_poll);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2ad68da418df..24a19debda1b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -115,6 +115,9 @@
115 * command by Adit Ranadive <adit.262@gmail.com> 115 * command by Adit Ranadive <adit.262@gmail.com>
116 * 116 *
117 */ 117 */
118
119#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
120
118#include <linux/sys.h> 121#include <linux/sys.h>
119#include <linux/types.h> 122#include <linux/types.h>
120#include <linux/module.h> 123#include <linux/module.h>
@@ -169,11 +172,13 @@
169#include <asm/dma.h> 172#include <asm/dma.h>
170#include <asm/div64.h> /* do_div */ 173#include <asm/div64.h> /* do_div */
171 174
172#define VERSION "2.73" 175#define VERSION "2.74"
173#define IP_NAME_SZ 32 176#define IP_NAME_SZ 32
174#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ 177#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
175#define MPLS_STACK_BOTTOM htonl(0x00000100) 178#define MPLS_STACK_BOTTOM htonl(0x00000100)
176 179
180#define func_enter() pr_debug("entering %s\n", __func__);
181
177/* Device flag bits */ 182/* Device flag bits */
178#define F_IPSRC_RND (1<<0) /* IP-Src Random */ 183#define F_IPSRC_RND (1<<0) /* IP-Src Random */
179#define F_IPDST_RND (1<<1) /* IP-Dst Random */ 184#define F_IPDST_RND (1<<1) /* IP-Dst Random */
@@ -424,7 +429,8 @@ static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
424} 429}
425 430
426static const char version[] = 431static const char version[] =
427 "pktgen " VERSION ": Packet Generator for packet performance testing.\n"; 432 "Packet Generator for packet performance testing. "
433 "Version: " VERSION "\n";
428 434
429static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); 435static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i);
430static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); 436static int pktgen_add_device(struct pktgen_thread *t, const char *ifname);
@@ -495,7 +501,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
495 pktgen_reset_all_threads(); 501 pktgen_reset_all_threads();
496 502
497 else 503 else
498 printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); 504 pr_warning("Unknown command: %s\n", data);
499 505
500 err = count; 506 err = count;
501 507
@@ -840,7 +846,7 @@ static ssize_t pktgen_if_write(struct file *file,
840 const char __user * user_buffer, size_t count, 846 const char __user * user_buffer, size_t count,
841 loff_t * offset) 847 loff_t * offset)
842{ 848{
843 struct seq_file *seq = (struct seq_file *)file->private_data; 849 struct seq_file *seq = file->private_data;
844 struct pktgen_dev *pkt_dev = seq->private; 850 struct pktgen_dev *pkt_dev = seq->private;
845 int i = 0, max, len; 851 int i = 0, max, len;
846 char name[16], valstr[32]; 852 char name[16], valstr[32];
@@ -852,14 +858,14 @@ static ssize_t pktgen_if_write(struct file *file,
852 pg_result = &(pkt_dev->result[0]); 858 pg_result = &(pkt_dev->result[0]);
853 859
854 if (count < 1) { 860 if (count < 1) {
855 printk(KERN_WARNING "pktgen: wrong command format\n"); 861 pr_warning("wrong command format\n");
856 return -EINVAL; 862 return -EINVAL;
857 } 863 }
858 864
859 max = count - i; 865 max = count - i;
860 tmp = count_trail_chars(&user_buffer[i], max); 866 tmp = count_trail_chars(&user_buffer[i], max);
861 if (tmp < 0) { 867 if (tmp < 0) {
862 printk(KERN_WARNING "pktgen: illegal format\n"); 868 pr_warning("illegal format\n");
863 return tmp; 869 return tmp;
864 } 870 }
865 i += tmp; 871 i += tmp;
@@ -980,6 +986,36 @@ static ssize_t pktgen_if_write(struct file *file,
980 (unsigned long long) pkt_dev->delay); 986 (unsigned long long) pkt_dev->delay);
981 return count; 987 return count;
982 } 988 }
989 if (!strcmp(name, "rate")) {
990 len = num_arg(&user_buffer[i], 10, &value);
991 if (len < 0)
992 return len;
993
994 i += len;
995 if (!value)
996 return len;
997 pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value;
998 if (debug)
999 pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
1000
1001 sprintf(pg_result, "OK: rate=%lu", value);
1002 return count;
1003 }
1004 if (!strcmp(name, "ratep")) {
1005 len = num_arg(&user_buffer[i], 10, &value);
1006 if (len < 0)
1007 return len;
1008
1009 i += len;
1010 if (!value)
1011 return len;
1012 pkt_dev->delay = NSEC_PER_SEC/value;
1013 if (debug)
1014 pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
1015
1016 sprintf(pg_result, "OK: rate=%lu", value);
1017 return count;
1018 }
983 if (!strcmp(name, "udp_src_min")) { 1019 if (!strcmp(name, "udp_src_min")) {
984 len = num_arg(&user_buffer[i], 10, &value); 1020 len = num_arg(&user_buffer[i], 10, &value);
985 if (len < 0) 1021 if (len < 0)
@@ -1740,7 +1776,7 @@ static ssize_t pktgen_thread_write(struct file *file,
1740 const char __user * user_buffer, 1776 const char __user * user_buffer,
1741 size_t count, loff_t * offset) 1777 size_t count, loff_t * offset)
1742{ 1778{
1743 struct seq_file *seq = (struct seq_file *)file->private_data; 1779 struct seq_file *seq = file->private_data;
1744 struct pktgen_thread *t = seq->private; 1780 struct pktgen_thread *t = seq->private;
1745 int i = 0, max, len, ret; 1781 int i = 0, max, len, ret;
1746 char name[40]; 1782 char name[40];
@@ -1781,7 +1817,7 @@ static ssize_t pktgen_thread_write(struct file *file,
1781 name, (unsigned long)count); 1817 name, (unsigned long)count);
1782 1818
1783 if (!t) { 1819 if (!t) {
1784 printk(KERN_ERR "pktgen: ERROR: No thread\n"); 1820 pr_err("ERROR: No thread\n");
1785 ret = -EINVAL; 1821 ret = -EINVAL;
1786 goto out; 1822 goto out;
1787 } 1823 }
@@ -1874,7 +1910,7 @@ static void pktgen_mark_device(const char *ifname)
1874 int i = 0; 1910 int i = 0;
1875 1911
1876 mutex_lock(&pktgen_thread_lock); 1912 mutex_lock(&pktgen_thread_lock);
1877 pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname); 1913 pr_debug("%s: marking %s for removal\n", __func__, ifname);
1878 1914
1879 while (1) { 1915 while (1) {
1880 1916
@@ -1883,15 +1919,14 @@ static void pktgen_mark_device(const char *ifname)
1883 break; /* success */ 1919 break; /* success */
1884 1920
1885 mutex_unlock(&pktgen_thread_lock); 1921 mutex_unlock(&pktgen_thread_lock);
1886 pr_debug("pktgen: pktgen_mark_device waiting for %s " 1922 pr_debug("%s: waiting for %s to disappear....\n",
1887 "to disappear....\n", ifname); 1923 __func__, ifname);
1888 schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try)); 1924 schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
1889 mutex_lock(&pktgen_thread_lock); 1925 mutex_lock(&pktgen_thread_lock);
1890 1926
1891 if (++i >= max_tries) { 1927 if (++i >= max_tries) {
1892 printk(KERN_ERR "pktgen_mark_device: timed out after " 1928 pr_err("%s: timed out after waiting %d msec for device %s to be removed\n",
1893 "waiting %d msec for device %s to be removed\n", 1929 __func__, msec_per_try * i, ifname);
1894 msec_per_try * i, ifname);
1895 break; 1930 break;
1896 } 1931 }
1897 1932
@@ -1918,8 +1953,8 @@ static void pktgen_change_name(struct net_device *dev)
1918 &pktgen_if_fops, 1953 &pktgen_if_fops,
1919 pkt_dev); 1954 pkt_dev);
1920 if (!pkt_dev->entry) 1955 if (!pkt_dev->entry)
1921 printk(KERN_ERR "pktgen: can't move proc " 1956 pr_err("can't move proc entry for '%s'\n",
1922 " entry for '%s'\n", dev->name); 1957 dev->name);
1923 break; 1958 break;
1924 } 1959 }
1925 } 1960 }
@@ -1983,15 +2018,15 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
1983 2018
1984 odev = pktgen_dev_get_by_name(pkt_dev, ifname); 2019 odev = pktgen_dev_get_by_name(pkt_dev, ifname);
1985 if (!odev) { 2020 if (!odev) {
1986 printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); 2021 pr_err("no such netdevice: \"%s\"\n", ifname);
1987 return -ENODEV; 2022 return -ENODEV;
1988 } 2023 }
1989 2024
1990 if (odev->type != ARPHRD_ETHER) { 2025 if (odev->type != ARPHRD_ETHER) {
1991 printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname); 2026 pr_err("not an ethernet device: \"%s\"\n", ifname);
1992 err = -EINVAL; 2027 err = -EINVAL;
1993 } else if (!netif_running(odev)) { 2028 } else if (!netif_running(odev)) {
1994 printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname); 2029 pr_err("device is down: \"%s\"\n", ifname);
1995 err = -ENETDOWN; 2030 err = -ENETDOWN;
1996 } else { 2031 } else {
1997 pkt_dev->odev = odev; 2032 pkt_dev->odev = odev;
@@ -2010,8 +2045,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2010 int ntxq; 2045 int ntxq;
2011 2046
2012 if (!pkt_dev->odev) { 2047 if (!pkt_dev->odev) {
2013 printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in " 2048 pr_err("ERROR: pkt_dev->odev == NULL in setup_inject\n");
2014 "setup_inject.\n");
2015 sprintf(pkt_dev->result, 2049 sprintf(pkt_dev->result,
2016 "ERROR: pkt_dev->odev == NULL in setup_inject.\n"); 2050 "ERROR: pkt_dev->odev == NULL in setup_inject.\n");
2017 return; 2051 return;
@@ -2021,19 +2055,15 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2021 ntxq = pkt_dev->odev->real_num_tx_queues; 2055 ntxq = pkt_dev->odev->real_num_tx_queues;
2022 2056
2023 if (ntxq <= pkt_dev->queue_map_min) { 2057 if (ntxq <= pkt_dev->queue_map_min) {
2024 printk(KERN_WARNING "pktgen: WARNING: Requested " 2058 pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
2025 "queue_map_min (zero-based) (%d) exceeds valid range " 2059 pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
2026 "[0 - %d] for (%d) queues on %s, resetting\n", 2060 pkt_dev->odevname);
2027 pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
2028 pkt_dev->odevname);
2029 pkt_dev->queue_map_min = ntxq - 1; 2061 pkt_dev->queue_map_min = ntxq - 1;
2030 } 2062 }
2031 if (pkt_dev->queue_map_max >= ntxq) { 2063 if (pkt_dev->queue_map_max >= ntxq) {
2032 printk(KERN_WARNING "pktgen: WARNING: Requested " 2064 pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
2033 "queue_map_max (zero-based) (%d) exceeds valid range " 2065 pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
2034 "[0 - %d] for (%d) queues on %s, resetting\n", 2066 pkt_dev->odevname);
2035 pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
2036 pkt_dev->odevname);
2037 pkt_dev->queue_map_max = ntxq - 1; 2067 pkt_dev->queue_map_max = ntxq - 1;
2038 } 2068 }
2039 2069
@@ -2093,8 +2123,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2093 } 2123 }
2094 rcu_read_unlock(); 2124 rcu_read_unlock();
2095 if (err) 2125 if (err)
2096 printk(KERN_ERR "pktgen: ERROR: IPv6 link " 2126 pr_err("ERROR: IPv6 link address not available\n");
2097 "address not availble.\n");
2098 } 2127 }
2099#endif 2128#endif
2100 } else { 2129 } else {
@@ -2142,15 +2171,15 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
2142 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 2171 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
2143 hrtimer_set_expires(&t.timer, spin_until); 2172 hrtimer_set_expires(&t.timer, spin_until);
2144 2173
2145 remaining = ktime_to_us(hrtimer_expires_remaining(&t.timer)); 2174 remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
2146 if (remaining <= 0) { 2175 if (remaining <= 0) {
2147 pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); 2176 pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
2148 return; 2177 return;
2149 } 2178 }
2150 2179
2151 start_time = ktime_now(); 2180 start_time = ktime_now();
2152 if (remaining < 100) 2181 if (remaining < 100000)
2153 udelay(remaining); /* really small just spin */ 2182 ndelay(remaining); /* really small just spin */
2154 else { 2183 else {
2155 /* see do_nanosleep */ 2184 /* see do_nanosleep */
2156 hrtimer_init_sleeper(&t, current); 2185 hrtimer_init_sleeper(&t, current);
@@ -2170,7 +2199,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
2170 end_time = ktime_now(); 2199 end_time = ktime_now();
2171 2200
2172 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); 2201 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
2173 pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay); 2202 pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
2174} 2203}
2175 2204
2176static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) 2205static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
@@ -2528,8 +2557,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
2528 if (nhead > 0) { 2557 if (nhead > 0) {
2529 ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); 2558 ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
2530 if (ret < 0) { 2559 if (ret < 0) {
2531 printk(KERN_ERR "Error expanding " 2560 pr_err("Error expanding ipsec packet %d\n",
2532 "ipsec packet %d\n", ret); 2561 ret);
2533 goto err; 2562 goto err;
2534 } 2563 }
2535 } 2564 }
@@ -2538,8 +2567,7 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
2538 skb_pull(skb, ETH_HLEN); 2567 skb_pull(skb, ETH_HLEN);
2539 ret = pktgen_output_ipsec(skb, pkt_dev); 2568 ret = pktgen_output_ipsec(skb, pkt_dev);
2540 if (ret) { 2569 if (ret) {
2541 printk(KERN_ERR "Error creating ipsec " 2570 pr_err("Error creating ipsec packet %d\n", ret);
2542 "packet %d\n", ret);
2543 goto err; 2571 goto err;
2544 } 2572 }
2545 /* restore ll */ 2573 /* restore ll */
@@ -3015,8 +3043,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
3015 if (datalen < sizeof(struct pktgen_hdr)) { 3043 if (datalen < sizeof(struct pktgen_hdr)) {
3016 datalen = sizeof(struct pktgen_hdr); 3044 datalen = sizeof(struct pktgen_hdr);
3017 if (net_ratelimit()) 3045 if (net_ratelimit())
3018 printk(KERN_INFO "pktgen: increased datalen to %d\n", 3046 pr_info("increased datalen to %d\n", datalen);
3019 datalen);
3020 } 3047 }
3021 3048
3022 udph->source = htons(pkt_dev->cur_udp_src); 3049 udph->source = htons(pkt_dev->cur_udp_src);
@@ -3143,7 +3170,7 @@ static void pktgen_run(struct pktgen_thread *t)
3143 struct pktgen_dev *pkt_dev; 3170 struct pktgen_dev *pkt_dev;
3144 int started = 0; 3171 int started = 0;
3145 3172
3146 pr_debug("pktgen: entering pktgen_run. %p\n", t); 3173 func_enter();
3147 3174
3148 if_lock(t); 3175 if_lock(t);
3149 list_for_each_entry(pkt_dev, &t->if_list, list) { 3176 list_for_each_entry(pkt_dev, &t->if_list, list) {
@@ -3176,7 +3203,7 @@ static void pktgen_stop_all_threads_ifs(void)
3176{ 3203{
3177 struct pktgen_thread *t; 3204 struct pktgen_thread *t;
3178 3205
3179 pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n"); 3206 func_enter();
3180 3207
3181 mutex_lock(&pktgen_thread_lock); 3208 mutex_lock(&pktgen_thread_lock);
3182 3209
@@ -3241,7 +3268,7 @@ static void pktgen_run_all_threads(void)
3241{ 3268{
3242 struct pktgen_thread *t; 3269 struct pktgen_thread *t;
3243 3270
3244 pr_debug("pktgen: entering pktgen_run_all_threads.\n"); 3271 func_enter();
3245 3272
3246 mutex_lock(&pktgen_thread_lock); 3273 mutex_lock(&pktgen_thread_lock);
3247 3274
@@ -3260,7 +3287,7 @@ static void pktgen_reset_all_threads(void)
3260{ 3287{
3261 struct pktgen_thread *t; 3288 struct pktgen_thread *t;
3262 3289
3263 pr_debug("pktgen: entering pktgen_reset_all_threads.\n"); 3290 func_enter();
3264 3291
3265 mutex_lock(&pktgen_thread_lock); 3292 mutex_lock(&pktgen_thread_lock);
3266 3293
@@ -3310,8 +3337,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
3310 int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1; 3337 int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1;
3311 3338
3312 if (!pkt_dev->running) { 3339 if (!pkt_dev->running) {
3313 printk(KERN_WARNING "pktgen: interface: %s is already " 3340 pr_warning("interface: %s is already stopped\n",
3314 "stopped\n", pkt_dev->odevname); 3341 pkt_dev->odevname);
3315 return -EINVAL; 3342 return -EINVAL;
3316 } 3343 }
3317 3344
@@ -3347,7 +3374,7 @@ static void pktgen_stop(struct pktgen_thread *t)
3347{ 3374{
3348 struct pktgen_dev *pkt_dev; 3375 struct pktgen_dev *pkt_dev;
3349 3376
3350 pr_debug("pktgen: entering pktgen_stop\n"); 3377 func_enter();
3351 3378
3352 if_lock(t); 3379 if_lock(t);
3353 3380
@@ -3367,7 +3394,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
3367 struct list_head *q, *n; 3394 struct list_head *q, *n;
3368 struct pktgen_dev *cur; 3395 struct pktgen_dev *cur;
3369 3396
3370 pr_debug("pktgen: entering pktgen_rem_one_if\n"); 3397 func_enter();
3371 3398
3372 if_lock(t); 3399 if_lock(t);
3373 3400
@@ -3393,9 +3420,10 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3393 struct list_head *q, *n; 3420 struct list_head *q, *n;
3394 struct pktgen_dev *cur; 3421 struct pktgen_dev *cur;
3395 3422
3423 func_enter();
3424
3396 /* Remove all devices, free mem */ 3425 /* Remove all devices, free mem */
3397 3426
3398 pr_debug("pktgen: entering pktgen_rem_all_ifs\n");
3399 if_lock(t); 3427 if_lock(t);
3400 3428
3401 list_for_each_safe(q, n, &t->if_list) { 3429 list_for_each_safe(q, n, &t->if_list) {
@@ -3477,8 +3505,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3477 3505
3478 pkt_dev->skb = fill_packet(odev, pkt_dev); 3506 pkt_dev->skb = fill_packet(odev, pkt_dev);
3479 if (pkt_dev->skb == NULL) { 3507 if (pkt_dev->skb == NULL) {
3480 printk(KERN_ERR "pktgen: ERROR: couldn't " 3508 pr_err("ERROR: couldn't allocate skb in fill_packet\n");
3481 "allocate skb in fill_packet.\n");
3482 schedule(); 3509 schedule();
3483 pkt_dev->clone_count--; /* back out increment, OOM */ 3510 pkt_dev->clone_count--; /* back out increment, OOM */
3484 return; 3511 return;
@@ -3558,8 +3585,7 @@ static int pktgen_thread_worker(void *arg)
3558 init_waitqueue_head(&t->queue); 3585 init_waitqueue_head(&t->queue);
3559 complete(&t->start_done); 3586 complete(&t->start_done);
3560 3587
3561 pr_debug("pktgen: starting pktgen/%d: pid=%d\n", 3588 pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
3562 cpu, task_pid_nr(current));
3563 3589
3564 set_current_state(TASK_INTERRUPTIBLE); 3590 set_current_state(TASK_INTERRUPTIBLE);
3565 3591
@@ -3612,13 +3638,13 @@ static int pktgen_thread_worker(void *arg)
3612 set_current_state(TASK_INTERRUPTIBLE); 3638 set_current_state(TASK_INTERRUPTIBLE);
3613 } 3639 }
3614 3640
3615 pr_debug("pktgen: %s stopping all device\n", t->tsk->comm); 3641 pr_debug("%s stopping all device\n", t->tsk->comm);
3616 pktgen_stop(t); 3642 pktgen_stop(t);
3617 3643
3618 pr_debug("pktgen: %s removing all device\n", t->tsk->comm); 3644 pr_debug("%s removing all device\n", t->tsk->comm);
3619 pktgen_rem_all_ifs(t); 3645 pktgen_rem_all_ifs(t);
3620 3646
3621 pr_debug("pktgen: %s removing thread.\n", t->tsk->comm); 3647 pr_debug("%s removing thread\n", t->tsk->comm);
3622 pktgen_rem_thread(t); 3648 pktgen_rem_thread(t);
3623 3649
3624 return 0; 3650 return 0;
@@ -3642,7 +3668,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
3642 } 3668 }
3643 3669
3644 if_unlock(t); 3670 if_unlock(t);
3645 pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev); 3671 pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
3646 return pkt_dev; 3672 return pkt_dev;
3647} 3673}
3648 3674
@@ -3658,8 +3684,7 @@ static int add_dev_to_thread(struct pktgen_thread *t,
3658 if_lock(t); 3684 if_lock(t);
3659 3685
3660 if (pkt_dev->pg_thread) { 3686 if (pkt_dev->pg_thread) {
3661 printk(KERN_ERR "pktgen: ERROR: already assigned " 3687 pr_err("ERROR: already assigned to a thread\n");
3662 "to a thread.\n");
3663 rv = -EBUSY; 3688 rv = -EBUSY;
3664 goto out; 3689 goto out;
3665 } 3690 }
@@ -3685,7 +3710,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3685 3710
3686 pkt_dev = __pktgen_NN_threads(ifname, FIND); 3711 pkt_dev = __pktgen_NN_threads(ifname, FIND);
3687 if (pkt_dev) { 3712 if (pkt_dev) {
3688 printk(KERN_ERR "pktgen: ERROR: interface already used.\n"); 3713 pr_err("ERROR: interface already used\n");
3689 return -EBUSY; 3714 return -EBUSY;
3690 } 3715 }
3691 3716
@@ -3730,7 +3755,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3730 pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, 3755 pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir,
3731 &pktgen_if_fops, pkt_dev); 3756 &pktgen_if_fops, pkt_dev);
3732 if (!pkt_dev->entry) { 3757 if (!pkt_dev->entry) {
3733 printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", 3758 pr_err("cannot create %s/%s procfs entry\n",
3734 PG_PROC_DIR, ifname); 3759 PG_PROC_DIR, ifname);
3735 err = -EINVAL; 3760 err = -EINVAL;
3736 goto out2; 3761 goto out2;
@@ -3761,8 +3786,7 @@ static int __init pktgen_create_thread(int cpu)
3761 t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL, 3786 t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL,
3762 cpu_to_node(cpu)); 3787 cpu_to_node(cpu));
3763 if (!t) { 3788 if (!t) {
3764 printk(KERN_ERR "pktgen: ERROR: out of memory, can't " 3789 pr_err("ERROR: out of memory, can't create new thread\n");
3765 "create new thread.\n");
3766 return -ENOMEM; 3790 return -ENOMEM;
3767 } 3791 }
3768 3792
@@ -3776,8 +3800,7 @@ static int __init pktgen_create_thread(int cpu)
3776 3800
3777 p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); 3801 p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
3778 if (IS_ERR(p)) { 3802 if (IS_ERR(p)) {
3779 printk(KERN_ERR "pktgen: kernel_thread() failed " 3803 pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
3780 "for cpu %d\n", t->cpu);
3781 list_del(&t->th_list); 3804 list_del(&t->th_list);
3782 kfree(t); 3805 kfree(t);
3783 return PTR_ERR(p); 3806 return PTR_ERR(p);
@@ -3788,7 +3811,7 @@ static int __init pktgen_create_thread(int cpu)
3788 pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, 3811 pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir,
3789 &pktgen_thread_fops, t); 3812 &pktgen_thread_fops, t);
3790 if (!pe) { 3813 if (!pe) {
3791 printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", 3814 pr_err("cannot create %s/%s procfs entry\n",
3792 PG_PROC_DIR, t->tsk->comm); 3815 PG_PROC_DIR, t->tsk->comm);
3793 kthread_stop(p); 3816 kthread_stop(p);
3794 list_del(&t->th_list); 3817 list_del(&t->th_list);
@@ -3822,11 +3845,10 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3822 struct pktgen_dev *pkt_dev) 3845 struct pktgen_dev *pkt_dev)
3823{ 3846{
3824 3847
3825 pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev); 3848 pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
3826 3849
3827 if (pkt_dev->running) { 3850 if (pkt_dev->running) {
3828 printk(KERN_WARNING "pktgen: WARNING: trying to remove a " 3851 pr_warning("WARNING: trying to remove a running interface, stopping it now\n");
3829 "running interface, stopping it now.\n");
3830 pktgen_stop_device(pkt_dev); 3852 pktgen_stop_device(pkt_dev);
3831 } 3853 }
3832 3854
@@ -3857,7 +3879,7 @@ static int __init pg_init(void)
3857 int cpu; 3879 int cpu;
3858 struct proc_dir_entry *pe; 3880 struct proc_dir_entry *pe;
3859 3881
3860 printk(KERN_INFO "%s", version); 3882 pr_info("%s", version);
3861 3883
3862 pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); 3884 pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
3863 if (!pg_proc_dir) 3885 if (!pg_proc_dir)
@@ -3865,8 +3887,7 @@ static int __init pg_init(void)
3865 3887
3866 pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); 3888 pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops);
3867 if (pe == NULL) { 3889 if (pe == NULL) {
3868 printk(KERN_ERR "pktgen: ERROR: cannot create %s " 3890 pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL);
3869 "procfs entry.\n", PGCTRL);
3870 proc_net_remove(&init_net, PG_PROC_DIR); 3891 proc_net_remove(&init_net, PG_PROC_DIR);
3871 return -EINVAL; 3892 return -EINVAL;
3872 } 3893 }
@@ -3879,13 +3900,12 @@ static int __init pg_init(void)
3879 3900
3880 err = pktgen_create_thread(cpu); 3901 err = pktgen_create_thread(cpu);
3881 if (err) 3902 if (err)
3882 printk(KERN_WARNING "pktgen: WARNING: Cannot create " 3903 pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n",
3883 "thread for cpu %d (%d)\n", cpu, err); 3904 cpu, err);
3884 } 3905 }
3885 3906
3886 if (list_empty(&pktgen_threads)) { 3907 if (list_empty(&pktgen_threads)) {
3887 printk(KERN_ERR "pktgen: ERROR: Initialization failed for " 3908 pr_err("ERROR: Initialization failed for all threads\n");
3888 "all threads\n");
3889 unregister_netdevice_notifier(&pktgen_notifier_block); 3909 unregister_netdevice_notifier(&pktgen_notifier_block);
3890 remove_proc_entry(PGCTRL, pg_proc_dir); 3910 remove_proc_entry(PGCTRL, pg_proc_dir);
3891 proc_net_remove(&init_net, PG_PROC_DIR); 3911 proc_net_remove(&init_net, PG_PROC_DIR);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e4b9870e4706..f78d821bd935 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -579,7 +579,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
579} 579}
580 580
581static void copy_rtnl_link_stats(struct rtnl_link_stats *a, 581static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
582 const struct net_device_stats *b) 582 const struct rtnl_link_stats64 *b)
583{ 583{
584 a->rx_packets = b->rx_packets; 584 a->rx_packets = b->rx_packets;
585 a->tx_packets = b->tx_packets; 585 a->tx_packets = b->tx_packets;
@@ -610,7 +610,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
610 a->tx_compressed = b->tx_compressed; 610 a->tx_compressed = b->tx_compressed;
611} 611}
612 612
613static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b) 613static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
614{ 614{
615 struct rtnl_link_stats64 a; 615 struct rtnl_link_stats64 a;
616 616
@@ -650,11 +650,12 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev)
650 if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { 650 if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
651 651
652 int num_vfs = dev_num_vf(dev->dev.parent); 652 int num_vfs = dev_num_vf(dev->dev.parent);
653 size_t size = nlmsg_total_size(sizeof(struct nlattr)); 653 size_t size = nla_total_size(sizeof(struct nlattr));
654 size += nlmsg_total_size(num_vfs * sizeof(struct nlattr)); 654 size += nla_total_size(num_vfs * sizeof(struct nlattr));
655 size += num_vfs * (sizeof(struct ifla_vf_mac) + 655 size += num_vfs *
656 sizeof(struct ifla_vf_vlan) + 656 (nla_total_size(sizeof(struct ifla_vf_mac)) +
657 sizeof(struct ifla_vf_tx_rate)); 657 nla_total_size(sizeof(struct ifla_vf_vlan)) +
658 nla_total_size(sizeof(struct ifla_vf_tx_rate)));
658 return size; 659 return size;
659 } else 660 } else
660 return 0; 661 return 0;
@@ -685,7 +686,7 @@ static size_t rtnl_port_size(const struct net_device *dev)
685 return port_self_size; 686 return port_self_size;
686} 687}
687 688
688static inline size_t if_nlmsg_size(const struct net_device *dev) 689static noinline size_t if_nlmsg_size(const struct net_device *dev)
689{ 690{
690 return NLMSG_ALIGN(sizeof(struct ifinfomsg)) 691 return NLMSG_ALIGN(sizeof(struct ifinfomsg))
691 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ 692 + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
@@ -722,14 +723,13 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
722 723
723 for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) { 724 for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
724 vf_port = nla_nest_start(skb, IFLA_VF_PORT); 725 vf_port = nla_nest_start(skb, IFLA_VF_PORT);
725 if (!vf_port) { 726 if (!vf_port)
726 nla_nest_cancel(skb, vf_ports); 727 goto nla_put_failure;
727 return -EMSGSIZE;
728 }
729 NLA_PUT_U32(skb, IFLA_PORT_VF, vf); 728 NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
730 err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb); 729 err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
730 if (err == -EMSGSIZE)
731 goto nla_put_failure;
731 if (err) { 732 if (err) {
732nla_put_failure:
733 nla_nest_cancel(skb, vf_port); 733 nla_nest_cancel(skb, vf_port);
734 continue; 734 continue;
735 } 735 }
@@ -739,6 +739,10 @@ nla_put_failure:
739 nla_nest_end(skb, vf_ports); 739 nla_nest_end(skb, vf_ports);
740 740
741 return 0; 741 return 0;
742
743nla_put_failure:
744 nla_nest_cancel(skb, vf_ports);
745 return -EMSGSIZE;
742} 746}
743 747
744static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) 748static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
@@ -753,7 +757,7 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
753 err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb); 757 err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
754 if (err) { 758 if (err) {
755 nla_nest_cancel(skb, port_self); 759 nla_nest_cancel(skb, port_self);
756 return err; 760 return (err == -EMSGSIZE) ? err : 0;
757 } 761 }
758 762
759 nla_nest_end(skb, port_self); 763 nla_nest_end(skb, port_self);
@@ -787,7 +791,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
787{ 791{
788 struct ifinfomsg *ifm; 792 struct ifinfomsg *ifm;
789 struct nlmsghdr *nlh; 793 struct nlmsghdr *nlh;
790 const struct net_device_stats *stats; 794 struct rtnl_link_stats64 temp;
795 const struct rtnl_link_stats64 *stats;
791 struct nlattr *attr; 796 struct nlattr *attr;
792 797
793 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); 798 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -843,7 +848,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
843 if (attr == NULL) 848 if (attr == NULL)
844 goto nla_put_failure; 849 goto nla_put_failure;
845 850
846 stats = dev_get_stats(dev); 851 stats = dev_get_stats(dev, &temp);
847 copy_rtnl_link_stats(nla_data(attr), stats); 852 copy_rtnl_link_stats(nla_data(attr), stats);
848 853
849 attr = nla_reserve(skb, IFLA_STATS64, 854 attr = nla_reserve(skb, IFLA_STATS64,
@@ -1199,8 +1204,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1199 struct nlattr *attr; 1204 struct nlattr *attr;
1200 int rem; 1205 int rem;
1201 nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { 1206 nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
1202 if (nla_type(attr) != IFLA_VF_INFO) 1207 if (nla_type(attr) != IFLA_VF_INFO) {
1208 err = -EINVAL;
1203 goto errout; 1209 goto errout;
1210 }
1204 err = do_setvfinfo(dev, attr); 1211 err = do_setvfinfo(dev, attr);
1205 if (err < 0) 1212 if (err < 0)
1206 goto errout; 1213 goto errout;
diff --git a/net/core/scm.c b/net/core/scm.c
index b88f6f9d0b97..413cab89017d 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -130,6 +130,7 @@ void __scm_destroy(struct scm_cookie *scm)
130 } 130 }
131 } 131 }
132} 132}
133EXPORT_SYMBOL(__scm_destroy);
133 134
134int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) 135int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
135{ 136{
@@ -170,6 +171,30 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
170 err = scm_check_creds(&p->creds); 171 err = scm_check_creds(&p->creds);
171 if (err) 172 if (err)
172 goto error; 173 goto error;
174
175 if (pid_vnr(p->pid) != p->creds.pid) {
176 struct pid *pid;
177 err = -ESRCH;
178 pid = find_get_pid(p->creds.pid);
179 if (!pid)
180 goto error;
181 put_pid(p->pid);
182 p->pid = pid;
183 }
184
185 if ((p->cred->euid != p->creds.uid) ||
186 (p->cred->egid != p->creds.gid)) {
187 struct cred *cred;
188 err = -ENOMEM;
189 cred = prepare_creds();
190 if (!cred)
191 goto error;
192
193 cred->uid = cred->euid = p->creds.uid;
194 cred->gid = cred->egid = p->creds.uid;
195 put_cred(p->cred);
196 p->cred = cred;
197 }
173 break; 198 break;
174 default: 199 default:
175 goto error; 200 goto error;
@@ -187,6 +212,7 @@ error:
187 scm_destroy(p); 212 scm_destroy(p);
188 return err; 213 return err;
189} 214}
215EXPORT_SYMBOL(__scm_send);
190 216
191int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) 217int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
192{ 218{
@@ -225,6 +251,7 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
225out: 251out:
226 return err; 252 return err;
227} 253}
254EXPORT_SYMBOL(put_cmsg);
228 255
229void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) 256void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
230{ 257{
@@ -294,6 +321,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
294 */ 321 */
295 __scm_destroy(scm); 322 __scm_destroy(scm);
296} 323}
324EXPORT_SYMBOL(scm_detach_fds);
297 325
298struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) 326struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
299{ 327{
@@ -311,9 +339,4 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
311 } 339 }
312 return new_fpl; 340 return new_fpl;
313} 341}
314
315EXPORT_SYMBOL(__scm_destroy);
316EXPORT_SYMBOL(__scm_send);
317EXPORT_SYMBOL(put_cmsg);
318EXPORT_SYMBOL(scm_detach_fds);
319EXPORT_SYMBOL(scm_fp_dup); 342EXPORT_SYMBOL(scm_fp_dup);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4c11000a96aa..76d33ca5f037 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -482,22 +482,22 @@ EXPORT_SYMBOL(consume_skb);
482 * reference count dropping and cleans up the skbuff as if it 482 * reference count dropping and cleans up the skbuff as if it
483 * just came from __alloc_skb(). 483 * just came from __alloc_skb().
484 */ 484 */
485int skb_recycle_check(struct sk_buff *skb, int skb_size) 485bool skb_recycle_check(struct sk_buff *skb, int skb_size)
486{ 486{
487 struct skb_shared_info *shinfo; 487 struct skb_shared_info *shinfo;
488 488
489 if (irqs_disabled()) 489 if (irqs_disabled())
490 return 0; 490 return false;
491 491
492 if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) 492 if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
493 return 0; 493 return false;
494 494
495 skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); 495 skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
496 if (skb_end_pointer(skb) - skb->head < skb_size) 496 if (skb_end_pointer(skb) - skb->head < skb_size)
497 return 0; 497 return false;
498 498
499 if (skb_shared(skb) || skb_cloned(skb)) 499 if (skb_shared(skb) || skb_cloned(skb))
500 return 0; 500 return false;
501 501
502 skb_release_head_state(skb); 502 skb_release_head_state(skb);
503 503
@@ -509,7 +509,7 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
509 skb->data = skb->head + NET_SKB_PAD; 509 skb->data = skb->head + NET_SKB_PAD;
510 skb_reset_tail_pointer(skb); 510 skb_reset_tail_pointer(skb);
511 511
512 return 1; 512 return true;
513} 513}
514EXPORT_SYMBOL(skb_recycle_check); 514EXPORT_SYMBOL(skb_recycle_check);
515 515
@@ -532,6 +532,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
532 new->ip_summed = old->ip_summed; 532 new->ip_summed = old->ip_summed;
533 skb_copy_queue_mapping(new, old); 533 skb_copy_queue_mapping(new, old);
534 new->priority = old->priority; 534 new->priority = old->priority;
535 new->deliver_no_wcard = old->deliver_no_wcard;
535#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 536#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
536 new->ipvs_property = old->ipvs_property; 537 new->ipvs_property = old->ipvs_property;
537#endif 538#endif
@@ -569,7 +570,6 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
569 C(len); 570 C(len);
570 C(data_len); 571 C(data_len);
571 C(mac_len); 572 C(mac_len);
572 C(rxhash);
573 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; 573 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
574 n->cloned = 1; 574 n->cloned = 1;
575 n->nohdr = 0; 575 n->nohdr = 0;
@@ -1406,12 +1406,13 @@ new_page:
1406/* 1406/*
1407 * Fill page/offset/length into spd, if it can hold more pages. 1407 * Fill page/offset/length into spd, if it can hold more pages.
1408 */ 1408 */
1409static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, 1409static inline int spd_fill_page(struct splice_pipe_desc *spd,
1410 struct pipe_inode_info *pipe, struct page *page,
1410 unsigned int *len, unsigned int offset, 1411 unsigned int *len, unsigned int offset,
1411 struct sk_buff *skb, int linear, 1412 struct sk_buff *skb, int linear,
1412 struct sock *sk) 1413 struct sock *sk)
1413{ 1414{
1414 if (unlikely(spd->nr_pages == PIPE_BUFFERS)) 1415 if (unlikely(spd->nr_pages == pipe->buffers))
1415 return 1; 1416 return 1;
1416 1417
1417 if (linear) { 1418 if (linear) {
@@ -1447,7 +1448,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
1447 unsigned int plen, unsigned int *off, 1448 unsigned int plen, unsigned int *off,
1448 unsigned int *len, struct sk_buff *skb, 1449 unsigned int *len, struct sk_buff *skb,
1449 struct splice_pipe_desc *spd, int linear, 1450 struct splice_pipe_desc *spd, int linear,
1450 struct sock *sk) 1451 struct sock *sk,
1452 struct pipe_inode_info *pipe)
1451{ 1453{
1452 if (!*len) 1454 if (!*len)
1453 return 1; 1455 return 1;
@@ -1470,7 +1472,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
1470 /* the linear region may spread across several pages */ 1472 /* the linear region may spread across several pages */
1471 flen = min_t(unsigned int, flen, PAGE_SIZE - poff); 1473 flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
1472 1474
1473 if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk)) 1475 if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
1474 return 1; 1476 return 1;
1475 1477
1476 __segment_seek(&page, &poff, &plen, flen); 1478 __segment_seek(&page, &poff, &plen, flen);
@@ -1485,9 +1487,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
1485 * Map linear and fragment data from the skb to spd. It reports failure if the 1487 * Map linear and fragment data from the skb to spd. It reports failure if the
1486 * pipe is full or if we already spliced the requested length. 1488 * pipe is full or if we already spliced the requested length.
1487 */ 1489 */
1488static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, 1490static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1489 unsigned int *len, struct splice_pipe_desc *spd, 1491 unsigned int *offset, unsigned int *len,
1490 struct sock *sk) 1492 struct splice_pipe_desc *spd, struct sock *sk)
1491{ 1493{
1492 int seg; 1494 int seg;
1493 1495
@@ -1497,7 +1499,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
1497 if (__splice_segment(virt_to_page(skb->data), 1499 if (__splice_segment(virt_to_page(skb->data),
1498 (unsigned long) skb->data & (PAGE_SIZE - 1), 1500 (unsigned long) skb->data & (PAGE_SIZE - 1),
1499 skb_headlen(skb), 1501 skb_headlen(skb),
1500 offset, len, skb, spd, 1, sk)) 1502 offset, len, skb, spd, 1, sk, pipe))
1501 return 1; 1503 return 1;
1502 1504
1503 /* 1505 /*
@@ -1507,7 +1509,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
1507 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; 1509 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1508 1510
1509 if (__splice_segment(f->page, f->page_offset, f->size, 1511 if (__splice_segment(f->page, f->page_offset, f->size,
1510 offset, len, skb, spd, 0, sk)) 1512 offset, len, skb, spd, 0, sk, pipe))
1511 return 1; 1513 return 1;
1512 } 1514 }
1513 1515
@@ -1524,8 +1526,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1524 struct pipe_inode_info *pipe, unsigned int tlen, 1526 struct pipe_inode_info *pipe, unsigned int tlen,
1525 unsigned int flags) 1527 unsigned int flags)
1526{ 1528{
1527 struct partial_page partial[PIPE_BUFFERS]; 1529 struct partial_page partial[PIPE_DEF_BUFFERS];
1528 struct page *pages[PIPE_BUFFERS]; 1530 struct page *pages[PIPE_DEF_BUFFERS];
1529 struct splice_pipe_desc spd = { 1531 struct splice_pipe_desc spd = {
1530 .pages = pages, 1532 .pages = pages,
1531 .partial = partial, 1533 .partial = partial,
@@ -1535,12 +1537,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1535 }; 1537 };
1536 struct sk_buff *frag_iter; 1538 struct sk_buff *frag_iter;
1537 struct sock *sk = skb->sk; 1539 struct sock *sk = skb->sk;
1540 int ret = 0;
1541
1542 if (splice_grow_spd(pipe, &spd))
1543 return -ENOMEM;
1538 1544
1539 /* 1545 /*
1540 * __skb_splice_bits() only fails if the output has no room left, 1546 * __skb_splice_bits() only fails if the output has no room left,
1541 * so no point in going over the frag_list for the error case. 1547 * so no point in going over the frag_list for the error case.
1542 */ 1548 */
1543 if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk)) 1549 if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
1544 goto done; 1550 goto done;
1545 else if (!tlen) 1551 else if (!tlen)
1546 goto done; 1552 goto done;
@@ -1551,14 +1557,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1551 skb_walk_frags(skb, frag_iter) { 1557 skb_walk_frags(skb, frag_iter) {
1552 if (!tlen) 1558 if (!tlen)
1553 break; 1559 break;
1554 if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk)) 1560 if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
1555 break; 1561 break;
1556 } 1562 }
1557 1563
1558done: 1564done:
1559 if (spd.nr_pages) { 1565 if (spd.nr_pages) {
1560 int ret;
1561
1562 /* 1566 /*
1563 * Drop the socket lock, otherwise we have reverse 1567 * Drop the socket lock, otherwise we have reverse
1564 * locking dependencies between sk_lock and i_mutex 1568 * locking dependencies between sk_lock and i_mutex
@@ -1571,10 +1575,10 @@ done:
1571 release_sock(sk); 1575 release_sock(sk);
1572 ret = splice_to_pipe(pipe, &spd); 1576 ret = splice_to_pipe(pipe, &spd);
1573 lock_sock(sk); 1577 lock_sock(sk);
1574 return ret;
1575 } 1578 }
1576 1579
1577 return 0; 1580 splice_shrink_spd(pipe, &spd);
1581 return ret;
1578} 1582}
1579 1583
1580/** 1584/**
@@ -2479,7 +2483,6 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
2479 skb_postpull_rcsum(skb, skb->data, len); 2483 skb_postpull_rcsum(skb, skb->data, len);
2480 return skb->data += len; 2484 return skb->data += len;
2481} 2485}
2482
2483EXPORT_SYMBOL_GPL(skb_pull_rcsum); 2486EXPORT_SYMBOL_GPL(skb_pull_rcsum);
2484 2487
2485/** 2488/**
@@ -2961,6 +2964,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2961} 2964}
2962EXPORT_SYMBOL_GPL(skb_cow_data); 2965EXPORT_SYMBOL_GPL(skb_cow_data);
2963 2966
2967static void sock_rmem_free(struct sk_buff *skb)
2968{
2969 struct sock *sk = skb->sk;
2970
2971 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
2972}
2973
2974/*
2975 * Note: We dont mem charge error packets (no sk_forward_alloc changes)
2976 */
2977int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
2978{
2979 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
2980 (unsigned)sk->sk_rcvbuf)
2981 return -ENOMEM;
2982
2983 skb_orphan(skb);
2984 skb->sk = sk;
2985 skb->destructor = sock_rmem_free;
2986 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
2987
2988 skb_queue_tail(&sk->sk_error_queue, skb);
2989 if (!sock_flag(sk, SOCK_DEAD))
2990 sk->sk_data_ready(sk, skb->len);
2991 return 0;
2992}
2993EXPORT_SYMBOL(sock_queue_err_skb);
2994
2964void skb_tstamp_tx(struct sk_buff *orig_skb, 2995void skb_tstamp_tx(struct sk_buff *orig_skb,
2965 struct skb_shared_hwtstamps *hwtstamps) 2996 struct skb_shared_hwtstamps *hwtstamps)
2966{ 2997{
@@ -2992,7 +3023,9 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
2992 memset(serr, 0, sizeof(*serr)); 3023 memset(serr, 0, sizeof(*serr));
2993 serr->ee.ee_errno = ENOMSG; 3024 serr->ee.ee_errno = ENOMSG;
2994 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; 3025 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
3026
2995 err = sock_queue_err_skb(sk, skb); 3027 err = sock_queue_err_skb(sk, skb);
3028
2996 if (err) 3029 if (err)
2997 kfree_skb(skb); 3030 kfree_skb(skb);
2998} 3031}
diff --git a/net/core/sock.c b/net/core/sock.c
index bf88a167c8f2..b05b9b6ddb87 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -110,6 +110,7 @@
110#include <linux/tcp.h> 110#include <linux/tcp.h>
111#include <linux/init.h> 111#include <linux/init.h>
112#include <linux/highmem.h> 112#include <linux/highmem.h>
113#include <linux/user_namespace.h>
113 114
114#include <asm/uaccess.h> 115#include <asm/uaccess.h>
115#include <asm/system.h> 116#include <asm/system.h>
@@ -123,6 +124,7 @@
123#include <linux/net_tstamp.h> 124#include <linux/net_tstamp.h>
124#include <net/xfrm.h> 125#include <net/xfrm.h>
125#include <linux/ipsec.h> 126#include <linux/ipsec.h>
127#include <net/cls_cgroup.h>
126 128
127#include <linux/filter.h> 129#include <linux/filter.h>
128 130
@@ -155,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
155 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , 157 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
156 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , 158 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
157 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , 159 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
158 "sk_lock-AF_IEEE802154", 160 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" ,
159 "sk_lock-AF_MAX" 161 "sk_lock-AF_MAX"
160}; 162};
161static const char *const af_family_slock_key_strings[AF_MAX+1] = { 163static const char *const af_family_slock_key_strings[AF_MAX+1] = {
@@ -171,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
171 "slock-27" , "slock-28" , "slock-AF_CAN" , 173 "slock-27" , "slock-28" , "slock-AF_CAN" ,
172 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , 174 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
173 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , 175 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
174 "slock-AF_IEEE802154", 176 "slock-AF_IEEE802154", "slock-AF_CAIF" ,
175 "slock-AF_MAX" 177 "slock-AF_MAX"
176}; 178};
177static const char *const af_family_clock_key_strings[AF_MAX+1] = { 179static const char *const af_family_clock_key_strings[AF_MAX+1] = {
@@ -187,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
187 "clock-27" , "clock-28" , "clock-AF_CAN" , 189 "clock-27" , "clock-28" , "clock-AF_CAN" ,
188 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , 190 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
189 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , 191 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
190 "clock-AF_IEEE802154", 192 "clock-AF_IEEE802154", "clock-AF_CAIF" ,
191 "clock-AF_MAX" 193 "clock-AF_MAX"
192}; 194};
193 195
@@ -217,6 +219,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
217int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); 219int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
218EXPORT_SYMBOL(sysctl_optmem_max); 220EXPORT_SYMBOL(sysctl_optmem_max);
219 221
222#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
223int net_cls_subsys_id = -1;
224EXPORT_SYMBOL_GPL(net_cls_subsys_id);
225#endif
226
220static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) 227static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
221{ 228{
222 struct timeval tv; 229 struct timeval tv;
@@ -743,6 +750,20 @@ set_rcvbuf:
743EXPORT_SYMBOL(sock_setsockopt); 750EXPORT_SYMBOL(sock_setsockopt);
744 751
745 752
753void cred_to_ucred(struct pid *pid, const struct cred *cred,
754 struct ucred *ucred)
755{
756 ucred->pid = pid_vnr(pid);
757 ucred->uid = ucred->gid = -1;
758 if (cred) {
759 struct user_namespace *current_ns = current_user_ns();
760
761 ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
762 ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
763 }
764}
765EXPORT_SYMBOL_GPL(cred_to_ucred);
766
746int sock_getsockopt(struct socket *sock, int level, int optname, 767int sock_getsockopt(struct socket *sock, int level, int optname,
747 char __user *optval, int __user *optlen) 768 char __user *optval, int __user *optlen)
748{ 769{
@@ -895,11 +916,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
895 break; 916 break;
896 917
897 case SO_PEERCRED: 918 case SO_PEERCRED:
898 if (len > sizeof(sk->sk_peercred)) 919 {
899 len = sizeof(sk->sk_peercred); 920 struct ucred peercred;
900 if (copy_to_user(optval, &sk->sk_peercred, len)) 921 if (len > sizeof(peercred))
922 len = sizeof(peercred);
923 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
924 if (copy_to_user(optval, &peercred, len))
901 return -EFAULT; 925 return -EFAULT;
902 goto lenout; 926 goto lenout;
927 }
903 928
904 case SO_PEERNAME: 929 case SO_PEERNAME:
905 { 930 {
@@ -1050,6 +1075,17 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
1050 module_put(owner); 1075 module_put(owner);
1051} 1076}
1052 1077
1078#ifdef CONFIG_CGROUPS
1079void sock_update_classid(struct sock *sk)
1080{
1081 u32 classid = task_cls_classid(current);
1082
1083 if (classid && classid != sk->sk_classid)
1084 sk->sk_classid = classid;
1085}
1086EXPORT_SYMBOL(sock_update_classid);
1087#endif
1088
1053/** 1089/**
1054 * sk_alloc - All socket objects are allocated here 1090 * sk_alloc - All socket objects are allocated here
1055 * @net: the applicable net namespace 1091 * @net: the applicable net namespace
@@ -1073,6 +1109,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1073 sock_lock_init(sk); 1109 sock_lock_init(sk);
1074 sock_net_set(sk, get_net(net)); 1110 sock_net_set(sk, get_net(net));
1075 atomic_set(&sk->sk_wmem_alloc, 1); 1111 atomic_set(&sk->sk_wmem_alloc, 1);
1112
1113 sock_update_classid(sk);
1076 } 1114 }
1077 1115
1078 return sk; 1116 return sk;
@@ -1100,6 +1138,9 @@ static void __sk_free(struct sock *sk)
1100 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", 1138 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
1101 __func__, atomic_read(&sk->sk_omem_alloc)); 1139 __func__, atomic_read(&sk->sk_omem_alloc));
1102 1140
1141 if (sk->sk_peer_cred)
1142 put_cred(sk->sk_peer_cred);
1143 put_pid(sk->sk_peer_pid);
1103 put_net(sock_net(sk)); 1144 put_net(sock_net(sk));
1104 sk_prot_free(sk->sk_prot_creator, sk); 1145 sk_prot_free(sk->sk_prot_creator, sk);
1105} 1146}
@@ -1298,9 +1339,10 @@ EXPORT_SYMBOL(sock_wfree);
1298void sock_rfree(struct sk_buff *skb) 1339void sock_rfree(struct sk_buff *skb)
1299{ 1340{
1300 struct sock *sk = skb->sk; 1341 struct sock *sk = skb->sk;
1342 unsigned int len = skb->truesize;
1301 1343
1302 atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 1344 atomic_sub(len, &sk->sk_rmem_alloc);
1303 sk_mem_uncharge(skb->sk, skb->truesize); 1345 sk_mem_uncharge(sk, len);
1304} 1346}
1305EXPORT_SYMBOL(sock_rfree); 1347EXPORT_SYMBOL(sock_rfree);
1306 1348
@@ -1935,9 +1977,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1935 sk->sk_sndmsg_page = NULL; 1977 sk->sk_sndmsg_page = NULL;
1936 sk->sk_sndmsg_off = 0; 1978 sk->sk_sndmsg_off = 0;
1937 1979
1938 sk->sk_peercred.pid = 0; 1980 sk->sk_peer_pid = NULL;
1939 sk->sk_peercred.uid = -1; 1981 sk->sk_peer_cred = NULL;
1940 sk->sk_peercred.gid = -1;
1941 sk->sk_write_pending = 0; 1982 sk->sk_write_pending = 0;
1942 sk->sk_rcvlowat = 1; 1983 sk->sk_rcvlowat = 1;
1943 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 1984 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -1988,6 +2029,39 @@ void release_sock(struct sock *sk)
1988} 2029}
1989EXPORT_SYMBOL(release_sock); 2030EXPORT_SYMBOL(release_sock);
1990 2031
2032/**
2033 * lock_sock_fast - fast version of lock_sock
2034 * @sk: socket
2035 *
2036 * This version should be used for very small section, where process wont block
2037 * return false if fast path is taken
2038 * sk_lock.slock locked, owned = 0, BH disabled
2039 * return true if slow path is taken
2040 * sk_lock.slock unlocked, owned = 1, BH enabled
2041 */
2042bool lock_sock_fast(struct sock *sk)
2043{
2044 might_sleep();
2045 spin_lock_bh(&sk->sk_lock.slock);
2046
2047 if (!sk->sk_lock.owned)
2048 /*
2049 * Note : We must disable BH
2050 */
2051 return false;
2052
2053 __lock_sock(sk);
2054 sk->sk_lock.owned = 1;
2055 spin_unlock(&sk->sk_lock.slock);
2056 /*
2057 * The sk_lock has mutex_lock() semantics here:
2058 */
2059 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2060 local_bh_enable();
2061 return true;
2062}
2063EXPORT_SYMBOL(lock_sock_fast);
2064
1991int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) 2065int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1992{ 2066{
1993 struct timeval tv; 2067 struct timeval tv;
@@ -2158,8 +2232,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2158#ifdef CONFIG_NET_NS 2232#ifdef CONFIG_NET_NS
2159void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 2233void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2160{ 2234{
2161 int cpu = smp_processor_id(); 2235 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2162 per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
2163} 2236}
2164EXPORT_SYMBOL_GPL(sock_prot_inuse_add); 2237EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2165 2238
@@ -2205,7 +2278,7 @@ static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2205 2278
2206void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 2279void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2207{ 2280{
2208 __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val; 2281 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2209} 2282}
2210EXPORT_SYMBOL_GPL(sock_prot_inuse_add); 2283EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2211 2284
diff --git a/net/core/stream.c b/net/core/stream.c
index cc196f42b8d8..d959e0f41528 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -43,7 +43,6 @@ void sk_stream_write_space(struct sock *sk)
43 rcu_read_unlock(); 43 rcu_read_unlock();
44 } 44 }
45} 45}
46
47EXPORT_SYMBOL(sk_stream_write_space); 46EXPORT_SYMBOL(sk_stream_write_space);
48 47
49/** 48/**
@@ -81,7 +80,6 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
81 } while (!done); 80 } while (!done);
82 return 0; 81 return 0;
83} 82}
84
85EXPORT_SYMBOL(sk_stream_wait_connect); 83EXPORT_SYMBOL(sk_stream_wait_connect);
86 84
87/** 85/**
@@ -109,7 +107,6 @@ void sk_stream_wait_close(struct sock *sk, long timeout)
109 finish_wait(sk_sleep(sk), &wait); 107 finish_wait(sk_sleep(sk), &wait);
110 } 108 }
111} 109}
112
113EXPORT_SYMBOL(sk_stream_wait_close); 110EXPORT_SYMBOL(sk_stream_wait_close);
114 111
115/** 112/**
@@ -174,7 +171,6 @@ do_interrupted:
174 err = sock_intr_errno(*timeo_p); 171 err = sock_intr_errno(*timeo_p);
175 goto out; 172 goto out;
176} 173}
177
178EXPORT_SYMBOL(sk_stream_wait_memory); 174EXPORT_SYMBOL(sk_stream_wait_memory);
179 175
180int sk_stream_error(struct sock *sk, int flags, int err) 176int sk_stream_error(struct sock *sk, int flags, int err)
@@ -185,7 +181,6 @@ int sk_stream_error(struct sock *sk, int flags, int err)
185 send_sig(SIGPIPE, current, 0); 181 send_sig(SIGPIPE, current, 0);
186 return err; 182 return err;
187} 183}
188
189EXPORT_SYMBOL(sk_stream_error); 184EXPORT_SYMBOL(sk_stream_error);
190 185
191void sk_stream_kill_queues(struct sock *sk) 186void sk_stream_kill_queues(struct sock *sk)
@@ -210,5 +205,4 @@ void sk_stream_kill_queues(struct sock *sk)
210 * have gone away, only the net layer knows can touch it. 205 * have gone away, only the net layer knows can touch it.
211 */ 206 */
212} 207}
213
214EXPORT_SYMBOL(sk_stream_kill_queues); 208EXPORT_SYMBOL(sk_stream_kill_queues);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
new file mode 100644
index 000000000000..0ae6c22da85b
--- /dev/null
+++ b/net/core/timestamping.c
@@ -0,0 +1,126 @@
1/*
2 * PTP 1588 clock support - support for timestamping in PHY devices
3 *
4 * Copyright (C) 2010 OMICRON electronics GmbH
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#include <linux/errqueue.h>
21#include <linux/phy.h>
22#include <linux/ptp_classify.h>
23#include <linux/skbuff.h>
24
25static struct sock_filter ptp_filter[] = {
26 PTP_FILTER
27};
28
29static unsigned int classify(struct sk_buff *skb)
30{
31 if (likely(skb->dev &&
32 skb->dev->phydev &&
33 skb->dev->phydev->drv))
34 return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
35 else
36 return PTP_CLASS_NONE;
37}
38
39void skb_clone_tx_timestamp(struct sk_buff *skb)
40{
41 struct phy_device *phydev;
42 struct sk_buff *clone;
43 struct sock *sk = skb->sk;
44 unsigned int type;
45
46 if (!sk)
47 return;
48
49 type = classify(skb);
50
51 switch (type) {
52 case PTP_CLASS_V1_IPV4:
53 case PTP_CLASS_V1_IPV6:
54 case PTP_CLASS_V2_IPV4:
55 case PTP_CLASS_V2_IPV6:
56 case PTP_CLASS_V2_L2:
57 case PTP_CLASS_V2_VLAN:
58 phydev = skb->dev->phydev;
59 if (likely(phydev->drv->txtstamp)) {
60 clone = skb_clone(skb, GFP_ATOMIC);
61 if (!clone)
62 return;
63 clone->sk = sk;
64 phydev->drv->txtstamp(phydev, clone, type);
65 }
66 break;
67 default:
68 break;
69 }
70}
71
72void skb_complete_tx_timestamp(struct sk_buff *skb,
73 struct skb_shared_hwtstamps *hwtstamps)
74{
75 struct sock *sk = skb->sk;
76 struct sock_exterr_skb *serr;
77 int err;
78
79 if (!hwtstamps)
80 return;
81
82 *skb_hwtstamps(skb) = *hwtstamps;
83 serr = SKB_EXT_ERR(skb);
84 memset(serr, 0, sizeof(*serr));
85 serr->ee.ee_errno = ENOMSG;
86 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
87 skb->sk = NULL;
88 err = sock_queue_err_skb(sk, skb);
89 if (err)
90 kfree_skb(skb);
91}
92EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
93
94bool skb_defer_rx_timestamp(struct sk_buff *skb)
95{
96 struct phy_device *phydev;
97 unsigned int type;
98
99 skb_push(skb, ETH_HLEN);
100
101 type = classify(skb);
102
103 skb_pull(skb, ETH_HLEN);
104
105 switch (type) {
106 case PTP_CLASS_V1_IPV4:
107 case PTP_CLASS_V1_IPV6:
108 case PTP_CLASS_V2_IPV4:
109 case PTP_CLASS_V2_IPV6:
110 case PTP_CLASS_V2_L2:
111 case PTP_CLASS_V2_VLAN:
112 phydev = skb->dev->phydev;
113 if (likely(phydev->drv->rxtstamp))
114 return phydev->drv->rxtstamp(phydev, skb, type);
115 break;
116 default:
117 break;
118 }
119
120 return false;
121}
122
123void __init skb_timestamping_init(void)
124{
125 BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter)));
126}
diff --git a/net/core/utils.c b/net/core/utils.c
index 838250241d26..f41854470539 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -77,7 +77,6 @@ __be32 in_aton(const char *str)
77 } 77 }
78 return(htonl(l)); 78 return(htonl(l));
79} 79}
80
81EXPORT_SYMBOL(in_aton); 80EXPORT_SYMBOL(in_aton);
82 81
83#define IN6PTON_XDIGIT 0x00010000 82#define IN6PTON_XDIGIT 0x00010000
@@ -162,7 +161,6 @@ out:
162 *end = s; 161 *end = s;
163 return ret; 162 return ret;
164} 163}
165
166EXPORT_SYMBOL(in4_pton); 164EXPORT_SYMBOL(in4_pton);
167 165
168int in6_pton(const char *src, int srclen, 166int in6_pton(const char *src, int srclen,
@@ -280,7 +278,6 @@ out:
280 *end = s; 278 *end = s;
281 return ret; 279 return ret;
282} 280}
283
284EXPORT_SYMBOL(in6_pton); 281EXPORT_SYMBOL(in6_pton);
285 282
286void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, 283void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 01e4d39fa232..92a6fcb40d7d 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -82,7 +82,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
82 elapsed_time = delta / 10; 82 elapsed_time = delta / 10;
83 83
84 if (elapsed_time != 0 && 84 if (elapsed_time != 0 &&
85 dccp_insert_option_elapsed_time(sk, skb, elapsed_time)) 85 dccp_insert_option_elapsed_time(skb, elapsed_time))
86 return -1; 86 return -1;
87 87
88 avr = dccp_ackvec_record_new(); 88 avr = dccp_ackvec_record_new();
@@ -201,7 +201,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
201 const unsigned int packets, 201 const unsigned int packets,
202 const unsigned char state) 202 const unsigned char state)
203{ 203{
204 unsigned int gap; 204 long gap;
205 long new_head; 205 long new_head;
206 206
207 if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN) 207 if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d3235899c7e3..95f752986497 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -715,9 +715,9 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
715 x_recv = htonl(hc->rx_x_recv); 715 x_recv = htonl(hc->rx_x_recv);
716 pinv = htonl(hc->rx_pinv); 716 pinv = htonl(hc->rx_pinv);
717 717
718 if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, 718 if (dccp_insert_option(skb, TFRC_OPT_LOSS_EVENT_RATE,
719 &pinv, sizeof(pinv)) || 719 &pinv, sizeof(pinv)) ||
720 dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, 720 dccp_insert_option(skb, TFRC_OPT_RECEIVE_RATE,
721 &x_recv, sizeof(x_recv))) 721 &x_recv, sizeof(x_recv)))
722 return -1; 722 return -1;
723 723
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index a10a61a1ded2..3ccef1b70fee 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -446,16 +446,12 @@ extern void dccp_feat_list_purge(struct list_head *fn_list);
446 446
447extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); 447extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
448extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); 448extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
449extern int dccp_insert_option_elapsed_time(struct sock *sk, 449extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed);
450 struct sk_buff *skb,
451 u32 elapsed_time);
452extern u32 dccp_timestamp(void); 450extern u32 dccp_timestamp(void);
453extern void dccp_timestamping_init(void); 451extern void dccp_timestamping_init(void);
454extern int dccp_insert_option_timestamp(struct sock *sk, 452extern int dccp_insert_option_timestamp(struct sk_buff *skb);
455 struct sk_buff *skb); 453extern int dccp_insert_option(struct sk_buff *skb, unsigned char option,
456extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb, 454 const void *value, unsigned char len);
457 unsigned char option,
458 const void *value, unsigned char len);
459 455
460#ifdef CONFIG_SYSCTL 456#ifdef CONFIG_SYSCTL
461extern int dccp_sysctl_init(void); 457extern int dccp_sysctl_init(void);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 58f7bc156850..10c957a88f4f 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -124,9 +124,9 @@ static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
124 return queued; 124 return queued;
125} 125}
126 126
127static u8 dccp_reset_code_convert(const u8 code) 127static u16 dccp_reset_code_convert(const u8 code)
128{ 128{
129 const u8 error_code[] = { 129 const u16 error_code[] = {
130 [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */ 130 [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */
131 [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */ 131 [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */
132 [DCCP_RESET_CODE_ABORTED] = ECONNRESET, 132 [DCCP_RESET_CODE_ABORTED] = ECONNRESET,
@@ -148,7 +148,7 @@ static u8 dccp_reset_code_convert(const u8 code)
148 148
149static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) 149static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
150{ 150{
151 u8 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code); 151 u16 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code);
152 152
153 sk->sk_err = err; 153 sk->sk_err = err;
154 154
@@ -430,7 +430,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
430 if (dccp_parse_options(sk, NULL, skb)) 430 if (dccp_parse_options(sk, NULL, skb))
431 return 1; 431 return 1;
432 432
433 /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ 433 /* Obtain usec RTT sample from SYN exchange (used by TFRC). */
434 if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) 434 if (likely(dp->dccps_options_received.dccpor_timestamp_echo))
435 dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - 435 dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp -
436 dp->dccps_options_received.dccpor_timestamp_echo)); 436 dp->dccps_options_received.dccpor_timestamp_echo));
@@ -535,6 +535,8 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
535 const struct dccp_hdr *dh, 535 const struct dccp_hdr *dh,
536 const unsigned len) 536 const unsigned len)
537{ 537{
538 struct dccp_sock *dp = dccp_sk(sk);
539 u32 sample = dp->dccps_options_received.dccpor_timestamp_echo;
538 int queued = 0; 540 int queued = 0;
539 541
540 switch (dh->dccph_type) { 542 switch (dh->dccph_type) {
@@ -559,7 +561,14 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
559 if (sk->sk_state == DCCP_PARTOPEN) 561 if (sk->sk_state == DCCP_PARTOPEN)
560 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); 562 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
561 563
562 dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; 564 /* Obtain usec RTT sample from SYN exchange (used by TFRC). */
565 if (likely(sample)) {
566 long delta = dccp_timestamp() - sample;
567
568 dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * delta);
569 }
570
571 dp->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
563 dccp_set_state(sk, DCCP_OPEN); 572 dccp_set_state(sk, DCCP_OPEN);
564 573
565 if (dh->dccph_type == DCCP_PKT_DATAACK || 574 if (dh->dccph_type == DCCP_PKT_DATAACK ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d9b11ef8694c..d4a166f0f391 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -105,7 +105,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
105 goto failure; 105 goto failure;
106 106
107 /* OK, now commit destination to socket. */ 107 /* OK, now commit destination to socket. */
108 sk_setup_caps(sk, &rt->u.dst); 108 sk_setup_caps(sk, &rt->dst);
109 109
110 dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, 110 dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr,
111 inet->inet_daddr, 111 inet->inet_daddr,
@@ -475,7 +475,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
475 return NULL; 475 return NULL;
476 } 476 }
477 477
478 return &rt->u.dst; 478 return &rt->dst;
479} 479}
480 480
481static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, 481static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 091698899594..6e3f32575df7 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -248,7 +248,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
248 struct ipv6_pinfo *np = inet6_sk(sk); 248 struct ipv6_pinfo *np = inet6_sk(sk);
249 struct sk_buff *skb; 249 struct sk_buff *skb;
250 struct ipv6_txoptions *opt = NULL; 250 struct ipv6_txoptions *opt = NULL;
251 struct in6_addr *final_p = NULL, final; 251 struct in6_addr *final_p, final;
252 struct flowi fl; 252 struct flowi fl;
253 int err = -1; 253 int err = -1;
254 struct dst_entry *dst; 254 struct dst_entry *dst;
@@ -265,13 +265,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
265 265
266 opt = np->opt; 266 opt = np->opt;
267 267
268 if (opt != NULL && opt->srcrt != NULL) { 268 final_p = fl6_update_dst(&fl, opt, &final);
269 const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
270
271 ipv6_addr_copy(&final, &fl.fl6_dst);
272 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
273 final_p = &final;
274 }
275 269
276 err = ip6_dst_lookup(sk, &dst, &fl); 270 err = ip6_dst_lookup(sk, &dst, &fl);
277 if (err) 271 if (err)
@@ -545,19 +539,13 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
545 goto out_overflow; 539 goto out_overflow;
546 540
547 if (dst == NULL) { 541 if (dst == NULL) {
548 struct in6_addr *final_p = NULL, final; 542 struct in6_addr *final_p, final;
549 struct flowi fl; 543 struct flowi fl;
550 544
551 memset(&fl, 0, sizeof(fl)); 545 memset(&fl, 0, sizeof(fl));
552 fl.proto = IPPROTO_DCCP; 546 fl.proto = IPPROTO_DCCP;
553 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); 547 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
554 if (opt != NULL && opt->srcrt != NULL) { 548 final_p = fl6_update_dst(&fl, opt, &final);
555 const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
556
557 ipv6_addr_copy(&final, &fl.fl6_dst);
558 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
559 final_p = &final;
560 }
561 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); 549 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
562 fl.oif = sk->sk_bound_dev_if; 550 fl.oif = sk->sk_bound_dev_if;
563 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 551 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
@@ -885,7 +873,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
885 struct inet_sock *inet = inet_sk(sk); 873 struct inet_sock *inet = inet_sk(sk);
886 struct ipv6_pinfo *np = inet6_sk(sk); 874 struct ipv6_pinfo *np = inet6_sk(sk);
887 struct dccp_sock *dp = dccp_sk(sk); 875 struct dccp_sock *dp = dccp_sk(sk);
888 struct in6_addr *saddr = NULL, *final_p = NULL, final; 876 struct in6_addr *saddr = NULL, *final_p, final;
889 struct flowi fl; 877 struct flowi fl;
890 struct dst_entry *dst; 878 struct dst_entry *dst;
891 int addr_type; 879 int addr_type;
@@ -988,13 +976,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
988 fl.fl_ip_sport = inet->inet_sport; 976 fl.fl_ip_sport = inet->inet_sport;
989 security_sk_classify_flow(sk, &fl); 977 security_sk_classify_flow(sk, &fl);
990 978
991 if (np->opt != NULL && np->opt->srcrt != NULL) { 979 final_p = fl6_update_dst(&fl, np->opt, &final);
992 const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
993
994 ipv6_addr_copy(&final, &fl.fl6_dst);
995 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
996 final_p = &final;
997 }
998 980
999 err = ip6_dst_lookup(sk, &dst, &fl); 981 err = ip6_dst_lookup(sk, &dst, &fl);
1000 if (err) 982 if (err)
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 1b08cae9c65b..bfda087bd90d 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -296,12 +296,11 @@ static inline u8 dccp_ndp_len(const u64 ndp)
296{ 296{
297 if (likely(ndp <= 0xFF)) 297 if (likely(ndp <= 0xFF))
298 return 1; 298 return 1;
299 return likely(ndp <= USHORT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); 299 return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6);
300} 300}
301 301
302int dccp_insert_option(struct sock *sk, struct sk_buff *skb, 302int dccp_insert_option(struct sk_buff *skb, const unsigned char option,
303 const unsigned char option, 303 const void *value, const unsigned char len)
304 const void *value, const unsigned char len)
305{ 304{
306 unsigned char *to; 305 unsigned char *to;
307 306
@@ -354,8 +353,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time)
354 return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; 353 return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
355} 354}
356 355
357int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, 356int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time)
358 u32 elapsed_time)
359{ 357{
360 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); 358 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
361 const int len = 2 + elapsed_time_len; 359 const int len = 2 + elapsed_time_len;
@@ -386,13 +384,13 @@ int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb,
386 384
387EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); 385EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
388 386
389int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) 387int dccp_insert_option_timestamp(struct sk_buff *skb)
390{ 388{
391 __be32 now = htonl(dccp_timestamp()); 389 __be32 now = htonl(dccp_timestamp());
392 /* yes this will overflow but that is the point as we want a 390 /* yes this will overflow but that is the point as we want a
393 * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ 391 * 10 usec 32 bit timer which mean it wraps every 11.9 hours */
394 392
395 return dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); 393 return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now));
396} 394}
397 395
398EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); 396EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
@@ -531,9 +529,9 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
531 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) { 529 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) {
532 /* 530 /*
533 * Obtain RTT sample from Request/Response exchange. 531 * Obtain RTT sample from Request/Response exchange.
534 * This is currently used in CCID 3 initialisation. 532 * This is currently used for TFRC initialisation.
535 */ 533 */
536 if (dccp_insert_option_timestamp(sk, skb)) 534 if (dccp_insert_option_timestamp(skb))
537 return -1; 535 return -1;
538 536
539 } else if (dp->dccps_hc_rx_ackvec != NULL && 537 } else if (dp->dccps_hc_rx_ackvec != NULL &&
@@ -564,6 +562,10 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
564 if (dccp_feat_insert_opts(NULL, dreq, skb)) 562 if (dccp_feat_insert_opts(NULL, dreq, skb))
565 return -1; 563 return -1;
566 564
565 /* Obtain RTT sample from Response/Ack exchange (used by TFRC). */
566 if (dccp_insert_option_timestamp(skb))
567 return -1;
568
567 if (dreq->dreq_timestamp_echo != 0 && 569 if (dreq->dreq_timestamp_echo != 0 &&
568 dccp_insert_option_timestamp_echo(NULL, dreq, skb)) 570 dccp_insert_option_timestamp_echo(NULL, dreq, skb))
569 return -1; 571 return -1;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b03ecf6b2bb0..096250d1323b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -473,14 +473,9 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type,
473 if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) 473 if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
474 return -EINVAL; 474 return -EINVAL;
475 475
476 val = kmalloc(optlen, GFP_KERNEL); 476 val = memdup_user(optval, optlen);
477 if (val == NULL) 477 if (IS_ERR(val))
478 return -ENOMEM; 478 return PTR_ERR(val);
479
480 if (copy_from_user(val, optval, optlen)) {
481 kfree(val);
482 return -EFAULT;
483 }
484 479
485 lock_sock(sk); 480 lock_sock(sk);
486 if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) 481 if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
@@ -1007,7 +1002,8 @@ EXPORT_SYMBOL_GPL(dccp_shutdown);
1007static inline int dccp_mib_init(void) 1002static inline int dccp_mib_init(void)
1008{ 1003{
1009 return snmp_mib_init((void __percpu **)dccp_statistics, 1004 return snmp_mib_init((void __percpu **)dccp_statistics,
1010 sizeof(struct dccp_mib)); 1005 sizeof(struct dccp_mib),
1006 __alignof__(struct dccp_mib));
1011} 1007}
1012 1008
1013static inline void dccp_mib_exit(void) 1009static inline void dccp_mib_exit(void)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 812e6dff6067..6585ea6d1182 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -146,13 +146,13 @@ static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
146 146
147static inline void dnrt_free(struct dn_route *rt) 147static inline void dnrt_free(struct dn_route *rt)
148{ 148{
149 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 149 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
150} 150}
151 151
152static inline void dnrt_drop(struct dn_route *rt) 152static inline void dnrt_drop(struct dn_route *rt)
153{ 153{
154 dst_release(&rt->u.dst); 154 dst_release(&rt->dst);
155 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 155 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
156} 156}
157 157
158static void dn_dst_check_expire(unsigned long dummy) 158static void dn_dst_check_expire(unsigned long dummy)
@@ -167,13 +167,13 @@ static void dn_dst_check_expire(unsigned long dummy)
167 167
168 spin_lock(&dn_rt_hash_table[i].lock); 168 spin_lock(&dn_rt_hash_table[i].lock);
169 while((rt=*rtp) != NULL) { 169 while((rt=*rtp) != NULL) {
170 if (atomic_read(&rt->u.dst.__refcnt) || 170 if (atomic_read(&rt->dst.__refcnt) ||
171 (now - rt->u.dst.lastuse) < expire) { 171 (now - rt->dst.lastuse) < expire) {
172 rtp = &rt->u.dst.dn_next; 172 rtp = &rt->dst.dn_next;
173 continue; 173 continue;
174 } 174 }
175 *rtp = rt->u.dst.dn_next; 175 *rtp = rt->dst.dn_next;
176 rt->u.dst.dn_next = NULL; 176 rt->dst.dn_next = NULL;
177 dnrt_free(rt); 177 dnrt_free(rt);
178 } 178 }
179 spin_unlock(&dn_rt_hash_table[i].lock); 179 spin_unlock(&dn_rt_hash_table[i].lock);
@@ -198,13 +198,13 @@ static int dn_dst_gc(struct dst_ops *ops)
198 rtp = &dn_rt_hash_table[i].chain; 198 rtp = &dn_rt_hash_table[i].chain;
199 199
200 while((rt=*rtp) != NULL) { 200 while((rt=*rtp) != NULL) {
201 if (atomic_read(&rt->u.dst.__refcnt) || 201 if (atomic_read(&rt->dst.__refcnt) ||
202 (now - rt->u.dst.lastuse) < expire) { 202 (now - rt->dst.lastuse) < expire) {
203 rtp = &rt->u.dst.dn_next; 203 rtp = &rt->dst.dn_next;
204 continue; 204 continue;
205 } 205 }
206 *rtp = rt->u.dst.dn_next; 206 *rtp = rt->dst.dn_next;
207 rt->u.dst.dn_next = NULL; 207 rt->dst.dn_next = NULL;
208 dnrt_drop(rt); 208 dnrt_drop(rt);
209 break; 209 break;
210 } 210 }
@@ -287,25 +287,25 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route *
287 while((rth = *rthp) != NULL) { 287 while((rth = *rthp) != NULL) {
288 if (compare_keys(&rth->fl, &rt->fl)) { 288 if (compare_keys(&rth->fl, &rt->fl)) {
289 /* Put it first */ 289 /* Put it first */
290 *rthp = rth->u.dst.dn_next; 290 *rthp = rth->dst.dn_next;
291 rcu_assign_pointer(rth->u.dst.dn_next, 291 rcu_assign_pointer(rth->dst.dn_next,
292 dn_rt_hash_table[hash].chain); 292 dn_rt_hash_table[hash].chain);
293 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); 293 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
294 294
295 dst_use(&rth->u.dst, now); 295 dst_use(&rth->dst, now);
296 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 296 spin_unlock_bh(&dn_rt_hash_table[hash].lock);
297 297
298 dnrt_drop(rt); 298 dnrt_drop(rt);
299 *rp = rth; 299 *rp = rth;
300 return 0; 300 return 0;
301 } 301 }
302 rthp = &rth->u.dst.dn_next; 302 rthp = &rth->dst.dn_next;
303 } 303 }
304 304
305 rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain); 305 rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
306 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); 306 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
307 307
308 dst_use(&rt->u.dst, now); 308 dst_use(&rt->dst, now);
309 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 309 spin_unlock_bh(&dn_rt_hash_table[hash].lock);
310 *rp = rt; 310 *rp = rt;
311 return 0; 311 return 0;
@@ -323,8 +323,8 @@ static void dn_run_flush(unsigned long dummy)
323 goto nothing_to_declare; 323 goto nothing_to_declare;
324 324
325 for(; rt; rt=next) { 325 for(; rt; rt=next) {
326 next = rt->u.dst.dn_next; 326 next = rt->dst.dn_next;
327 rt->u.dst.dn_next = NULL; 327 rt->dst.dn_next = NULL;
328 dst_free((struct dst_entry *)rt); 328 dst_free((struct dst_entry *)rt);
329 } 329 }
330 330
@@ -743,7 +743,7 @@ static int dn_forward(struct sk_buff *skb)
743 /* Ensure that we have enough space for headers */ 743 /* Ensure that we have enough space for headers */
744 rt = (struct dn_route *)skb_dst(skb); 744 rt = (struct dn_route *)skb_dst(skb);
745 header_len = dn_db->use_long ? 21 : 6; 745 header_len = dn_db->use_long ? 21 : 6;
746 if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len)) 746 if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len))
747 goto drop; 747 goto drop;
748 748
749 /* 749 /*
@@ -752,7 +752,7 @@ static int dn_forward(struct sk_buff *skb)
752 if (++cb->hops > 30) 752 if (++cb->hops > 30)
753 goto drop; 753 goto drop;
754 754
755 skb->dev = rt->u.dst.dev; 755 skb->dev = rt->dst.dev;
756 756
757 /* 757 /*
758 * If packet goes out same interface it came in on, then set 758 * If packet goes out same interface it came in on, then set
@@ -792,7 +792,7 @@ static int dn_rt_bug(struct sk_buff *skb)
792static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) 792static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
793{ 793{
794 struct dn_fib_info *fi = res->fi; 794 struct dn_fib_info *fi = res->fi;
795 struct net_device *dev = rt->u.dst.dev; 795 struct net_device *dev = rt->dst.dev;
796 struct neighbour *n; 796 struct neighbour *n;
797 unsigned mss; 797 unsigned mss;
798 798
@@ -800,25 +800,25 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
800 if (DN_FIB_RES_GW(*res) && 800 if (DN_FIB_RES_GW(*res) &&
801 DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 801 DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
802 rt->rt_gateway = DN_FIB_RES_GW(*res); 802 rt->rt_gateway = DN_FIB_RES_GW(*res);
803 memcpy(rt->u.dst.metrics, fi->fib_metrics, 803 memcpy(rt->dst.metrics, fi->fib_metrics,
804 sizeof(rt->u.dst.metrics)); 804 sizeof(rt->dst.metrics));
805 } 805 }
806 rt->rt_type = res->type; 806 rt->rt_type = res->type;
807 807
808 if (dev != NULL && rt->u.dst.neighbour == NULL) { 808 if (dev != NULL && rt->dst.neighbour == NULL) {
809 n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); 809 n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
810 if (IS_ERR(n)) 810 if (IS_ERR(n))
811 return PTR_ERR(n); 811 return PTR_ERR(n);
812 rt->u.dst.neighbour = n; 812 rt->dst.neighbour = n;
813 } 813 }
814 814
815 if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 || 815 if (dst_metric(&rt->dst, RTAX_MTU) == 0 ||
816 dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu) 816 dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
817 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; 817 rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
818 mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst)); 818 mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
819 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 || 819 if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 ||
820 dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss) 820 dst_metric(&rt->dst, RTAX_ADVMSS) > mss)
821 rt->u.dst.metrics[RTAX_ADVMSS-1] = mss; 821 rt->dst.metrics[RTAX_ADVMSS-1] = mss;
822 return 0; 822 return 0;
823} 823}
824 824
@@ -1096,8 +1096,8 @@ make_route:
1096 if (rt == NULL) 1096 if (rt == NULL)
1097 goto e_nobufs; 1097 goto e_nobufs;
1098 1098
1099 atomic_set(&rt->u.dst.__refcnt, 1); 1099 atomic_set(&rt->dst.__refcnt, 1);
1100 rt->u.dst.flags = DST_HOST; 1100 rt->dst.flags = DST_HOST;
1101 1101
1102 rt->fl.fld_src = oldflp->fld_src; 1102 rt->fl.fld_src = oldflp->fld_src;
1103 rt->fl.fld_dst = oldflp->fld_dst; 1103 rt->fl.fld_dst = oldflp->fld_dst;
@@ -1113,17 +1113,17 @@ make_route:
1113 rt->rt_dst_map = fl.fld_dst; 1113 rt->rt_dst_map = fl.fld_dst;
1114 rt->rt_src_map = fl.fld_src; 1114 rt->rt_src_map = fl.fld_src;
1115 1115
1116 rt->u.dst.dev = dev_out; 1116 rt->dst.dev = dev_out;
1117 dev_hold(dev_out); 1117 dev_hold(dev_out);
1118 rt->u.dst.neighbour = neigh; 1118 rt->dst.neighbour = neigh;
1119 neigh = NULL; 1119 neigh = NULL;
1120 1120
1121 rt->u.dst.lastuse = jiffies; 1121 rt->dst.lastuse = jiffies;
1122 rt->u.dst.output = dn_output; 1122 rt->dst.output = dn_output;
1123 rt->u.dst.input = dn_rt_bug; 1123 rt->dst.input = dn_rt_bug;
1124 rt->rt_flags = flags; 1124 rt->rt_flags = flags;
1125 if (flags & RTCF_LOCAL) 1125 if (flags & RTCF_LOCAL)
1126 rt->u.dst.input = dn_nsp_rx; 1126 rt->dst.input = dn_nsp_rx;
1127 1127
1128 err = dn_rt_set_next_hop(rt, &res); 1128 err = dn_rt_set_next_hop(rt, &res);
1129 if (err) 1129 if (err)
@@ -1152,7 +1152,7 @@ e_nobufs:
1152 err = -ENOBUFS; 1152 err = -ENOBUFS;
1153 goto done; 1153 goto done;
1154e_neighbour: 1154e_neighbour:
1155 dst_free(&rt->u.dst); 1155 dst_free(&rt->dst);
1156 goto e_nobufs; 1156 goto e_nobufs;
1157} 1157}
1158 1158
@@ -1168,15 +1168,15 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
1168 if (!(flags & MSG_TRYHARD)) { 1168 if (!(flags & MSG_TRYHARD)) {
1169 rcu_read_lock_bh(); 1169 rcu_read_lock_bh();
1170 for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; 1170 for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
1171 rt = rcu_dereference_bh(rt->u.dst.dn_next)) { 1171 rt = rcu_dereference_bh(rt->dst.dn_next)) {
1172 if ((flp->fld_dst == rt->fl.fld_dst) && 1172 if ((flp->fld_dst == rt->fl.fld_dst) &&
1173 (flp->fld_src == rt->fl.fld_src) && 1173 (flp->fld_src == rt->fl.fld_src) &&
1174 (flp->mark == rt->fl.mark) && 1174 (flp->mark == rt->fl.mark) &&
1175 (rt->fl.iif == 0) && 1175 (rt->fl.iif == 0) &&
1176 (rt->fl.oif == flp->oif)) { 1176 (rt->fl.oif == flp->oif)) {
1177 dst_use(&rt->u.dst, jiffies); 1177 dst_use(&rt->dst, jiffies);
1178 rcu_read_unlock_bh(); 1178 rcu_read_unlock_bh();
1179 *pprt = &rt->u.dst; 1179 *pprt = &rt->dst;
1180 return 0; 1180 return 0;
1181 } 1181 }
1182 } 1182 }
@@ -1375,29 +1375,29 @@ make_route:
1375 rt->fl.iif = in_dev->ifindex; 1375 rt->fl.iif = in_dev->ifindex;
1376 rt->fl.mark = fl.mark; 1376 rt->fl.mark = fl.mark;
1377 1377
1378 rt->u.dst.flags = DST_HOST; 1378 rt->dst.flags = DST_HOST;
1379 rt->u.dst.neighbour = neigh; 1379 rt->dst.neighbour = neigh;
1380 rt->u.dst.dev = out_dev; 1380 rt->dst.dev = out_dev;
1381 rt->u.dst.lastuse = jiffies; 1381 rt->dst.lastuse = jiffies;
1382 rt->u.dst.output = dn_rt_bug; 1382 rt->dst.output = dn_rt_bug;
1383 switch(res.type) { 1383 switch(res.type) {
1384 case RTN_UNICAST: 1384 case RTN_UNICAST:
1385 rt->u.dst.input = dn_forward; 1385 rt->dst.input = dn_forward;
1386 break; 1386 break;
1387 case RTN_LOCAL: 1387 case RTN_LOCAL:
1388 rt->u.dst.output = dn_output; 1388 rt->dst.output = dn_output;
1389 rt->u.dst.input = dn_nsp_rx; 1389 rt->dst.input = dn_nsp_rx;
1390 rt->u.dst.dev = in_dev; 1390 rt->dst.dev = in_dev;
1391 flags |= RTCF_LOCAL; 1391 flags |= RTCF_LOCAL;
1392 break; 1392 break;
1393 default: 1393 default:
1394 case RTN_UNREACHABLE: 1394 case RTN_UNREACHABLE:
1395 case RTN_BLACKHOLE: 1395 case RTN_BLACKHOLE:
1396 rt->u.dst.input = dst_discard; 1396 rt->dst.input = dst_discard;
1397 } 1397 }
1398 rt->rt_flags = flags; 1398 rt->rt_flags = flags;
1399 if (rt->u.dst.dev) 1399 if (rt->dst.dev)
1400 dev_hold(rt->u.dst.dev); 1400 dev_hold(rt->dst.dev);
1401 1401
1402 err = dn_rt_set_next_hop(rt, &res); 1402 err = dn_rt_set_next_hop(rt, &res);
1403 if (err) 1403 if (err)
@@ -1405,7 +1405,7 @@ make_route:
1405 1405
1406 hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); 1406 hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst);
1407 dn_insert_route(rt, hash, &rt); 1407 dn_insert_route(rt, hash, &rt);
1408 skb_dst_set(skb, &rt->u.dst); 1408 skb_dst_set(skb, &rt->dst);
1409 1409
1410done: 1410done:
1411 if (neigh) 1411 if (neigh)
@@ -1427,7 +1427,7 @@ e_nobufs:
1427 goto done; 1427 goto done;
1428 1428
1429e_neighbour: 1429e_neighbour:
1430 dst_free(&rt->u.dst); 1430 dst_free(&rt->dst);
1431 goto done; 1431 goto done;
1432} 1432}
1433 1433
@@ -1442,13 +1442,13 @@ static int dn_route_input(struct sk_buff *skb)
1442 1442
1443 rcu_read_lock(); 1443 rcu_read_lock();
1444 for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; 1444 for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
1445 rt = rcu_dereference(rt->u.dst.dn_next)) { 1445 rt = rcu_dereference(rt->dst.dn_next)) {
1446 if ((rt->fl.fld_src == cb->src) && 1446 if ((rt->fl.fld_src == cb->src) &&
1447 (rt->fl.fld_dst == cb->dst) && 1447 (rt->fl.fld_dst == cb->dst) &&
1448 (rt->fl.oif == 0) && 1448 (rt->fl.oif == 0) &&
1449 (rt->fl.mark == skb->mark) && 1449 (rt->fl.mark == skb->mark) &&
1450 (rt->fl.iif == cb->iif)) { 1450 (rt->fl.iif == cb->iif)) {
1451 dst_use(&rt->u.dst, jiffies); 1451 dst_use(&rt->dst, jiffies);
1452 rcu_read_unlock(); 1452 rcu_read_unlock();
1453 skb_dst_set(skb, (struct dst_entry *)rt); 1453 skb_dst_set(skb, (struct dst_entry *)rt);
1454 return 0; 1454 return 0;
@@ -1487,8 +1487,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1487 r->rtm_src_len = 16; 1487 r->rtm_src_len = 16;
1488 RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src); 1488 RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src);
1489 } 1489 }
1490 if (rt->u.dst.dev) 1490 if (rt->dst.dev)
1491 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); 1491 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex);
1492 /* 1492 /*
1493 * Note to self - change this if input routes reverse direction when 1493 * Note to self - change this if input routes reverse direction when
1494 * they deal only with inputs and not with replies like they do 1494 * they deal only with inputs and not with replies like they do
@@ -1497,11 +1497,11 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1497 RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src); 1497 RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src);
1498 if (rt->rt_daddr != rt->rt_gateway) 1498 if (rt->rt_daddr != rt->rt_gateway)
1499 RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway); 1499 RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway);
1500 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 1500 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
1501 goto rtattr_failure; 1501 goto rtattr_failure;
1502 expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; 1502 expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
1503 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, 1503 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
1504 rt->u.dst.error) < 0) 1504 rt->dst.error) < 0)
1505 goto rtattr_failure; 1505 goto rtattr_failure;
1506 if (rt->fl.iif) 1506 if (rt->fl.iif)
1507 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); 1507 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
@@ -1568,8 +1568,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1568 local_bh_enable(); 1568 local_bh_enable();
1569 memset(cb, 0, sizeof(struct dn_skb_cb)); 1569 memset(cb, 0, sizeof(struct dn_skb_cb));
1570 rt = (struct dn_route *)skb_dst(skb); 1570 rt = (struct dn_route *)skb_dst(skb);
1571 if (!err && -rt->u.dst.error) 1571 if (!err && -rt->dst.error)
1572 err = rt->u.dst.error; 1572 err = rt->dst.error;
1573 } else { 1573 } else {
1574 int oif = 0; 1574 int oif = 0;
1575 if (rta[RTA_OIF - 1]) 1575 if (rta[RTA_OIF - 1])
@@ -1583,7 +1583,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1583 skb->dev = NULL; 1583 skb->dev = NULL;
1584 if (err) 1584 if (err)
1585 goto out_free; 1585 goto out_free;
1586 skb_dst_set(skb, &rt->u.dst); 1586 skb_dst_set(skb, &rt->dst);
1587 if (rtm->rtm_flags & RTM_F_NOTIFY) 1587 if (rtm->rtm_flags & RTM_F_NOTIFY)
1588 rt->rt_flags |= RTCF_NOTIFY; 1588 rt->rt_flags |= RTCF_NOTIFY;
1589 1589
@@ -1632,10 +1632,10 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
1632 rcu_read_lock_bh(); 1632 rcu_read_lock_bh();
1633 for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; 1633 for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
1634 rt; 1634 rt;
1635 rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) { 1635 rt = rcu_dereference_bh(rt->dst.dn_next), idx++) {
1636 if (idx < s_idx) 1636 if (idx < s_idx)
1637 continue; 1637 continue;
1638 skb_dst_set(skb, dst_clone(&rt->u.dst)); 1638 skb_dst_set(skb, dst_clone(&rt->dst));
1639 if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 1639 if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
1640 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1640 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
1641 1, NLM_F_MULTI) <= 0) { 1641 1, NLM_F_MULTI) <= 0) {
@@ -1678,7 +1678,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
1678{ 1678{
1679 struct dn_rt_cache_iter_state *s = seq->private; 1679 struct dn_rt_cache_iter_state *s = seq->private;
1680 1680
1681 rt = rt->u.dst.dn_next; 1681 rt = rt->dst.dn_next;
1682 while(!rt) { 1682 while(!rt) {
1683 rcu_read_unlock_bh(); 1683 rcu_read_unlock_bh();
1684 if (--s->bucket < 0) 1684 if (--s->bucket < 0)
@@ -1719,12 +1719,12 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
1719 char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN]; 1719 char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
1720 1720
1721 seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", 1721 seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
1722 rt->u.dst.dev ? rt->u.dst.dev->name : "*", 1722 rt->dst.dev ? rt->dst.dev->name : "*",
1723 dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), 1723 dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
1724 dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), 1724 dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
1725 atomic_read(&rt->u.dst.__refcnt), 1725 atomic_read(&rt->dst.__refcnt),
1726 rt->u.dst.__use, 1726 rt->dst.__use,
1727 (int) dst_metric(&rt->u.dst, RTAX_RTT)); 1727 (int) dst_metric(&rt->dst, RTAX_RTT));
1728 return 0; 1728 return 0;
1729} 1729}
1730 1730
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index c51b55400dc5..11201784d29a 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,7 +1,7 @@
1menuconfig NET_DSA 1menuconfig NET_DSA
2 bool "Distributed Switch Architecture support" 2 bool "Distributed Switch Architecture support"
3 default n 3 default n
4 depends on EXPERIMENTAL && !S390 4 depends on EXPERIMENTAL && NET_ETHERNET && !S390
5 select PHYLIB 5 select PHYLIB
6 ---help--- 6 ---help---
7 This allows you to use hardware switch chips that use 7 This allows you to use hardware switch chips that use
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8fdca56bb08f..64ca2a6fa0d4 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -164,10 +164,9 @@ out:
164static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 164static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
165{ 165{
166 struct dsa_slave_priv *p = netdev_priv(dev); 166 struct dsa_slave_priv *p = netdev_priv(dev);
167 struct mii_ioctl_data *mii_data = if_mii(ifr);
168 167
169 if (p->phy != NULL) 168 if (p->phy != NULL)
170 return phy_mii_ioctl(p->phy, mii_data, cmd); 169 return phy_mii_ioctl(p->phy, ifr, cmd);
171 170
172 return -EOPNOTSUPP; 171 return -EOPNOTSUPP;
173} 172}
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 2a5a8053e000..dc54bd0d083b 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -48,7 +48,7 @@
48 48
49static const struct proto_ops econet_ops; 49static const struct proto_ops econet_ops;
50static struct hlist_head econet_sklist; 50static struct hlist_head econet_sklist;
51static DEFINE_RWLOCK(econet_lock); 51static DEFINE_SPINLOCK(econet_lock);
52static DEFINE_MUTEX(econet_mutex); 52static DEFINE_MUTEX(econet_mutex);
53 53
54/* Since there are only 256 possible network numbers (or fewer, depends 54/* Since there are only 256 possible network numbers (or fewer, depends
@@ -98,16 +98,16 @@ struct ec_cb
98 98
99static void econet_remove_socket(struct hlist_head *list, struct sock *sk) 99static void econet_remove_socket(struct hlist_head *list, struct sock *sk)
100{ 100{
101 write_lock_bh(&econet_lock); 101 spin_lock_bh(&econet_lock);
102 sk_del_node_init(sk); 102 sk_del_node_init(sk);
103 write_unlock_bh(&econet_lock); 103 spin_unlock_bh(&econet_lock);
104} 104}
105 105
106static void econet_insert_socket(struct hlist_head *list, struct sock *sk) 106static void econet_insert_socket(struct hlist_head *list, struct sock *sk)
107{ 107{
108 write_lock_bh(&econet_lock); 108 spin_lock_bh(&econet_lock);
109 sk_add_node(sk, list); 109 sk_add_node(sk, list);
110 write_unlock_bh(&econet_lock); 110 spin_unlock_bh(&econet_lock);
111} 111}
112 112
113/* 113/*
@@ -782,15 +782,19 @@ static struct sock *ec_listening_socket(unsigned char port, unsigned char
782 struct sock *sk; 782 struct sock *sk;
783 struct hlist_node *node; 783 struct hlist_node *node;
784 784
785 spin_lock(&econet_lock);
785 sk_for_each(sk, node, &econet_sklist) { 786 sk_for_each(sk, node, &econet_sklist) {
786 struct econet_sock *opt = ec_sk(sk); 787 struct econet_sock *opt = ec_sk(sk);
787 if ((opt->port == port || opt->port == 0) && 788 if ((opt->port == port || opt->port == 0) &&
788 (opt->station == station || opt->station == 0) && 789 (opt->station == station || opt->station == 0) &&
789 (opt->net == net || opt->net == 0)) 790 (opt->net == net || opt->net == 0)) {
791 sock_hold(sk);
790 goto found; 792 goto found;
793 }
791 } 794 }
792 sk = NULL; 795 sk = NULL;
793found: 796found:
797 spin_unlock(&econet_lock);
794 return sk; 798 return sk;
795} 799}
796 800
@@ -852,7 +856,7 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
852{ 856{
853 struct iphdr *ip = ip_hdr(skb); 857 struct iphdr *ip = ip_hdr(skb);
854 unsigned char stn = ntohl(ip->saddr) & 0xff; 858 unsigned char stn = ntohl(ip->saddr) & 0xff;
855 struct sock *sk; 859 struct sock *sk = NULL;
856 struct sk_buff *newskb; 860 struct sk_buff *newskb;
857 struct ec_device *edev = skb->dev->ec_ptr; 861 struct ec_device *edev = skb->dev->ec_ptr;
858 862
@@ -882,10 +886,13 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
882 } 886 }
883 887
884 aun_send_response(ip->saddr, ah->handle, 3, 0); 888 aun_send_response(ip->saddr, ah->handle, 3, 0);
889 sock_put(sk);
885 return; 890 return;
886 891
887bad: 892bad:
888 aun_send_response(ip->saddr, ah->handle, 4, 0); 893 aun_send_response(ip->saddr, ah->handle, 4, 0);
894 if (sk)
895 sock_put(sk);
889} 896}
890 897
891/* 898/*
@@ -1050,7 +1057,7 @@ release:
1050static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 1057static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
1051{ 1058{
1052 struct ec_framehdr *hdr; 1059 struct ec_framehdr *hdr;
1053 struct sock *sk; 1060 struct sock *sk = NULL;
1054 struct ec_device *edev = dev->ec_ptr; 1061 struct ec_device *edev = dev->ec_ptr;
1055 1062
1056 if (!net_eq(dev_net(dev), &init_net)) 1063 if (!net_eq(dev_net(dev), &init_net))
@@ -1085,10 +1092,12 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
1085 if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb, 1092 if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb,
1086 hdr->port)) 1093 hdr->port))
1087 goto drop; 1094 goto drop;
1088 1095 sock_put(sk);
1089 return NET_RX_SUCCESS; 1096 return NET_RX_SUCCESS;
1090 1097
1091drop: 1098drop:
1099 if (sk)
1100 sock_put(sk);
1092 kfree_skb(skb); 1101 kfree_skb(skb);
1093 return NET_RX_DROP; 1102 return NET_RX_DROP;
1094} 1103}
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 61ec0329316c..215c83986a9d 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -158,7 +158,6 @@ EXPORT_SYMBOL(eth_rebuild_header);
158__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) 158__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
159{ 159{
160 struct ethhdr *eth; 160 struct ethhdr *eth;
161 unsigned char *rawp;
162 161
163 skb->dev = dev; 162 skb->dev = dev;
164 skb_reset_mac_header(skb); 163 skb_reset_mac_header(skb);
@@ -199,15 +198,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
199 if (ntohs(eth->h_proto) >= 1536) 198 if (ntohs(eth->h_proto) >= 1536)
200 return eth->h_proto; 199 return eth->h_proto;
201 200
202 rawp = skb->data;
203
204 /* 201 /*
205 * This is a magic hack to spot IPX packets. Older Novell breaks 202 * This is a magic hack to spot IPX packets. Older Novell breaks
206 * the protocol design and runs IPX over 802.3 without an 802.2 LLC 203 * the protocol design and runs IPX over 802.3 without an 802.2 LLC
207 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This 204 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
208 * won't work for fault tolerant netware but does for the rest. 205 * won't work for fault tolerant netware but does for the rest.
209 */ 206 */
210 if (*(unsigned short *)rawp == 0xFFFF) 207 if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF)
211 return htons(ETH_P_802_3); 208 return htons(ETH_P_802_3);
212 209
213 /* 210 /*
diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c
index eb00796758c3..85d574addbc1 100644
--- a/net/ethernet/pe2.c
+++ b/net/ethernet/pe2.c
@@ -28,11 +28,10 @@ struct datalink_proto *make_EII_client(void)
28 28
29 return proto; 29 return proto;
30} 30}
31EXPORT_SYMBOL(make_EII_client);
31 32
32void destroy_EII_client(struct datalink_proto *dl) 33void destroy_EII_client(struct datalink_proto *dl)
33{ 34{
34 kfree(dl); 35 kfree(dl);
35} 36}
36
37EXPORT_SYMBOL(destroy_EII_client); 37EXPORT_SYMBOL(destroy_EII_client);
38EXPORT_SYMBOL(make_EII_client);
diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c
index 3d803a1b9fb6..1627ef2e8522 100644
--- a/net/ieee802154/wpan-class.c
+++ b/net/ieee802154/wpan-class.c
@@ -147,13 +147,15 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size)
147 struct wpan_phy *phy = kzalloc(sizeof(*phy) + priv_size, 147 struct wpan_phy *phy = kzalloc(sizeof(*phy) + priv_size,
148 GFP_KERNEL); 148 GFP_KERNEL);
149 149
150 if (!phy)
151 goto out;
150 mutex_lock(&wpan_phy_mutex); 152 mutex_lock(&wpan_phy_mutex);
151 phy->idx = wpan_phy_idx++; 153 phy->idx = wpan_phy_idx++;
152 if (unlikely(!wpan_phy_idx_valid(phy->idx))) { 154 if (unlikely(!wpan_phy_idx_valid(phy->idx))) {
153 wpan_phy_idx--; 155 wpan_phy_idx--;
154 mutex_unlock(&wpan_phy_mutex); 156 mutex_unlock(&wpan_phy_mutex);
155 kfree(phy); 157 kfree(phy);
156 return NULL; 158 goto out;
157 } 159 }
158 mutex_unlock(&wpan_phy_mutex); 160 mutex_unlock(&wpan_phy_mutex);
159 161
@@ -168,6 +170,9 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size)
168 phy->current_page = 0; /* for compatibility */ 170 phy->current_page = 0; /* for compatibility */
169 171
170 return phy; 172 return phy;
173
174out:
175 return NULL;
171} 176}
172EXPORT_SYMBOL(wpan_phy_alloc); 177EXPORT_SYMBOL(wpan_phy_alloc);
173 178
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 8e3a1fd938ab..7c3a7d191249 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -303,7 +303,7 @@ config ARPD
303 If unsure, say N. 303 If unsure, say N.
304 304
305config SYN_COOKIES 305config SYN_COOKIES
306 bool "IP: TCP syncookie support (disabled per default)" 306 bool "IP: TCP syncookie support"
307 ---help--- 307 ---help---
308 Normal TCP/IP networking is open to an attack known as "SYN 308 Normal TCP/IP networking is open to an attack known as "SYN
309 flooding". This denial-of-service attack prevents legitimate remote 309 flooding". This denial-of-service attack prevents legitimate remote
@@ -328,13 +328,13 @@ config SYN_COOKIES
328 server is really overloaded. If this happens frequently better turn 328 server is really overloaded. If this happens frequently better turn
329 them off. 329 them off.
330 330
331 If you say Y here, note that SYN cookies aren't enabled by default; 331 If you say Y here, you can disable SYN cookies at run time by
332 you can enable them by saying Y to "/proc file system support" and 332 saying Y to "/proc file system support" and
333 "Sysctl support" below and executing the command 333 "Sysctl support" below and executing the command
334 334
335 echo 1 >/proc/sys/net/ipv4/tcp_syncookies 335 echo 0 > /proc/sys/net/ipv4/tcp_syncookies
336 336
337 at boot time after the /proc file system has been mounted. 337 after the /proc file system has been mounted.
338 338
339 If unsure, say N. 339 If unsure, say N.
340 340
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 551ce564b035..6a1100c25a9f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -355,6 +355,8 @@ lookup_protocol:
355 inet = inet_sk(sk); 355 inet = inet_sk(sk);
356 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; 356 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
357 357
358 inet->nodefrag = 0;
359
358 if (SOCK_RAW == sock->type) { 360 if (SOCK_RAW == sock->type) {
359 inet->inet_num = protocol; 361 inet->inet_num = protocol;
360 if (IPPROTO_RAW == protocol) 362 if (IPPROTO_RAW == protocol)
@@ -725,28 +727,31 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
725 sock_rps_record_flow(sk); 727 sock_rps_record_flow(sk);
726 728
727 /* We may need to bind the socket. */ 729 /* We may need to bind the socket. */
728 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 730 if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
731 inet_autobind(sk))
729 return -EAGAIN; 732 return -EAGAIN;
730 733
731 return sk->sk_prot->sendmsg(iocb, sk, msg, size); 734 return sk->sk_prot->sendmsg(iocb, sk, msg, size);
732} 735}
733EXPORT_SYMBOL(inet_sendmsg); 736EXPORT_SYMBOL(inet_sendmsg);
734 737
735static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, 738ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
736 size_t size, int flags) 739 size_t size, int flags)
737{ 740{
738 struct sock *sk = sock->sk; 741 struct sock *sk = sock->sk;
739 742
740 sock_rps_record_flow(sk); 743 sock_rps_record_flow(sk);
741 744
742 /* We may need to bind the socket. */ 745 /* We may need to bind the socket. */
743 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 746 if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
747 inet_autobind(sk))
744 return -EAGAIN; 748 return -EAGAIN;
745 749
746 if (sk->sk_prot->sendpage) 750 if (sk->sk_prot->sendpage)
747 return sk->sk_prot->sendpage(sk, page, offset, size, flags); 751 return sk->sk_prot->sendpage(sk, page, offset, size, flags);
748 return sock_no_sendpage(sock, page, offset, size, flags); 752 return sock_no_sendpage(sock, page, offset, size, flags);
749} 753}
754EXPORT_SYMBOL(inet_sendpage);
750 755
751int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, 756int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
752 size_t size, int flags) 757 size_t size, int flags)
@@ -892,10 +897,10 @@ const struct proto_ops inet_stream_ops = {
892 .shutdown = inet_shutdown, 897 .shutdown = inet_shutdown,
893 .setsockopt = sock_common_setsockopt, 898 .setsockopt = sock_common_setsockopt,
894 .getsockopt = sock_common_getsockopt, 899 .getsockopt = sock_common_getsockopt,
895 .sendmsg = tcp_sendmsg, 900 .sendmsg = inet_sendmsg,
896 .recvmsg = inet_recvmsg, 901 .recvmsg = inet_recvmsg,
897 .mmap = sock_no_mmap, 902 .mmap = sock_no_mmap,
898 .sendpage = tcp_sendpage, 903 .sendpage = inet_sendpage,
899 .splice_read = tcp_splice_read, 904 .splice_read = tcp_splice_read,
900#ifdef CONFIG_COMPAT 905#ifdef CONFIG_COMPAT
901 .compat_setsockopt = compat_sock_common_setsockopt, 906 .compat_setsockopt = compat_sock_common_setsockopt,
@@ -1100,7 +1105,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1100 if (err) 1105 if (err)
1101 return err; 1106 return err;
1102 1107
1103 sk_setup_caps(sk, &rt->u.dst); 1108 sk_setup_caps(sk, &rt->dst);
1104 1109
1105 new_saddr = rt->rt_src; 1110 new_saddr = rt->rt_src;
1106 1111
@@ -1166,7 +1171,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1166 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); 1171 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
1167} 1172}
1168 if (!err) 1173 if (!err)
1169 sk_setup_caps(sk, &rt->u.dst); 1174 sk_setup_caps(sk, &rt->dst);
1170 else { 1175 else {
1171 /* Routing failed... */ 1176 /* Routing failed... */
1172 sk->sk_route_caps = 0; 1177 sk->sk_route_caps = 0;
@@ -1425,13 +1430,49 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt)
1425} 1430}
1426EXPORT_SYMBOL_GPL(snmp_fold_field); 1431EXPORT_SYMBOL_GPL(snmp_fold_field);
1427 1432
1428int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) 1433#if BITS_PER_LONG==32
1434
1435u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
1436{
1437 u64 res = 0;
1438 int cpu;
1439
1440 for_each_possible_cpu(cpu) {
1441 void *bhptr, *userptr;
1442 struct u64_stats_sync *syncp;
1443 u64 v_bh, v_user;
1444 unsigned int start;
1445
1446 /* first mib used by softirq context, we must use _bh() accessors */
1447 bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
1448 syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
1449 do {
1450 start = u64_stats_fetch_begin_bh(syncp);
1451 v_bh = *(((u64 *) bhptr) + offt);
1452 } while (u64_stats_fetch_retry_bh(syncp, start));
1453
1454 /* second mib used in USER context */
1455 userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
1456 syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
1457 do {
1458 start = u64_stats_fetch_begin(syncp);
1459 v_user = *(((u64 *) userptr) + offt);
1460 } while (u64_stats_fetch_retry(syncp, start));
1461
1462 res += v_bh + v_user;
1463 }
1464 return res;
1465}
1466EXPORT_SYMBOL_GPL(snmp_fold_field64);
1467#endif
1468
1469int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
1429{ 1470{
1430 BUG_ON(ptr == NULL); 1471 BUG_ON(ptr == NULL);
1431 ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long)); 1472 ptr[0] = __alloc_percpu(mibsize, align);
1432 if (!ptr[0]) 1473 if (!ptr[0])
1433 goto err0; 1474 goto err0;
1434 ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long)); 1475 ptr[1] = __alloc_percpu(mibsize, align);
1435 if (!ptr[1]) 1476 if (!ptr[1])
1436 goto err1; 1477 goto err1;
1437 return 0; 1478 return 0;
@@ -1488,25 +1529,32 @@ static const struct net_protocol icmp_protocol = {
1488static __net_init int ipv4_mib_init_net(struct net *net) 1529static __net_init int ipv4_mib_init_net(struct net *net)
1489{ 1530{
1490 if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, 1531 if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
1491 sizeof(struct tcp_mib)) < 0) 1532 sizeof(struct tcp_mib),
1533 __alignof__(struct tcp_mib)) < 0)
1492 goto err_tcp_mib; 1534 goto err_tcp_mib;
1493 if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, 1535 if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
1494 sizeof(struct ipstats_mib)) < 0) 1536 sizeof(struct ipstats_mib),
1537 __alignof__(struct ipstats_mib)) < 0)
1495 goto err_ip_mib; 1538 goto err_ip_mib;
1496 if (snmp_mib_init((void __percpu **)net->mib.net_statistics, 1539 if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
1497 sizeof(struct linux_mib)) < 0) 1540 sizeof(struct linux_mib),
1541 __alignof__(struct linux_mib)) < 0)
1498 goto err_net_mib; 1542 goto err_net_mib;
1499 if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, 1543 if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
1500 sizeof(struct udp_mib)) < 0) 1544 sizeof(struct udp_mib),
1545 __alignof__(struct udp_mib)) < 0)
1501 goto err_udp_mib; 1546 goto err_udp_mib;
1502 if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, 1547 if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
1503 sizeof(struct udp_mib)) < 0) 1548 sizeof(struct udp_mib),
1549 __alignof__(struct udp_mib)) < 0)
1504 goto err_udplite_mib; 1550 goto err_udplite_mib;
1505 if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, 1551 if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
1506 sizeof(struct icmp_mib)) < 0) 1552 sizeof(struct icmp_mib),
1553 __alignof__(struct icmp_mib)) < 0)
1507 goto err_icmp_mib; 1554 goto err_icmp_mib;
1508 if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, 1555 if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
1509 sizeof(struct icmpmsg_mib)) < 0) 1556 sizeof(struct icmpmsg_mib),
1557 __alignof__(struct icmpmsg_mib)) < 0)
1510 goto err_icmpmsg_mib; 1558 goto err_icmpmsg_mib;
1511 1559
1512 tcp_mib_init(net); 1560 tcp_mib_init(net);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f094b75810db..96c1955b3e2f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -116,6 +116,7 @@
116#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) 116#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
117#include <net/atmclip.h> 117#include <net/atmclip.h>
118struct neigh_table *clip_tbl_hook; 118struct neigh_table *clip_tbl_hook;
119EXPORT_SYMBOL(clip_tbl_hook);
119#endif 120#endif
120 121
121#include <asm/system.h> 122#include <asm/system.h>
@@ -169,6 +170,7 @@ const struct neigh_ops arp_broken_ops = {
169 .hh_output = dev_queue_xmit, 170 .hh_output = dev_queue_xmit,
170 .queue_xmit = dev_queue_xmit, 171 .queue_xmit = dev_queue_xmit,
171}; 172};
173EXPORT_SYMBOL(arp_broken_ops);
172 174
173struct neigh_table arp_tbl = { 175struct neigh_table arp_tbl = {
174 .family = AF_INET, 176 .family = AF_INET,
@@ -198,6 +200,7 @@ struct neigh_table arp_tbl = {
198 .gc_thresh2 = 512, 200 .gc_thresh2 = 512,
199 .gc_thresh3 = 1024, 201 .gc_thresh3 = 1024,
200}; 202};
203EXPORT_SYMBOL(arp_tbl);
201 204
202int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) 205int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
203{ 206{
@@ -333,11 +336,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
333 struct net_device *dev = neigh->dev; 336 struct net_device *dev = neigh->dev;
334 __be32 target = *(__be32*)neigh->primary_key; 337 __be32 target = *(__be32*)neigh->primary_key;
335 int probes = atomic_read(&neigh->probes); 338 int probes = atomic_read(&neigh->probes);
336 struct in_device *in_dev = in_dev_get(dev); 339 struct in_device *in_dev;
337 340
338 if (!in_dev) 341 rcu_read_lock();
342 in_dev = __in_dev_get_rcu(dev);
343 if (!in_dev) {
344 rcu_read_unlock();
339 return; 345 return;
340 346 }
341 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { 347 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
342 default: 348 default:
343 case 0: /* By default announce any local IP */ 349 case 0: /* By default announce any local IP */
@@ -358,9 +364,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
358 case 2: /* Avoid secondary IPs, get a primary/preferred one */ 364 case 2: /* Avoid secondary IPs, get a primary/preferred one */
359 break; 365 break;
360 } 366 }
367 rcu_read_unlock();
361 368
362 if (in_dev)
363 in_dev_put(in_dev);
364 if (!saddr) 369 if (!saddr)
365 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); 370 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
366 371
@@ -427,7 +432,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
427 432
428 if (ip_route_output_key(net, &rt, &fl) < 0) 433 if (ip_route_output_key(net, &rt, &fl) < 0)
429 return 1; 434 return 1;
430 if (rt->u.dst.dev != dev) { 435 if (rt->dst.dev != dev) {
431 NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); 436 NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
432 flag = 1; 437 flag = 1;
433 } 438 }
@@ -497,6 +502,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
497 kfree_skb(skb); 502 kfree_skb(skb);
498 return 1; 503 return 1;
499} 504}
505EXPORT_SYMBOL(arp_find);
500 506
501/* END OF OBSOLETE FUNCTIONS */ 507/* END OF OBSOLETE FUNCTIONS */
502 508
@@ -532,7 +538,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
532 struct in_device *out_dev; 538 struct in_device *out_dev;
533 int imi, omi = -1; 539 int imi, omi = -1;
534 540
535 if (rt->u.dst.dev == dev) 541 if (rt->dst.dev == dev)
536 return 0; 542 return 0;
537 543
538 if (!IN_DEV_PROXY_ARP(in_dev)) 544 if (!IN_DEV_PROXY_ARP(in_dev))
@@ -545,10 +551,10 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
545 551
546 /* place to check for proxy_arp for routes */ 552 /* place to check for proxy_arp for routes */
547 553
548 if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) { 554 out_dev = __in_dev_get_rcu(rt->dst.dev);
555 if (out_dev)
549 omi = IN_DEV_MEDIUM_ID(out_dev); 556 omi = IN_DEV_MEDIUM_ID(out_dev);
550 in_dev_put(out_dev); 557
551 }
552 return (omi != imi && omi != -1); 558 return (omi != imi && omi != -1);
553} 559}
554 560
@@ -576,7 +582,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev,
576 __be32 sip, __be32 tip) 582 __be32 sip, __be32 tip)
577{ 583{
578 /* Private VLAN is only concerned about the same ethernet segment */ 584 /* Private VLAN is only concerned about the same ethernet segment */
579 if (rt->u.dst.dev != dev) 585 if (rt->dst.dev != dev)
580 return 0; 586 return 0;
581 587
582 /* Don't reply on self probes (often done by windowz boxes)*/ 588 /* Don't reply on self probes (often done by windowz boxes)*/
@@ -698,6 +704,7 @@ out:
698 kfree_skb(skb); 704 kfree_skb(skb);
699 return NULL; 705 return NULL;
700} 706}
707EXPORT_SYMBOL(arp_create);
701 708
702/* 709/*
703 * Send an arp packet. 710 * Send an arp packet.
@@ -707,6 +714,7 @@ void arp_xmit(struct sk_buff *skb)
707 /* Send it off, maybe filter it using firewalling first. */ 714 /* Send it off, maybe filter it using firewalling first. */
708 NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); 715 NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
709} 716}
717EXPORT_SYMBOL(arp_xmit);
710 718
711/* 719/*
712 * Create and send an arp packet. 720 * Create and send an arp packet.
@@ -733,6 +741,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
733 741
734 arp_xmit(skb); 742 arp_xmit(skb);
735} 743}
744EXPORT_SYMBOL(arp_send);
736 745
737/* 746/*
738 * Process an arp request. 747 * Process an arp request.
@@ -741,7 +750,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
741static int arp_process(struct sk_buff *skb) 750static int arp_process(struct sk_buff *skb)
742{ 751{
743 struct net_device *dev = skb->dev; 752 struct net_device *dev = skb->dev;
744 struct in_device *in_dev = in_dev_get(dev); 753 struct in_device *in_dev = __in_dev_get_rcu(dev);
745 struct arphdr *arp; 754 struct arphdr *arp;
746 unsigned char *arp_ptr; 755 unsigned char *arp_ptr;
747 struct rtable *rt; 756 struct rtable *rt;
@@ -890,7 +899,6 @@ static int arp_process(struct sk_buff *skb)
890 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 899 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
891 } else { 900 } else {
892 pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); 901 pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
893 in_dev_put(in_dev);
894 return 0; 902 return 0;
895 } 903 }
896 goto out; 904 goto out;
@@ -936,8 +944,6 @@ static int arp_process(struct sk_buff *skb)
936 } 944 }
937 945
938out: 946out:
939 if (in_dev)
940 in_dev_put(in_dev);
941 consume_skb(skb); 947 consume_skb(skb);
942 return 0; 948 return 0;
943} 949}
@@ -1045,7 +1051,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1045 struct rtable * rt; 1051 struct rtable * rt;
1046 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1052 if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
1047 return err; 1053 return err;
1048 dev = rt->u.dst.dev; 1054 dev = rt->dst.dev;
1049 ip_rt_put(rt); 1055 ip_rt_put(rt);
1050 if (!dev) 1056 if (!dev)
1051 return -EINVAL; 1057 return -EINVAL;
@@ -1152,7 +1158,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1152 struct rtable * rt; 1158 struct rtable * rt;
1153 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1159 if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
1154 return err; 1160 return err;
1155 dev = rt->u.dst.dev; 1161 dev = rt->dst.dev;
1156 ip_rt_put(rt); 1162 ip_rt_put(rt);
1157 if (!dev) 1163 if (!dev)
1158 return -EINVAL; 1164 return -EINVAL;
@@ -1453,14 +1459,3 @@ static int __init arp_proc_init(void)
1453} 1459}
1454 1460
1455#endif /* CONFIG_PROC_FS */ 1461#endif /* CONFIG_PROC_FS */
1456
1457EXPORT_SYMBOL(arp_broken_ops);
1458EXPORT_SYMBOL(arp_find);
1459EXPORT_SYMBOL(arp_create);
1460EXPORT_SYMBOL(arp_xmit);
1461EXPORT_SYMBOL(arp_send);
1462EXPORT_SYMBOL(arp_tbl);
1463
1464#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
1465EXPORT_SYMBOL(clip_tbl_hook);
1466#endif
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index fb2465811b48..f0550941df7b 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -69,9 +69,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
69 sk->sk_state = TCP_ESTABLISHED; 69 sk->sk_state = TCP_ESTABLISHED;
70 inet->inet_id = jiffies; 70 inet->inet_id = jiffies;
71 71
72 sk_dst_set(sk, &rt->u.dst); 72 sk_dst_set(sk, &rt->dst);
73 return(0); 73 return(0);
74} 74}
75
76EXPORT_SYMBOL(ip4_datagram_connect); 75EXPORT_SYMBOL(ip4_datagram_connect);
77
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 382bc768ed56..da14c49284f4 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1081,6 +1081,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1081 } 1081 }
1082 ip_mc_up(in_dev); 1082 ip_mc_up(in_dev);
1083 /* fall through */ 1083 /* fall through */
1084 case NETDEV_NOTIFY_PEERS:
1084 case NETDEV_CHANGEADDR: 1085 case NETDEV_CHANGEADDR:
1085 /* Send gratuitous ARP to notify of link change */ 1086 /* Send gratuitous ARP to notify of link change */
1086 if (IN_DEV_ARP_NOTIFY(in_dev)) { 1087 if (IN_DEV_ARP_NOTIFY(in_dev)) {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4f0ed458c883..a43968918350 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -175,6 +175,7 @@ out:
175 fib_res_put(&res); 175 fib_res_put(&res);
176 return dev; 176 return dev;
177} 177}
178EXPORT_SYMBOL(ip_dev_find);
178 179
179/* 180/*
180 * Find address type as if only "dev" was present in the system. If 181 * Find address type as if only "dev" was present in the system. If
@@ -214,12 +215,14 @@ unsigned int inet_addr_type(struct net *net, __be32 addr)
214{ 215{
215 return __inet_dev_addr_type(net, NULL, addr); 216 return __inet_dev_addr_type(net, NULL, addr);
216} 217}
218EXPORT_SYMBOL(inet_addr_type);
217 219
218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
219 __be32 addr) 221 __be32 addr)
220{ 222{
221 return __inet_dev_addr_type(net, dev, addr); 223 return __inet_dev_addr_type(net, dev, addr);
222} 224}
225EXPORT_SYMBOL(inet_dev_addr_type);
223 226
224/* Given (packet source, input interface) and optional (dst, oif, tos): 227/* Given (packet source, input interface) and optional (dst, oif, tos):
225 - (main) check, that source is valid i.e. not broadcast or our local 228 - (main) check, that source is valid i.e. not broadcast or our local
@@ -284,7 +287,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
284 if (no_addr) 287 if (no_addr)
285 goto last_resort; 288 goto last_resort;
286 if (rpf == 1) 289 if (rpf == 1)
287 goto e_inval; 290 goto e_rpf;
288 fl.oif = dev->ifindex; 291 fl.oif = dev->ifindex;
289 292
290 ret = 0; 293 ret = 0;
@@ -299,7 +302,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
299 302
300last_resort: 303last_resort:
301 if (rpf) 304 if (rpf)
302 goto e_inval; 305 goto e_rpf;
303 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 306 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
304 *itag = 0; 307 *itag = 0;
305 return 0; 308 return 0;
@@ -308,6 +311,8 @@ e_inval_res:
308 fib_res_put(&res); 311 fib_res_put(&res);
309e_inval: 312e_inval:
310 return -EINVAL; 313 return -EINVAL;
314e_rpf:
315 return -EXDEV;
311} 316}
312 317
313static inline __be32 sk_extract_addr(struct sockaddr *addr) 318static inline __be32 sk_extract_addr(struct sockaddr *addr)
@@ -1075,7 +1080,3 @@ void __init ip_fib_init(void)
1075 1080
1076 fib_hash_init(); 1081 fib_hash_init();
1077} 1082}
1078
1079EXPORT_SYMBOL(inet_addr_type);
1080EXPORT_SYMBOL(inet_dev_addr_type);
1081EXPORT_SYMBOL(ip_dev_find);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index d65e9215bcd7..a0d847c7cba5 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -181,6 +181,7 @@ const struct icmp_err icmp_err_convert[] = {
181 .fatal = 1, 181 .fatal = 1,
182 }, 182 },
183}; 183};
184EXPORT_SYMBOL(icmp_err_convert);
184 185
185/* 186/*
186 * ICMP control array. This specifies what to do with each ICMP. 187 * ICMP control array. This specifies what to do with each ICMP.
@@ -267,11 +268,12 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
267 dst->rate_tokens = token; 268 dst->rate_tokens = token;
268 return rc; 269 return rc;
269} 270}
271EXPORT_SYMBOL(xrlim_allow);
270 272
271static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, 273static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
272 int type, int code) 274 int type, int code)
273{ 275{
274 struct dst_entry *dst = &rt->u.dst; 276 struct dst_entry *dst = &rt->dst;
275 int rc = 1; 277 int rc = 1;
276 278
277 if (type > NR_ICMP_TYPES) 279 if (type > NR_ICMP_TYPES)
@@ -327,7 +329,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
327 struct sock *sk; 329 struct sock *sk;
328 struct sk_buff *skb; 330 struct sk_buff *skb;
329 331
330 sk = icmp_sk(dev_net((*rt)->u.dst.dev)); 332 sk = icmp_sk(dev_net((*rt)->dst.dev));
331 if (ip_append_data(sk, icmp_glue_bits, icmp_param, 333 if (ip_append_data(sk, icmp_glue_bits, icmp_param,
332 icmp_param->data_len+icmp_param->head_len, 334 icmp_param->data_len+icmp_param->head_len,
333 icmp_param->head_len, 335 icmp_param->head_len,
@@ -359,7 +361,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
359{ 361{
360 struct ipcm_cookie ipc; 362 struct ipcm_cookie ipc;
361 struct rtable *rt = skb_rtable(skb); 363 struct rtable *rt = skb_rtable(skb);
362 struct net *net = dev_net(rt->u.dst.dev); 364 struct net *net = dev_net(rt->dst.dev);
363 struct sock *sk; 365 struct sock *sk;
364 struct inet_sock *inet; 366 struct inet_sock *inet;
365 __be32 daddr; 367 __be32 daddr;
@@ -427,7 +429,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
427 429
428 if (!rt) 430 if (!rt)
429 goto out; 431 goto out;
430 net = dev_net(rt->u.dst.dev); 432 net = dev_net(rt->dst.dev);
431 433
432 /* 434 /*
433 * Find the original header. It is expected to be valid, of course. 435 * Find the original header. It is expected to be valid, of course.
@@ -596,9 +598,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
596 /* Ugh! */ 598 /* Ugh! */
597 orefdst = skb_in->_skb_refdst; /* save old refdst */ 599 orefdst = skb_in->_skb_refdst; /* save old refdst */
598 err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, 600 err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
599 RT_TOS(tos), rt2->u.dst.dev); 601 RT_TOS(tos), rt2->dst.dev);
600 602
601 dst_release(&rt2->u.dst); 603 dst_release(&rt2->dst);
602 rt2 = skb_rtable(skb_in); 604 rt2 = skb_rtable(skb_in);
603 skb_in->_skb_refdst = orefdst; /* restore old refdst */ 605 skb_in->_skb_refdst = orefdst; /* restore old refdst */
604 } 606 }
@@ -610,7 +612,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
610 XFRM_LOOKUP_ICMP); 612 XFRM_LOOKUP_ICMP);
611 switch (err) { 613 switch (err) {
612 case 0: 614 case 0:
613 dst_release(&rt->u.dst); 615 dst_release(&rt->dst);
614 rt = rt2; 616 rt = rt2;
615 break; 617 break;
616 case -EPERM: 618 case -EPERM:
@@ -629,7 +631,7 @@ route_done:
629 631
630 /* RFC says return as much as we can without exceeding 576 bytes. */ 632 /* RFC says return as much as we can without exceeding 576 bytes. */
631 633
632 room = dst_mtu(&rt->u.dst); 634 room = dst_mtu(&rt->dst);
633 if (room > 576) 635 if (room > 576)
634 room = 576; 636 room = 576;
635 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; 637 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
@@ -647,6 +649,7 @@ out_unlock:
647 icmp_xmit_unlock(sk); 649 icmp_xmit_unlock(sk);
648out:; 650out:;
649} 651}
652EXPORT_SYMBOL(icmp_send);
650 653
651 654
652/* 655/*
@@ -925,6 +928,7 @@ static void icmp_address(struct sk_buff *skb)
925/* 928/*
926 * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain 929 * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain
927 * loudly if an inconsistency is found. 930 * loudly if an inconsistency is found.
931 * called with rcu_read_lock()
928 */ 932 */
929 933
930static void icmp_address_reply(struct sk_buff *skb) 934static void icmp_address_reply(struct sk_buff *skb)
@@ -935,12 +939,12 @@ static void icmp_address_reply(struct sk_buff *skb)
935 struct in_ifaddr *ifa; 939 struct in_ifaddr *ifa;
936 940
937 if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) 941 if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC))
938 goto out; 942 return;
939 943
940 in_dev = in_dev_get(dev); 944 in_dev = __in_dev_get_rcu(dev);
941 if (!in_dev) 945 if (!in_dev)
942 goto out; 946 return;
943 rcu_read_lock(); 947
944 if (in_dev->ifa_list && 948 if (in_dev->ifa_list &&
945 IN_DEV_LOG_MARTIANS(in_dev) && 949 IN_DEV_LOG_MARTIANS(in_dev) &&
946 IN_DEV_FORWARD(in_dev)) { 950 IN_DEV_FORWARD(in_dev)) {
@@ -958,9 +962,6 @@ static void icmp_address_reply(struct sk_buff *skb)
958 mp, dev->name, &rt->rt_src); 962 mp, dev->name, &rt->rt_src);
959 } 963 }
960 } 964 }
961 rcu_read_unlock();
962 in_dev_put(in_dev);
963out:;
964} 965}
965 966
966static void icmp_discard(struct sk_buff *skb) 967static void icmp_discard(struct sk_buff *skb)
@@ -974,7 +975,7 @@ int icmp_rcv(struct sk_buff *skb)
974{ 975{
975 struct icmphdr *icmph; 976 struct icmphdr *icmph;
976 struct rtable *rt = skb_rtable(skb); 977 struct rtable *rt = skb_rtable(skb);
977 struct net *net = dev_net(rt->u.dst.dev); 978 struct net *net = dev_net(rt->dst.dev);
978 979
979 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 980 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
980 struct sec_path *sp = skb_sec_path(skb); 981 struct sec_path *sp = skb_sec_path(skb);
@@ -1216,7 +1217,3 @@ int __init icmp_init(void)
1216{ 1217{
1217 return register_pernet_subsys(&icmp_sk_ops); 1218 return register_pernet_subsys(&icmp_sk_ops);
1218} 1219}
1219
1220EXPORT_SYMBOL(icmp_err_convert);
1221EXPORT_SYMBOL(icmp_send);
1222EXPORT_SYMBOL(xrlim_allow);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5fff865a4fa7..a1ad0e7180d2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -312,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
312 return NULL; 312 return NULL;
313 } 313 }
314 314
315 skb_dst_set(skb, &rt->u.dst); 315 skb_dst_set(skb, &rt->dst);
316 skb->dev = dev; 316 skb->dev = dev;
317 317
318 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 318 skb_reserve(skb, LL_RESERVED_SPACE(dev));
@@ -330,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
330 pip->saddr = rt->rt_src; 330 pip->saddr = rt->rt_src;
331 pip->protocol = IPPROTO_IGMP; 331 pip->protocol = IPPROTO_IGMP;
332 pip->tot_len = 0; /* filled in later */ 332 pip->tot_len = 0; /* filled in later */
333 ip_select_ident(pip, &rt->u.dst, NULL); 333 ip_select_ident(pip, &rt->dst, NULL);
334 ((u8*)&pip[1])[0] = IPOPT_RA; 334 ((u8*)&pip[1])[0] = IPOPT_RA;
335 ((u8*)&pip[1])[1] = 4; 335 ((u8*)&pip[1])[1] = 4;
336 ((u8*)&pip[1])[2] = 0; 336 ((u8*)&pip[1])[2] = 0;
@@ -660,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
660 return -1; 660 return -1;
661 } 661 }
662 662
663 skb_dst_set(skb, &rt->u.dst); 663 skb_dst_set(skb, &rt->dst);
664 664
665 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 665 skb_reserve(skb, LL_RESERVED_SPACE(dev));
666 666
@@ -676,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
676 iph->daddr = dst; 676 iph->daddr = dst;
677 iph->saddr = rt->rt_src; 677 iph->saddr = rt->rt_src;
678 iph->protocol = IPPROTO_IGMP; 678 iph->protocol = IPPROTO_IGMP;
679 ip_select_ident(iph, &rt->u.dst, NULL); 679 ip_select_ident(iph, &rt->dst, NULL);
680 ((u8*)&iph[1])[0] = IPOPT_RA; 680 ((u8*)&iph[1])[0] = IPOPT_RA;
681 ((u8*)&iph[1])[1] = 4; 681 ((u8*)&iph[1])[1] = 4;
682 ((u8*)&iph[1])[2] = 0; 682 ((u8*)&iph[1])[2] = 0;
@@ -916,18 +916,19 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
916 read_unlock(&in_dev->mc_list_lock); 916 read_unlock(&in_dev->mc_list_lock);
917} 917}
918 918
919/* called in rcu_read_lock() section */
919int igmp_rcv(struct sk_buff *skb) 920int igmp_rcv(struct sk_buff *skb)
920{ 921{
921 /* This basically follows the spec line by line -- see RFC1112 */ 922 /* This basically follows the spec line by line -- see RFC1112 */
922 struct igmphdr *ih; 923 struct igmphdr *ih;
923 struct in_device *in_dev = in_dev_get(skb->dev); 924 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
924 int len = skb->len; 925 int len = skb->len;
925 926
926 if (in_dev == NULL) 927 if (in_dev == NULL)
927 goto drop; 928 goto drop;
928 929
929 if (!pskb_may_pull(skb, sizeof(struct igmphdr))) 930 if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
930 goto drop_ref; 931 goto drop;
931 932
932 switch (skb->ip_summed) { 933 switch (skb->ip_summed) {
933 case CHECKSUM_COMPLETE: 934 case CHECKSUM_COMPLETE:
@@ -937,7 +938,7 @@ int igmp_rcv(struct sk_buff *skb)
937 case CHECKSUM_NONE: 938 case CHECKSUM_NONE:
938 skb->csum = 0; 939 skb->csum = 0;
939 if (__skb_checksum_complete(skb)) 940 if (__skb_checksum_complete(skb))
940 goto drop_ref; 941 goto drop;
941 } 942 }
942 943
943 ih = igmp_hdr(skb); 944 ih = igmp_hdr(skb);
@@ -957,7 +958,6 @@ int igmp_rcv(struct sk_buff *skb)
957 break; 958 break;
958 case IGMP_PIM: 959 case IGMP_PIM:
959#ifdef CONFIG_IP_PIMSM_V1 960#ifdef CONFIG_IP_PIMSM_V1
960 in_dev_put(in_dev);
961 return pim_rcv_v1(skb); 961 return pim_rcv_v1(skb);
962#endif 962#endif
963 case IGMPV3_HOST_MEMBERSHIP_REPORT: 963 case IGMPV3_HOST_MEMBERSHIP_REPORT:
@@ -971,8 +971,6 @@ int igmp_rcv(struct sk_buff *skb)
971 break; 971 break;
972 } 972 }
973 973
974drop_ref:
975 in_dev_put(in_dev);
976drop: 974drop:
977 kfree_skb(skb); 975 kfree_skb(skb);
978 return 0; 976 return 0;
@@ -1246,6 +1244,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1246out: 1244out:
1247 return; 1245 return;
1248} 1246}
1247EXPORT_SYMBOL(ip_mc_inc_group);
1249 1248
1250/* 1249/*
1251 * Resend IGMP JOIN report; used for bonding. 1250 * Resend IGMP JOIN report; used for bonding.
@@ -1268,6 +1267,7 @@ void ip_mc_rejoin_group(struct ip_mc_list *im)
1268 igmp_ifc_event(in_dev); 1267 igmp_ifc_event(in_dev);
1269#endif 1268#endif
1270} 1269}
1270EXPORT_SYMBOL(ip_mc_rejoin_group);
1271 1271
1272/* 1272/*
1273 * A socket has left a multicast group on device dev 1273 * A socket has left a multicast group on device dev
@@ -1298,6 +1298,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
1298 } 1298 }
1299 } 1299 }
1300} 1300}
1301EXPORT_SYMBOL(ip_mc_dec_group);
1301 1302
1302/* Device changing type */ 1303/* Device changing type */
1303 1304
@@ -1427,7 +1428,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1427 } 1428 }
1428 1429
1429 if (!dev && !ip_route_output_key(net, &rt, &fl)) { 1430 if (!dev && !ip_route_output_key(net, &rt, &fl)) {
1430 dev = rt->u.dst.dev; 1431 dev = rt->dst.dev;
1431 ip_rt_put(rt); 1432 ip_rt_put(rt);
1432 } 1433 }
1433 if (dev) { 1434 if (dev) {
@@ -1646,8 +1647,7 @@ static int sf_setstate(struct ip_mc_list *pmc)
1646 if (dpsf->sf_inaddr == psf->sf_inaddr) 1647 if (dpsf->sf_inaddr == psf->sf_inaddr)
1647 break; 1648 break;
1648 if (!dpsf) { 1649 if (!dpsf) {
1649 dpsf = (struct ip_sf_list *) 1650 dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
1650 kmalloc(sizeof(*dpsf), GFP_ATOMIC);
1651 if (!dpsf) 1651 if (!dpsf)
1652 continue; 1652 continue;
1653 *dpsf = *psf; 1653 *dpsf = *psf;
@@ -1807,6 +1807,7 @@ done:
1807 rtnl_unlock(); 1807 rtnl_unlock();
1808 return err; 1808 return err;
1809} 1809}
1810EXPORT_SYMBOL(ip_mc_join_group);
1810 1811
1811static void ip_sf_socklist_reclaim(struct rcu_head *rp) 1812static void ip_sf_socklist_reclaim(struct rcu_head *rp)
1812{ 1813{
@@ -2679,8 +2680,3 @@ int __init igmp_mc_proc_init(void)
2679 return register_pernet_subsys(&igmp_net_ops); 2680 return register_pernet_subsys(&igmp_net_ops);
2680} 2681}
2681#endif 2682#endif
2682
2683EXPORT_SYMBOL(ip_mc_dec_group);
2684EXPORT_SYMBOL(ip_mc_inc_group);
2685EXPORT_SYMBOL(ip_mc_join_group);
2686EXPORT_SYMBOL(ip_mc_rejoin_group);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 70eb3507c406..7174370b1195 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -84,7 +84,6 @@ int inet_csk_bind_conflict(const struct sock *sk,
84 } 84 }
85 return node != NULL; 85 return node != NULL;
86} 86}
87
88EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); 87EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
89 88
90/* Obtain a reference to a local port for the given sock, 89/* Obtain a reference to a local port for the given sock,
@@ -212,7 +211,6 @@ fail:
212 local_bh_enable(); 211 local_bh_enable();
213 return ret; 212 return ret;
214} 213}
215
216EXPORT_SYMBOL_GPL(inet_csk_get_port); 214EXPORT_SYMBOL_GPL(inet_csk_get_port);
217 215
218/* 216/*
@@ -305,7 +303,6 @@ out_err:
305 *err = error; 303 *err = error;
306 goto out; 304 goto out;
307} 305}
308
309EXPORT_SYMBOL(inet_csk_accept); 306EXPORT_SYMBOL(inet_csk_accept);
310 307
311/* 308/*
@@ -327,7 +324,6 @@ void inet_csk_init_xmit_timers(struct sock *sk,
327 setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); 324 setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
328 icsk->icsk_pending = icsk->icsk_ack.pending = 0; 325 icsk->icsk_pending = icsk->icsk_ack.pending = 0;
329} 326}
330
331EXPORT_SYMBOL(inet_csk_init_xmit_timers); 327EXPORT_SYMBOL(inet_csk_init_xmit_timers);
332 328
333void inet_csk_clear_xmit_timers(struct sock *sk) 329void inet_csk_clear_xmit_timers(struct sock *sk)
@@ -340,21 +336,18 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
340 sk_stop_timer(sk, &icsk->icsk_delack_timer); 336 sk_stop_timer(sk, &icsk->icsk_delack_timer);
341 sk_stop_timer(sk, &sk->sk_timer); 337 sk_stop_timer(sk, &sk->sk_timer);
342} 338}
343
344EXPORT_SYMBOL(inet_csk_clear_xmit_timers); 339EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
345 340
346void inet_csk_delete_keepalive_timer(struct sock *sk) 341void inet_csk_delete_keepalive_timer(struct sock *sk)
347{ 342{
348 sk_stop_timer(sk, &sk->sk_timer); 343 sk_stop_timer(sk, &sk->sk_timer);
349} 344}
350
351EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); 345EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
352 346
353void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) 347void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
354{ 348{
355 sk_reset_timer(sk, &sk->sk_timer, jiffies + len); 349 sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
356} 350}
357
358EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 351EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
359 352
360struct dst_entry *inet_csk_route_req(struct sock *sk, 353struct dst_entry *inet_csk_route_req(struct sock *sk,
@@ -383,7 +376,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
383 goto no_route; 376 goto no_route;
384 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 377 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
385 goto route_err; 378 goto route_err;
386 return &rt->u.dst; 379 return &rt->dst;
387 380
388route_err: 381route_err:
389 ip_rt_put(rt); 382 ip_rt_put(rt);
@@ -391,7 +384,6 @@ no_route:
391 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 384 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
392 return NULL; 385 return NULL;
393} 386}
394
395EXPORT_SYMBOL_GPL(inet_csk_route_req); 387EXPORT_SYMBOL_GPL(inet_csk_route_req);
396 388
397static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, 389static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
@@ -433,7 +425,6 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
433 425
434 return req; 426 return req;
435} 427}
436
437EXPORT_SYMBOL_GPL(inet_csk_search_req); 428EXPORT_SYMBOL_GPL(inet_csk_search_req);
438 429
439void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 430void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
@@ -447,11 +438,11 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
447 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); 438 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
448 inet_csk_reqsk_queue_added(sk, timeout); 439 inet_csk_reqsk_queue_added(sk, timeout);
449} 440}
441EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
450 442
451/* Only thing we need from tcp.h */ 443/* Only thing we need from tcp.h */
452extern int sysctl_tcp_synack_retries; 444extern int sysctl_tcp_synack_retries;
453 445
454EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
455 446
456/* Decide when to expire the request and when to resend SYN-ACK */ 447/* Decide when to expire the request and when to resend SYN-ACK */
457static inline void syn_ack_recalc(struct request_sock *req, const int thresh, 448static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
@@ -569,7 +560,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
569 if (lopt->qlen) 560 if (lopt->qlen)
570 inet_csk_reset_keepalive_timer(parent, interval); 561 inet_csk_reset_keepalive_timer(parent, interval);
571} 562}
572
573EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); 563EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
574 564
575struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, 565struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
@@ -599,7 +589,6 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
599 } 589 }
600 return newsk; 590 return newsk;
601} 591}
602
603EXPORT_SYMBOL_GPL(inet_csk_clone); 592EXPORT_SYMBOL_GPL(inet_csk_clone);
604 593
605/* 594/*
@@ -630,7 +619,6 @@ void inet_csk_destroy_sock(struct sock *sk)
630 percpu_counter_dec(sk->sk_prot->orphan_count); 619 percpu_counter_dec(sk->sk_prot->orphan_count);
631 sock_put(sk); 620 sock_put(sk);
632} 621}
633
634EXPORT_SYMBOL(inet_csk_destroy_sock); 622EXPORT_SYMBOL(inet_csk_destroy_sock);
635 623
636int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) 624int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
@@ -665,7 +653,6 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
665 __reqsk_queue_destroy(&icsk->icsk_accept_queue); 653 __reqsk_queue_destroy(&icsk->icsk_accept_queue);
666 return -EADDRINUSE; 654 return -EADDRINUSE;
667} 655}
668
669EXPORT_SYMBOL_GPL(inet_csk_listen_start); 656EXPORT_SYMBOL_GPL(inet_csk_listen_start);
670 657
671/* 658/*
@@ -720,7 +707,6 @@ void inet_csk_listen_stop(struct sock *sk)
720 } 707 }
721 WARN_ON(sk->sk_ack_backlog); 708 WARN_ON(sk->sk_ack_backlog);
722} 709}
723
724EXPORT_SYMBOL_GPL(inet_csk_listen_stop); 710EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
725 711
726void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) 712void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
@@ -732,7 +718,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
732 sin->sin_addr.s_addr = inet->inet_daddr; 718 sin->sin_addr.s_addr = inet->inet_daddr;
733 sin->sin_port = inet->inet_dport; 719 sin->sin_port = inet->inet_dport;
734} 720}
735
736EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 721EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
737 722
738#ifdef CONFIG_COMPAT 723#ifdef CONFIG_COMPAT
@@ -747,7 +732,6 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
747 return icsk->icsk_af_ops->getsockopt(sk, level, optname, 732 return icsk->icsk_af_ops->getsockopt(sk, level, optname,
748 optval, optlen); 733 optval, optlen);
749} 734}
750
751EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); 735EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);
752 736
753int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, 737int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
@@ -761,6 +745,5 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
761 return icsk->icsk_af_ops->setsockopt(sk, level, optname, 745 return icsk->icsk_af_ops->setsockopt(sk, level, optname,
762 optval, optlen); 746 optval, optlen);
763} 747}
764
765EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); 748EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
766#endif 749#endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index a2ca6aed763b..5ff2a51b6d0c 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -114,7 +114,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
114 fq->last_in |= INET_FRAG_COMPLETE; 114 fq->last_in |= INET_FRAG_COMPLETE;
115 } 115 }
116} 116}
117
118EXPORT_SYMBOL(inet_frag_kill); 117EXPORT_SYMBOL(inet_frag_kill);
119 118
120static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, 119static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index d3e160a88219..fb7ad5a21ff3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -99,7 +99,6 @@ void inet_put_port(struct sock *sk)
99 __inet_put_port(sk); 99 __inet_put_port(sk);
100 local_bh_enable(); 100 local_bh_enable();
101} 101}
102
103EXPORT_SYMBOL(inet_put_port); 102EXPORT_SYMBOL(inet_put_port);
104 103
105void __inet_inherit_port(struct sock *sk, struct sock *child) 104void __inet_inherit_port(struct sock *sk, struct sock *child)
@@ -116,7 +115,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child)
116 inet_csk(child)->icsk_bind_hash = tb; 115 inet_csk(child)->icsk_bind_hash = tb;
117 spin_unlock(&head->lock); 116 spin_unlock(&head->lock);
118} 117}
119
120EXPORT_SYMBOL_GPL(__inet_inherit_port); 118EXPORT_SYMBOL_GPL(__inet_inherit_port);
121 119
122static inline int compute_score(struct sock *sk, struct net *net, 120static inline int compute_score(struct sock *sk, struct net *net,
@@ -546,7 +544,6 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
546 return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk), 544 return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
547 __inet_check_established, __inet_hash_nolisten); 545 __inet_check_established, __inet_hash_nolisten);
548} 546}
549
550EXPORT_SYMBOL_GPL(inet_hash_connect); 547EXPORT_SYMBOL_GPL(inet_hash_connect);
551 548
552void inet_hashinfo_init(struct inet_hashinfo *h) 549void inet_hashinfo_init(struct inet_hashinfo *h)
@@ -560,5 +557,4 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
560 i + LISTENING_NULLS_BASE); 557 i + LISTENING_NULLS_BASE);
561 } 558 }
562} 559}
563
564EXPORT_SYMBOL_GPL(inet_hashinfo_init); 560EXPORT_SYMBOL_GPL(inet_hashinfo_init);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 6bcfe52a9c87..9ffa24b9a804 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -51,8 +51,8 @@
51 * lookups performed with disabled BHs. 51 * lookups performed with disabled BHs.
52 * 52 *
53 * Serialisation issues. 53 * Serialisation issues.
54 * 1. Nodes may appear in the tree only with the pool write lock held. 54 * 1. Nodes may appear in the tree only with the pool lock held.
55 * 2. Nodes may disappear from the tree only with the pool write lock held 55 * 2. Nodes may disappear from the tree only with the pool lock held
56 * AND reference count being 0. 56 * AND reference count being 0.
57 * 3. Nodes appears and disappears from unused node list only under 57 * 3. Nodes appears and disappears from unused node list only under
58 * "inet_peer_unused_lock". 58 * "inet_peer_unused_lock".
@@ -64,23 +64,31 @@
64 * usually under some other lock to prevent node disappearing 64 * usually under some other lock to prevent node disappearing
65 * dtime: unused node list lock 65 * dtime: unused node list lock
66 * v4daddr: unchangeable 66 * v4daddr: unchangeable
67 * ip_id_count: idlock 67 * ip_id_count: atomic value (no lock needed)
68 */ 68 */
69 69
70static struct kmem_cache *peer_cachep __read_mostly; 70static struct kmem_cache *peer_cachep __read_mostly;
71 71
72#define node_height(x) x->avl_height 72#define node_height(x) x->avl_height
73static struct inet_peer peer_fake_node = { 73
74 .avl_left = &peer_fake_node, 74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
75 .avl_right = &peer_fake_node, 75static const struct inet_peer peer_fake_node = {
76 .avl_left = peer_avl_empty,
77 .avl_right = peer_avl_empty,
76 .avl_height = 0 78 .avl_height = 0
77}; 79};
78#define peer_avl_empty (&peer_fake_node) 80
79static struct inet_peer *peer_root = peer_avl_empty; 81static struct {
80static DEFINE_RWLOCK(peer_pool_lock); 82 struct inet_peer *root;
83 spinlock_t lock;
84 int total;
85} peers = {
86 .root = peer_avl_empty,
87 .lock = __SPIN_LOCK_UNLOCKED(peers.lock),
88 .total = 0,
89};
81#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ 90#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
82 91
83static int peer_total;
84/* Exported for sysctl_net_ipv4. */ 92/* Exported for sysctl_net_ipv4. */
85int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more 93int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more
86 * aggressively at this stage */ 94 * aggressively at this stage */
@@ -89,8 +97,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min
89int inet_peer_gc_mintime __read_mostly = 10 * HZ; 97int inet_peer_gc_mintime __read_mostly = 10 * HZ;
90int inet_peer_gc_maxtime __read_mostly = 120 * HZ; 98int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
91 99
92static LIST_HEAD(unused_peers); 100static struct {
93static DEFINE_SPINLOCK(inet_peer_unused_lock); 101 struct list_head list;
102 spinlock_t lock;
103} unused_peers = {
104 .list = LIST_HEAD_INIT(unused_peers.list),
105 .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock),
106};
94 107
95static void peer_check_expire(unsigned long dummy); 108static void peer_check_expire(unsigned long dummy);
96static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); 109static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
@@ -116,7 +129,7 @@ void __init inet_initpeers(void)
116 129
117 peer_cachep = kmem_cache_create("inet_peer_cache", 130 peer_cachep = kmem_cache_create("inet_peer_cache",
118 sizeof(struct inet_peer), 131 sizeof(struct inet_peer),
119 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 132 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
120 NULL); 133 NULL);
121 134
122 /* All the timers, started at system startup tend 135 /* All the timers, started at system startup tend
@@ -131,38 +144,69 @@ void __init inet_initpeers(void)
131/* Called with or without local BH being disabled. */ 144/* Called with or without local BH being disabled. */
132static void unlink_from_unused(struct inet_peer *p) 145static void unlink_from_unused(struct inet_peer *p)
133{ 146{
134 spin_lock_bh(&inet_peer_unused_lock); 147 if (!list_empty(&p->unused)) {
135 list_del_init(&p->unused); 148 spin_lock_bh(&unused_peers.lock);
136 spin_unlock_bh(&inet_peer_unused_lock); 149 list_del_init(&p->unused);
150 spin_unlock_bh(&unused_peers.lock);
151 }
137} 152}
138 153
139/* 154/*
140 * Called with local BH disabled and the pool lock held. 155 * Called with local BH disabled and the pool lock held.
141 * _stack is known to be NULL or not at compile time,
142 * so compiler will optimize the if (_stack) tests.
143 */ 156 */
144#define lookup(_daddr, _stack) \ 157#define lookup(_daddr, _stack) \
145({ \ 158({ \
146 struct inet_peer *u, **v; \ 159 struct inet_peer *u, **v; \
147 if (_stack != NULL) { \ 160 \
148 stackptr = _stack; \ 161 stackptr = _stack; \
149 *stackptr++ = &peer_root; \ 162 *stackptr++ = &peers.root; \
150 } \ 163 for (u = peers.root; u != peer_avl_empty; ) { \
151 for (u = peer_root; u != peer_avl_empty; ) { \
152 if (_daddr == u->v4daddr) \ 164 if (_daddr == u->v4daddr) \
153 break; \ 165 break; \
154 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ 166 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
155 v = &u->avl_left; \ 167 v = &u->avl_left; \
156 else \ 168 else \
157 v = &u->avl_right; \ 169 v = &u->avl_right; \
158 if (_stack != NULL) \ 170 *stackptr++ = v; \
159 *stackptr++ = v; \
160 u = *v; \ 171 u = *v; \
161 } \ 172 } \
162 u; \ 173 u; \
163}) 174})
164 175
165/* Called with local BH disabled and the pool write lock held. */ 176/*
177 * Called with rcu_read_lock_bh()
178 * Because we hold no lock against a writer, its quite possible we fall
179 * in an endless loop.
180 * But every pointer we follow is guaranteed to be valid thanks to RCU.
181 * We exit from this function if number of links exceeds PEER_MAXDEPTH
182 */
183static struct inet_peer *lookup_rcu_bh(__be32 daddr)
184{
185 struct inet_peer *u = rcu_dereference_bh(peers.root);
186 int count = 0;
187
188 while (u != peer_avl_empty) {
189 if (daddr == u->v4daddr) {
190 /* Before taking a reference, check if this entry was
191 * deleted, unlink_from_pool() sets refcnt=-1 to make
192 * distinction between an unused entry (refcnt=0) and
193 * a freed one.
194 */
195 if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1)))
196 u = NULL;
197 return u;
198 }
199 if ((__force __u32)daddr < (__force __u32)u->v4daddr)
200 u = rcu_dereference_bh(u->avl_left);
201 else
202 u = rcu_dereference_bh(u->avl_right);
203 if (unlikely(++count == PEER_MAXDEPTH))
204 break;
205 }
206 return NULL;
207}
208
209/* Called with local BH disabled and the pool lock held. */
166#define lookup_rightempty(start) \ 210#define lookup_rightempty(start) \
167({ \ 211({ \
168 struct inet_peer *u, **v; \ 212 struct inet_peer *u, **v; \
@@ -176,9 +220,10 @@ static void unlink_from_unused(struct inet_peer *p)
176 u; \ 220 u; \
177}) 221})
178 222
179/* Called with local BH disabled and the pool write lock held. 223/* Called with local BH disabled and the pool lock held.
180 * Variable names are the proof of operation correctness. 224 * Variable names are the proof of operation correctness.
181 * Look into mm/map_avl.c for more detail description of the ideas. */ 225 * Look into mm/map_avl.c for more detail description of the ideas.
226 */
182static void peer_avl_rebalance(struct inet_peer **stack[], 227static void peer_avl_rebalance(struct inet_peer **stack[],
183 struct inet_peer ***stackend) 228 struct inet_peer ***stackend)
184{ 229{
@@ -254,15 +299,21 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
254 } 299 }
255} 300}
256 301
257/* Called with local BH disabled and the pool write lock held. */ 302/* Called with local BH disabled and the pool lock held. */
258#define link_to_pool(n) \ 303#define link_to_pool(n) \
259do { \ 304do { \
260 n->avl_height = 1; \ 305 n->avl_height = 1; \
261 n->avl_left = peer_avl_empty; \ 306 n->avl_left = peer_avl_empty; \
262 n->avl_right = peer_avl_empty; \ 307 n->avl_right = peer_avl_empty; \
308 smp_wmb(); /* lockless readers can catch us now */ \
263 **--stackptr = n; \ 309 **--stackptr = n; \
264 peer_avl_rebalance(stack, stackptr); \ 310 peer_avl_rebalance(stack, stackptr); \
265} while(0) 311} while (0)
312
313static void inetpeer_free_rcu(struct rcu_head *head)
314{
315 kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
316}
266 317
267/* May be called with local BH enabled. */ 318/* May be called with local BH enabled. */
268static void unlink_from_pool(struct inet_peer *p) 319static void unlink_from_pool(struct inet_peer *p)
@@ -271,13 +322,14 @@ static void unlink_from_pool(struct inet_peer *p)
271 322
272 do_free = 0; 323 do_free = 0;
273 324
274 write_lock_bh(&peer_pool_lock); 325 spin_lock_bh(&peers.lock);
275 /* Check the reference counter. It was artificially incremented by 1 326 /* Check the reference counter. It was artificially incremented by 1
276 * in cleanup() function to prevent sudden disappearing. If the 327 * in cleanup() function to prevent sudden disappearing. If we can
277 * reference count is still 1 then the node is referenced only as `p' 328 * atomically (because of lockless readers) take this last reference,
278 * here and from the pool. So under the exclusive pool lock it's safe 329 * it's safe to remove the node and free it later.
279 * to remove the node and free it later. */ 330 * We use refcnt=-1 to alert lockless readers this entry is deleted.
280 if (atomic_read(&p->refcnt) == 1) { 331 */
332 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
281 struct inet_peer **stack[PEER_MAXDEPTH]; 333 struct inet_peer **stack[PEER_MAXDEPTH];
282 struct inet_peer ***stackptr, ***delp; 334 struct inet_peer ***stackptr, ***delp;
283 if (lookup(p->v4daddr, stack) != p) 335 if (lookup(p->v4daddr, stack) != p)
@@ -303,20 +355,21 @@ static void unlink_from_pool(struct inet_peer *p)
303 delp[1] = &t->avl_left; /* was &p->avl_left */ 355 delp[1] = &t->avl_left; /* was &p->avl_left */
304 } 356 }
305 peer_avl_rebalance(stack, stackptr); 357 peer_avl_rebalance(stack, stackptr);
306 peer_total--; 358 peers.total--;
307 do_free = 1; 359 do_free = 1;
308 } 360 }
309 write_unlock_bh(&peer_pool_lock); 361 spin_unlock_bh(&peers.lock);
310 362
311 if (do_free) 363 if (do_free)
312 kmem_cache_free(peer_cachep, p); 364 call_rcu_bh(&p->rcu, inetpeer_free_rcu);
313 else 365 else
314 /* The node is used again. Decrease the reference counter 366 /* The node is used again. Decrease the reference counter
315 * back. The loop "cleanup -> unlink_from_unused 367 * back. The loop "cleanup -> unlink_from_unused
316 * -> unlink_from_pool -> putpeer -> link_to_unused 368 * -> unlink_from_pool -> putpeer -> link_to_unused
317 * -> cleanup (for the same node)" 369 * -> cleanup (for the same node)"
318 * doesn't really exist because the entry will have a 370 * doesn't really exist because the entry will have a
319 * recent deletion time and will not be cleaned again soon. */ 371 * recent deletion time and will not be cleaned again soon.
372 */
320 inet_putpeer(p); 373 inet_putpeer(p);
321} 374}
322 375
@@ -326,16 +379,16 @@ static int cleanup_once(unsigned long ttl)
326 struct inet_peer *p = NULL; 379 struct inet_peer *p = NULL;
327 380
328 /* Remove the first entry from the list of unused nodes. */ 381 /* Remove the first entry from the list of unused nodes. */
329 spin_lock_bh(&inet_peer_unused_lock); 382 spin_lock_bh(&unused_peers.lock);
330 if (!list_empty(&unused_peers)) { 383 if (!list_empty(&unused_peers.list)) {
331 __u32 delta; 384 __u32 delta;
332 385
333 p = list_first_entry(&unused_peers, struct inet_peer, unused); 386 p = list_first_entry(&unused_peers.list, struct inet_peer, unused);
334 delta = (__u32)jiffies - p->dtime; 387 delta = (__u32)jiffies - p->dtime;
335 388
336 if (delta < ttl) { 389 if (delta < ttl) {
337 /* Do not prune fresh entries. */ 390 /* Do not prune fresh entries. */
338 spin_unlock_bh(&inet_peer_unused_lock); 391 spin_unlock_bh(&unused_peers.lock);
339 return -1; 392 return -1;
340 } 393 }
341 394
@@ -345,7 +398,7 @@ static int cleanup_once(unsigned long ttl)
345 * before unlink_from_pool() call. */ 398 * before unlink_from_pool() call. */
346 atomic_inc(&p->refcnt); 399 atomic_inc(&p->refcnt);
347 } 400 }
348 spin_unlock_bh(&inet_peer_unused_lock); 401 spin_unlock_bh(&unused_peers.lock);
349 402
350 if (p == NULL) 403 if (p == NULL)
351 /* It means that the total number of USED entries has 404 /* It means that the total number of USED entries has
@@ -360,62 +413,56 @@ static int cleanup_once(unsigned long ttl)
360/* Called with or without local BH being disabled. */ 413/* Called with or without local BH being disabled. */
361struct inet_peer *inet_getpeer(__be32 daddr, int create) 414struct inet_peer *inet_getpeer(__be32 daddr, int create)
362{ 415{
363 struct inet_peer *p, *n; 416 struct inet_peer *p;
364 struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; 417 struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
365 418
366 /* Look up for the address quickly. */ 419 /* Look up for the address quickly, lockless.
367 read_lock_bh(&peer_pool_lock); 420 * Because of a concurrent writer, we might not find an existing entry.
368 p = lookup(daddr, NULL); 421 */
369 if (p != peer_avl_empty) 422 rcu_read_lock_bh();
370 atomic_inc(&p->refcnt); 423 p = lookup_rcu_bh(daddr);
371 read_unlock_bh(&peer_pool_lock); 424 rcu_read_unlock_bh();
425
426 if (p) {
427 /* The existing node has been found.
428 * Remove the entry from unused list if it was there.
429 */
430 unlink_from_unused(p);
431 return p;
432 }
372 433
434 /* retry an exact lookup, taking the lock before.
435 * At least, nodes should be hot in our cache.
436 */
437 spin_lock_bh(&peers.lock);
438 p = lookup(daddr, stack);
373 if (p != peer_avl_empty) { 439 if (p != peer_avl_empty) {
374 /* The existing node has been found. */ 440 atomic_inc(&p->refcnt);
441 spin_unlock_bh(&peers.lock);
375 /* Remove the entry from unused list if it was there. */ 442 /* Remove the entry from unused list if it was there. */
376 unlink_from_unused(p); 443 unlink_from_unused(p);
377 return p; 444 return p;
378 } 445 }
446 p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
447 if (p) {
448 p->v4daddr = daddr;
449 atomic_set(&p->refcnt, 1);
450 atomic_set(&p->rid, 0);
451 atomic_set(&p->ip_id_count, secure_ip_id(daddr));
452 p->tcp_ts_stamp = 0;
453 INIT_LIST_HEAD(&p->unused);
454
455
456 /* Link the node. */
457 link_to_pool(p);
458 peers.total++;
459 }
460 spin_unlock_bh(&peers.lock);
379 461
380 if (!create) 462 if (peers.total >= inet_peer_threshold)
381 return NULL;
382
383 /* Allocate the space outside the locked region. */
384 n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
385 if (n == NULL)
386 return NULL;
387 n->v4daddr = daddr;
388 atomic_set(&n->refcnt, 1);
389 atomic_set(&n->rid, 0);
390 atomic_set(&n->ip_id_count, secure_ip_id(daddr));
391 n->tcp_ts_stamp = 0;
392
393 write_lock_bh(&peer_pool_lock);
394 /* Check if an entry has suddenly appeared. */
395 p = lookup(daddr, stack);
396 if (p != peer_avl_empty)
397 goto out_free;
398
399 /* Link the node. */
400 link_to_pool(n);
401 INIT_LIST_HEAD(&n->unused);
402 peer_total++;
403 write_unlock_bh(&peer_pool_lock);
404
405 if (peer_total >= inet_peer_threshold)
406 /* Remove one less-recently-used entry. */ 463 /* Remove one less-recently-used entry. */
407 cleanup_once(0); 464 cleanup_once(0);
408 465
409 return n;
410
411out_free:
412 /* The appropriate node is already in the pool. */
413 atomic_inc(&p->refcnt);
414 write_unlock_bh(&peer_pool_lock);
415 /* Remove the entry from unused list if it was there. */
416 unlink_from_unused(p);
417 /* Free preallocated the preallocated node. */
418 kmem_cache_free(peer_cachep, n);
419 return p; 466 return p;
420} 467}
421 468
@@ -425,12 +472,12 @@ static void peer_check_expire(unsigned long dummy)
425 unsigned long now = jiffies; 472 unsigned long now = jiffies;
426 int ttl; 473 int ttl;
427 474
428 if (peer_total >= inet_peer_threshold) 475 if (peers.total >= inet_peer_threshold)
429 ttl = inet_peer_minttl; 476 ttl = inet_peer_minttl;
430 else 477 else
431 ttl = inet_peer_maxttl 478 ttl = inet_peer_maxttl
432 - (inet_peer_maxttl - inet_peer_minttl) / HZ * 479 - (inet_peer_maxttl - inet_peer_minttl) / HZ *
433 peer_total / inet_peer_threshold * HZ; 480 peers.total / inet_peer_threshold * HZ;
434 while (!cleanup_once(ttl)) { 481 while (!cleanup_once(ttl)) {
435 if (jiffies != now) 482 if (jiffies != now)
436 break; 483 break;
@@ -439,22 +486,25 @@ static void peer_check_expire(unsigned long dummy)
439 /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime 486 /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
440 * interval depending on the total number of entries (more entries, 487 * interval depending on the total number of entries (more entries,
441 * less interval). */ 488 * less interval). */
442 if (peer_total >= inet_peer_threshold) 489 if (peers.total >= inet_peer_threshold)
443 peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; 490 peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
444 else 491 else
445 peer_periodic_timer.expires = jiffies 492 peer_periodic_timer.expires = jiffies
446 + inet_peer_gc_maxtime 493 + inet_peer_gc_maxtime
447 - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * 494 - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
448 peer_total / inet_peer_threshold * HZ; 495 peers.total / inet_peer_threshold * HZ;
449 add_timer(&peer_periodic_timer); 496 add_timer(&peer_periodic_timer);
450} 497}
451 498
452void inet_putpeer(struct inet_peer *p) 499void inet_putpeer(struct inet_peer *p)
453{ 500{
454 spin_lock_bh(&inet_peer_unused_lock); 501 local_bh_disable();
455 if (atomic_dec_and_test(&p->refcnt)) { 502
456 list_add_tail(&p->unused, &unused_peers); 503 if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) {
504 list_add_tail(&p->unused, &unused_peers.list);
457 p->dtime = (__u32)jiffies; 505 p->dtime = (__u32)jiffies;
506 spin_unlock(&unused_peers.lock);
458 } 507 }
459 spin_unlock_bh(&inet_peer_unused_lock); 508
509 local_bh_enable();
460} 510}
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 56cdf68a074c..99461f09320f 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -87,16 +87,16 @@ int ip_forward(struct sk_buff *skb)
87 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 87 if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
88 goto sr_failed; 88 goto sr_failed;
89 89
90 if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && 90 if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
91 (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { 91 (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
92 IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); 92 IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
93 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 93 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
94 htonl(dst_mtu(&rt->u.dst))); 94 htonl(dst_mtu(&rt->dst)));
95 goto drop; 95 goto drop;
96 } 96 }
97 97
98 /* We are about to mangle packet. Copy it! */ 98 /* We are about to mangle packet. Copy it! */
99 if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) 99 if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len))
100 goto drop; 100 goto drop;
101 iph = ip_hdr(skb); 101 iph = ip_hdr(skb);
102 102
@@ -113,7 +113,7 @@ int ip_forward(struct sk_buff *skb)
113 skb->priority = rt_tos2priority(iph->tos); 113 skb->priority = rt_tos2priority(iph->tos);
114 114
115 return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, 115 return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev,
116 rt->u.dst.dev, ip_forward_finish); 116 rt->dst.dev, ip_forward_finish);
117 117
118sr_failed: 118sr_failed:
119 /* 119 /*
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 75347ea70ea0..b7c41654dde5 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -124,11 +124,8 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
124} 124}
125 125
126/* Memory Tracking Functions. */ 126/* Memory Tracking Functions. */
127static __inline__ void frag_kfree_skb(struct netns_frags *nf, 127static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
128 struct sk_buff *skb, int *work)
129{ 128{
130 if (work)
131 *work -= skb->truesize;
132 atomic_sub(skb->truesize, &nf->mem); 129 atomic_sub(skb->truesize, &nf->mem);
133 kfree_skb(skb); 130 kfree_skb(skb);
134} 131}
@@ -309,7 +306,7 @@ static int ip_frag_reinit(struct ipq *qp)
309 fp = qp->q.fragments; 306 fp = qp->q.fragments;
310 do { 307 do {
311 struct sk_buff *xp = fp->next; 308 struct sk_buff *xp = fp->next;
312 frag_kfree_skb(qp->q.net, fp, NULL); 309 frag_kfree_skb(qp->q.net, fp);
313 fp = xp; 310 fp = xp;
314 } while (fp); 311 } while (fp);
315 312
@@ -317,6 +314,7 @@ static int ip_frag_reinit(struct ipq *qp)
317 qp->q.len = 0; 314 qp->q.len = 0;
318 qp->q.meat = 0; 315 qp->q.meat = 0;
319 qp->q.fragments = NULL; 316 qp->q.fragments = NULL;
317 qp->q.fragments_tail = NULL;
320 qp->iif = 0; 318 qp->iif = 0;
321 319
322 return 0; 320 return 0;
@@ -389,6 +387,11 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
389 * in the chain of fragments so far. We must know where to put 387 * in the chain of fragments so far. We must know where to put
390 * this fragment, right? 388 * this fragment, right?
391 */ 389 */
390 prev = qp->q.fragments_tail;
391 if (!prev || FRAG_CB(prev)->offset < offset) {
392 next = NULL;
393 goto found;
394 }
392 prev = NULL; 395 prev = NULL;
393 for (next = qp->q.fragments; next != NULL; next = next->next) { 396 for (next = qp->q.fragments; next != NULL; next = next->next) {
394 if (FRAG_CB(next)->offset >= offset) 397 if (FRAG_CB(next)->offset >= offset)
@@ -396,6 +399,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
396 prev = next; 399 prev = next;
397 } 400 }
398 401
402found:
399 /* We found where to put this one. Check for overlap with 403 /* We found where to put this one. Check for overlap with
400 * preceding fragment, and, if needed, align things so that 404 * preceding fragment, and, if needed, align things so that
401 * any overlaps are eliminated. 405 * any overlaps are eliminated.
@@ -446,7 +450,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
446 qp->q.fragments = next; 450 qp->q.fragments = next;
447 451
448 qp->q.meat -= free_it->len; 452 qp->q.meat -= free_it->len;
449 frag_kfree_skb(qp->q.net, free_it, NULL); 453 frag_kfree_skb(qp->q.net, free_it);
450 } 454 }
451 } 455 }
452 456
@@ -454,6 +458,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
454 458
455 /* Insert this fragment in the chain of fragments. */ 459 /* Insert this fragment in the chain of fragments. */
456 skb->next = next; 460 skb->next = next;
461 if (!next)
462 qp->q.fragments_tail = skb;
457 if (prev) 463 if (prev)
458 prev->next = skb; 464 prev->next = skb;
459 else 465 else
@@ -507,6 +513,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
507 goto out_nomem; 513 goto out_nomem;
508 514
509 fp->next = head->next; 515 fp->next = head->next;
516 if (!fp->next)
517 qp->q.fragments_tail = fp;
510 prev->next = fp; 518 prev->next = fp;
511 519
512 skb_morph(head, qp->q.fragments); 520 skb_morph(head, qp->q.fragments);
@@ -556,7 +564,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
556 564
557 skb_shinfo(head)->frag_list = head->next; 565 skb_shinfo(head)->frag_list = head->next;
558 skb_push(head, head->data - skb_network_header(head)); 566 skb_push(head, head->data - skb_network_header(head));
559 atomic_sub(head->truesize, &qp->q.net->mem);
560 567
561 for (fp=head->next; fp; fp = fp->next) { 568 for (fp=head->next; fp; fp = fp->next) {
562 head->data_len += fp->len; 569 head->data_len += fp->len;
@@ -566,8 +573,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
566 else if (head->ip_summed == CHECKSUM_COMPLETE) 573 else if (head->ip_summed == CHECKSUM_COMPLETE)
567 head->csum = csum_add(head->csum, fp->csum); 574 head->csum = csum_add(head->csum, fp->csum);
568 head->truesize += fp->truesize; 575 head->truesize += fp->truesize;
569 atomic_sub(fp->truesize, &qp->q.net->mem);
570 } 576 }
577 atomic_sub(head->truesize, &qp->q.net->mem);
571 578
572 head->next = NULL; 579 head->next = NULL;
573 head->dev = dev; 580 head->dev = dev;
@@ -578,6 +585,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
578 iph->tot_len = htons(len); 585 iph->tot_len = htons(len);
579 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); 586 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
580 qp->q.fragments = NULL; 587 qp->q.fragments = NULL;
588 qp->q.fragments_tail = NULL;
581 return 0; 589 return 0;
582 590
583out_nomem: 591out_nomem:
@@ -624,6 +632,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
624 kfree_skb(skb); 632 kfree_skb(skb);
625 return -ENOMEM; 633 return -ENOMEM;
626} 634}
635EXPORT_SYMBOL(ip_defrag);
627 636
628#ifdef CONFIG_SYSCTL 637#ifdef CONFIG_SYSCTL
629static int zero; 638static int zero;
@@ -777,5 +786,3 @@ void __init ipfrag_init(void)
777 ip4_frags.secret_interval = 10 * 60 * HZ; 786 ip4_frags.secret_interval = 10 * 60 * HZ;
778 inet_frags_init(&ip4_frags); 787 inet_frags_init(&ip4_frags);
779} 788}
780
781EXPORT_SYMBOL(ip_defrag);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 32618e11076d..945b20a5ad50 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -731,6 +731,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
731 tos = 0; 731 tos = 0;
732 if (skb->protocol == htons(ETH_P_IP)) 732 if (skb->protocol == htons(ETH_P_IP))
733 tos = old_iph->tos; 733 tos = old_iph->tos;
734 else if (skb->protocol == htons(ETH_P_IPV6))
735 tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
734 } 736 }
735 737
736 { 738 {
@@ -745,7 +747,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
745 goto tx_error; 747 goto tx_error;
746 } 748 }
747 } 749 }
748 tdev = rt->u.dst.dev; 750 tdev = rt->dst.dev;
749 751
750 if (tdev == dev) { 752 if (tdev == dev) {
751 ip_rt_put(rt); 753 ip_rt_put(rt);
@@ -755,7 +757,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
755 757
756 df = tiph->frag_off; 758 df = tiph->frag_off;
757 if (df) 759 if (df)
758 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; 760 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
759 else 761 else
760 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; 762 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
761 763
@@ -803,7 +805,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
803 tunnel->err_count = 0; 805 tunnel->err_count = 0;
804 } 806 }
805 807
806 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; 808 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
807 809
808 if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| 810 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
809 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 811 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -830,7 +832,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
830 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 832 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
831 IPSKB_REROUTED); 833 IPSKB_REROUTED);
832 skb_dst_drop(skb); 834 skb_dst_drop(skb);
833 skb_dst_set(skb, &rt->u.dst); 835 skb_dst_set(skb, &rt->dst);
834 836
835 /* 837 /*
836 * Push down and install the IPIP header. 838 * Push down and install the IPIP header.
@@ -853,7 +855,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
853 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; 855 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
854#endif 856#endif
855 else 857 else
856 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); 858 iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
857 } 859 }
858 860
859 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; 861 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
@@ -915,7 +917,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
915 .proto = IPPROTO_GRE }; 917 .proto = IPPROTO_GRE };
916 struct rtable *rt; 918 struct rtable *rt;
917 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 919 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
918 tdev = rt->u.dst.dev; 920 tdev = rt->dst.dev;
919 ip_rt_put(rt); 921 ip_rt_put(rt);
920 } 922 }
921 923
@@ -1174,7 +1176,7 @@ static int ipgre_open(struct net_device *dev)
1174 struct rtable *rt; 1176 struct rtable *rt;
1175 if (ip_route_output_key(dev_net(dev), &rt, &fl)) 1177 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1176 return -EADDRNOTAVAIL; 1178 return -EADDRNOTAVAIL;
1177 dev = rt->u.dst.dev; 1179 dev = rt->dst.dev;
1178 ip_rt_put(rt); 1180 ip_rt_put(rt);
1179 if (__in_dev_get_rtnl(dev) == NULL) 1181 if (__in_dev_get_rtnl(dev) == NULL)
1180 return -EADDRNOTAVAIL; 1182 return -EADDRNOTAVAIL;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d930dc5e4d85..d859bcc26cb7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -146,7 +146,7 @@
146#include <linux/netlink.h> 146#include <linux/netlink.h>
147 147
148/* 148/*
149 * Process Router Attention IP option 149 * Process Router Attention IP option (RFC 2113)
150 */ 150 */
151int ip_call_ra_chain(struct sk_buff *skb) 151int ip_call_ra_chain(struct sk_buff *skb)
152{ 152{
@@ -155,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
155 struct sock *last = NULL; 155 struct sock *last = NULL;
156 struct net_device *dev = skb->dev; 156 struct net_device *dev = skb->dev;
157 157
158 read_lock(&ip_ra_lock); 158 for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
159 for (ra = ip_ra_chain; ra; ra = ra->next) {
160 struct sock *sk = ra->sk; 159 struct sock *sk = ra->sk;
161 160
162 /* If socket is bound to an interface, only report 161 /* If socket is bound to an interface, only report
@@ -167,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
167 sk->sk_bound_dev_if == dev->ifindex) && 166 sk->sk_bound_dev_if == dev->ifindex) &&
168 net_eq(sock_net(sk), dev_net(dev))) { 167 net_eq(sock_net(sk), dev_net(dev))) {
169 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 168 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
170 if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { 169 if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN))
171 read_unlock(&ip_ra_lock);
172 return 1; 170 return 1;
173 }
174 } 171 }
175 if (last) { 172 if (last) {
176 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 173 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -183,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
183 180
184 if (last) { 181 if (last) {
185 raw_rcv(last, skb); 182 raw_rcv(last, skb);
186 read_unlock(&ip_ra_lock);
187 return 1; 183 return 1;
188 } 184 }
189 read_unlock(&ip_ra_lock);
190 return 0; 185 return 0;
191} 186}
192 187
@@ -298,18 +293,16 @@ static inline int ip_rcv_options(struct sk_buff *skb)
298 } 293 }
299 294
300 if (unlikely(opt->srr)) { 295 if (unlikely(opt->srr)) {
301 struct in_device *in_dev = in_dev_get(dev); 296 struct in_device *in_dev = __in_dev_get_rcu(dev);
297
302 if (in_dev) { 298 if (in_dev) {
303 if (!IN_DEV_SOURCE_ROUTE(in_dev)) { 299 if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
304 if (IN_DEV_LOG_MARTIANS(in_dev) && 300 if (IN_DEV_LOG_MARTIANS(in_dev) &&
305 net_ratelimit()) 301 net_ratelimit())
306 printk(KERN_INFO "source route option %pI4 -> %pI4\n", 302 printk(KERN_INFO "source route option %pI4 -> %pI4\n",
307 &iph->saddr, &iph->daddr); 303 &iph->saddr, &iph->daddr);
308 in_dev_put(in_dev);
309 goto drop; 304 goto drop;
310 } 305 }
311
312 in_dev_put(in_dev);
313 } 306 }
314 307
315 if (ip_options_rcv_srr(skb)) 308 if (ip_options_rcv_srr(skb))
@@ -340,13 +333,16 @@ static int ip_rcv_finish(struct sk_buff *skb)
340 else if (err == -ENETUNREACH) 333 else if (err == -ENETUNREACH)
341 IP_INC_STATS_BH(dev_net(skb->dev), 334 IP_INC_STATS_BH(dev_net(skb->dev),
342 IPSTATS_MIB_INNOROUTES); 335 IPSTATS_MIB_INNOROUTES);
336 else if (err == -EXDEV)
337 NET_INC_STATS_BH(dev_net(skb->dev),
338 LINUX_MIB_IPRPFILTER);
343 goto drop; 339 goto drop;
344 } 340 }
345 } 341 }
346 342
347#ifdef CONFIG_NET_CLS_ROUTE 343#ifdef CONFIG_NET_CLS_ROUTE
348 if (unlikely(skb_dst(skb)->tclassid)) { 344 if (unlikely(skb_dst(skb)->tclassid)) {
349 struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); 345 struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
350 u32 idx = skb_dst(skb)->tclassid; 346 u32 idx = skb_dst(skb)->tclassid;
351 st[idx&0xFF].o_packets++; 347 st[idx&0xFF].o_packets++;
352 st[idx&0xFF].o_bytes += skb->len; 348 st[idx&0xFF].o_bytes += skb->len;
@@ -360,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb)
360 356
361 rt = skb_rtable(skb); 357 rt = skb_rtable(skb);
362 if (rt->rt_type == RTN_MULTICAST) { 358 if (rt->rt_type == RTN_MULTICAST) {
363 IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST, 359 IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST,
364 skb->len); 360 skb->len);
365 } else if (rt->rt_type == RTN_BROADCAST) 361 } else if (rt->rt_type == RTN_BROADCAST)
366 IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST, 362 IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST,
367 skb->len); 363 skb->len);
368 364
369 return dst_input(skb); 365 return dst_input(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9a4a6c96cb0d..6652bd9da676 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -89,6 +89,7 @@ __inline__ void ip_send_check(struct iphdr *iph)
89 iph->check = 0; 89 iph->check = 0;
90 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 90 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
91} 91}
92EXPORT_SYMBOL(ip_send_check);
92 93
93int __ip_local_out(struct sk_buff *skb) 94int __ip_local_out(struct sk_buff *skb)
94{ 95{
@@ -151,15 +152,15 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
151 iph->version = 4; 152 iph->version = 4;
152 iph->ihl = 5; 153 iph->ihl = 5;
153 iph->tos = inet->tos; 154 iph->tos = inet->tos;
154 if (ip_dont_fragment(sk, &rt->u.dst)) 155 if (ip_dont_fragment(sk, &rt->dst))
155 iph->frag_off = htons(IP_DF); 156 iph->frag_off = htons(IP_DF);
156 else 157 else
157 iph->frag_off = 0; 158 iph->frag_off = 0;
158 iph->ttl = ip_select_ttl(inet, &rt->u.dst); 159 iph->ttl = ip_select_ttl(inet, &rt->dst);
159 iph->daddr = rt->rt_dst; 160 iph->daddr = rt->rt_dst;
160 iph->saddr = rt->rt_src; 161 iph->saddr = rt->rt_src;
161 iph->protocol = sk->sk_protocol; 162 iph->protocol = sk->sk_protocol;
162 ip_select_ident(iph, &rt->u.dst, sk); 163 ip_select_ident(iph, &rt->dst, sk);
163 164
164 if (opt && opt->optlen) { 165 if (opt && opt->optlen) {
165 iph->ihl += opt->optlen>>2; 166 iph->ihl += opt->optlen>>2;
@@ -172,7 +173,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
172 /* Send it out. */ 173 /* Send it out. */
173 return ip_local_out(skb); 174 return ip_local_out(skb);
174} 175}
175
176EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); 176EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
177 177
178static inline int ip_finish_output2(struct sk_buff *skb) 178static inline int ip_finish_output2(struct sk_buff *skb)
@@ -240,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb)
240{ 240{
241 struct sock *sk = skb->sk; 241 struct sock *sk = skb->sk;
242 struct rtable *rt = skb_rtable(skb); 242 struct rtable *rt = skb_rtable(skb);
243 struct net_device *dev = rt->u.dst.dev; 243 struct net_device *dev = rt->dst.dev;
244 244
245 /* 245 /*
246 * If the indicated interface is up and running, send the packet. 246 * If the indicated interface is up and running, send the packet.
@@ -359,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb)
359 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) 359 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
360 goto no_route; 360 goto no_route;
361 } 361 }
362 sk_setup_caps(sk, &rt->u.dst); 362 sk_setup_caps(sk, &rt->dst);
363 } 363 }
364 skb_dst_set_noref(skb, &rt->u.dst); 364 skb_dst_set_noref(skb, &rt->dst);
365 365
366packet_routed: 366packet_routed:
367 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 367 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
@@ -372,11 +372,11 @@ packet_routed:
372 skb_reset_network_header(skb); 372 skb_reset_network_header(skb);
373 iph = ip_hdr(skb); 373 iph = ip_hdr(skb);
374 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 374 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
375 if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df) 375 if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
376 iph->frag_off = htons(IP_DF); 376 iph->frag_off = htons(IP_DF);
377 else 377 else
378 iph->frag_off = 0; 378 iph->frag_off = 0;
379 iph->ttl = ip_select_ttl(inet, &rt->u.dst); 379 iph->ttl = ip_select_ttl(inet, &rt->dst);
380 iph->protocol = sk->sk_protocol; 380 iph->protocol = sk->sk_protocol;
381 iph->saddr = rt->rt_src; 381 iph->saddr = rt->rt_src;
382 iph->daddr = rt->rt_dst; 382 iph->daddr = rt->rt_dst;
@@ -387,7 +387,7 @@ packet_routed:
387 ip_options_build(skb, opt, inet->inet_daddr, rt, 0); 387 ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
388 } 388 }
389 389
390 ip_select_ident_more(iph, &rt->u.dst, sk, 390 ip_select_ident_more(iph, &rt->dst, sk,
391 (skb_shinfo(skb)->gso_segs ?: 1) - 1); 391 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
392 392
393 skb->priority = sk->sk_priority; 393 skb->priority = sk->sk_priority;
@@ -403,6 +403,7 @@ no_route:
403 kfree_skb(skb); 403 kfree_skb(skb);
404 return -EHOSTUNREACH; 404 return -EHOSTUNREACH;
405} 405}
406EXPORT_SYMBOL(ip_queue_xmit);
406 407
407 408
408static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) 409static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
@@ -411,7 +412,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
411 to->priority = from->priority; 412 to->priority = from->priority;
412 to->protocol = from->protocol; 413 to->protocol = from->protocol;
413 skb_dst_drop(to); 414 skb_dst_drop(to);
414 skb_dst_set(to, dst_clone(skb_dst(from))); 415 skb_dst_copy(to, from);
415 to->dev = from->dev; 416 to->dev = from->dev;
416 to->mark = from->mark; 417 to->mark = from->mark;
417 418
@@ -442,7 +443,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
442int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) 443int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
443{ 444{
444 struct iphdr *iph; 445 struct iphdr *iph;
445 int raw = 0;
446 int ptr; 446 int ptr;
447 struct net_device *dev; 447 struct net_device *dev;
448 struct sk_buff *skb2; 448 struct sk_buff *skb2;
@@ -452,7 +452,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
452 struct rtable *rt = skb_rtable(skb); 452 struct rtable *rt = skb_rtable(skb);
453 int err = 0; 453 int err = 0;
454 454
455 dev = rt->u.dst.dev; 455 dev = rt->dst.dev;
456 456
457 /* 457 /*
458 * Point into the IP datagram header. 458 * Point into the IP datagram header.
@@ -473,7 +473,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
473 */ 473 */
474 474
475 hlen = iph->ihl * 4; 475 hlen = iph->ihl * 4;
476 mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ 476 mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */
477#ifdef CONFIG_BRIDGE_NETFILTER 477#ifdef CONFIG_BRIDGE_NETFILTER
478 if (skb->nf_bridge) 478 if (skb->nf_bridge)
479 mtu -= nf_bridge_mtu_reduction(skb); 479 mtu -= nf_bridge_mtu_reduction(skb);
@@ -580,13 +580,13 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
580 580
581slow_path: 581slow_path:
582 left = skb->len - hlen; /* Space per frame */ 582 left = skb->len - hlen; /* Space per frame */
583 ptr = raw + hlen; /* Where to start from */ 583 ptr = hlen; /* Where to start from */
584 584
585 /* for bridged IP traffic encapsulated inside f.e. a vlan header, 585 /* for bridged IP traffic encapsulated inside f.e. a vlan header,
586 * we need to make room for the encapsulating header 586 * we need to make room for the encapsulating header
587 */ 587 */
588 pad = nf_bridge_pad(skb); 588 pad = nf_bridge_pad(skb);
589 ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); 589 ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, pad);
590 mtu -= pad; 590 mtu -= pad;
591 591
592 /* 592 /*
@@ -697,7 +697,6 @@ fail:
697 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 697 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
698 return err; 698 return err;
699} 699}
700
701EXPORT_SYMBOL(ip_fragment); 700EXPORT_SYMBOL(ip_fragment);
702 701
703int 702int
@@ -716,6 +715,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
716 } 715 }
717 return 0; 716 return 0;
718} 717}
718EXPORT_SYMBOL(ip_generic_getfrag);
719 719
720static inline __wsum 720static inline __wsum
721csum_page(struct page *page, int offset, int copy) 721csum_page(struct page *page, int offset, int copy)
@@ -833,13 +833,13 @@ int ip_append_data(struct sock *sk,
833 */ 833 */
834 *rtp = NULL; 834 *rtp = NULL;
835 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? 835 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
836 rt->u.dst.dev->mtu : 836 rt->dst.dev->mtu :
837 dst_mtu(rt->u.dst.path); 837 dst_mtu(rt->dst.path);
838 inet->cork.dst = &rt->u.dst; 838 inet->cork.dst = &rt->dst;
839 inet->cork.length = 0; 839 inet->cork.length = 0;
840 sk->sk_sndmsg_page = NULL; 840 sk->sk_sndmsg_page = NULL;
841 sk->sk_sndmsg_off = 0; 841 sk->sk_sndmsg_off = 0;
842 if ((exthdrlen = rt->u.dst.header_len) != 0) { 842 if ((exthdrlen = rt->dst.header_len) != 0) {
843 length += exthdrlen; 843 length += exthdrlen;
844 transhdrlen += exthdrlen; 844 transhdrlen += exthdrlen;
845 } 845 }
@@ -852,7 +852,7 @@ int ip_append_data(struct sock *sk,
852 exthdrlen = 0; 852 exthdrlen = 0;
853 mtu = inet->cork.fragsize; 853 mtu = inet->cork.fragsize;
854 } 854 }
855 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 855 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
856 856
857 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 857 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
858 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 858 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
@@ -869,14 +869,16 @@ int ip_append_data(struct sock *sk,
869 */ 869 */
870 if (transhdrlen && 870 if (transhdrlen &&
871 length + fragheaderlen <= mtu && 871 length + fragheaderlen <= mtu &&
872 rt->u.dst.dev->features & NETIF_F_V4_CSUM && 872 rt->dst.dev->features & NETIF_F_V4_CSUM &&
873 !exthdrlen) 873 !exthdrlen)
874 csummode = CHECKSUM_PARTIAL; 874 csummode = CHECKSUM_PARTIAL;
875 875
876 skb = skb_peek_tail(&sk->sk_write_queue);
877
876 inet->cork.length += length; 878 inet->cork.length += length;
877 if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) && 879 if (((length > mtu) || (skb && skb_is_gso(skb))) &&
878 (sk->sk_protocol == IPPROTO_UDP) && 880 (sk->sk_protocol == IPPROTO_UDP) &&
879 (rt->u.dst.dev->features & NETIF_F_UFO)) { 881 (rt->dst.dev->features & NETIF_F_UFO)) {
880 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, 882 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
881 fragheaderlen, transhdrlen, mtu, 883 fragheaderlen, transhdrlen, mtu,
882 flags); 884 flags);
@@ -892,7 +894,7 @@ int ip_append_data(struct sock *sk,
892 * adding appropriate IP header. 894 * adding appropriate IP header.
893 */ 895 */
894 896
895 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 897 if (!skb)
896 goto alloc_new_skb; 898 goto alloc_new_skb;
897 899
898 while (length > 0) { 900 while (length > 0) {
@@ -924,7 +926,7 @@ alloc_new_skb:
924 fraglen = datalen + fragheaderlen; 926 fraglen = datalen + fragheaderlen;
925 927
926 if ((flags & MSG_MORE) && 928 if ((flags & MSG_MORE) &&
927 !(rt->u.dst.dev->features&NETIF_F_SG)) 929 !(rt->dst.dev->features&NETIF_F_SG))
928 alloclen = mtu; 930 alloclen = mtu;
929 else 931 else
930 alloclen = datalen + fragheaderlen; 932 alloclen = datalen + fragheaderlen;
@@ -935,7 +937,7 @@ alloc_new_skb:
935 * the last. 937 * the last.
936 */ 938 */
937 if (datalen == length + fraggap) 939 if (datalen == length + fraggap)
938 alloclen += rt->u.dst.trailer_len; 940 alloclen += rt->dst.trailer_len;
939 941
940 if (transhdrlen) { 942 if (transhdrlen) {
941 skb = sock_alloc_send_skb(sk, 943 skb = sock_alloc_send_skb(sk,
@@ -1008,7 +1010,7 @@ alloc_new_skb:
1008 if (copy > length) 1010 if (copy > length)
1009 copy = length; 1011 copy = length;
1010 1012
1011 if (!(rt->u.dst.dev->features&NETIF_F_SG)) { 1013 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1012 unsigned int off; 1014 unsigned int off;
1013 1015
1014 off = skb->len; 1016 off = skb->len;
@@ -1103,10 +1105,10 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1103 if (inet->cork.flags & IPCORK_OPT) 1105 if (inet->cork.flags & IPCORK_OPT)
1104 opt = inet->cork.opt; 1106 opt = inet->cork.opt;
1105 1107
1106 if (!(rt->u.dst.dev->features&NETIF_F_SG)) 1108 if (!(rt->dst.dev->features&NETIF_F_SG))
1107 return -EOPNOTSUPP; 1109 return -EOPNOTSUPP;
1108 1110
1109 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 1111 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1110 mtu = inet->cork.fragsize; 1112 mtu = inet->cork.fragsize;
1111 1113
1112 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 1114 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
@@ -1121,8 +1123,9 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1121 return -EINVAL; 1123 return -EINVAL;
1122 1124
1123 inet->cork.length += size; 1125 inet->cork.length += size;
1124 if ((sk->sk_protocol == IPPROTO_UDP) && 1126 if ((size + skb->len > mtu) &&
1125 (rt->u.dst.dev->features & NETIF_F_UFO)) { 1127 (sk->sk_protocol == IPPROTO_UDP) &&
1128 (rt->dst.dev->features & NETIF_F_UFO)) {
1126 skb_shinfo(skb)->gso_size = mtu - fragheaderlen; 1129 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1127 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1130 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1128 } 1131 }
@@ -1274,8 +1277,8 @@ int ip_push_pending_frames(struct sock *sk)
1274 * If local_df is set too, we still allow to fragment this frame 1277 * If local_df is set too, we still allow to fragment this frame
1275 * locally. */ 1278 * locally. */
1276 if (inet->pmtudisc >= IP_PMTUDISC_DO || 1279 if (inet->pmtudisc >= IP_PMTUDISC_DO ||
1277 (skb->len <= dst_mtu(&rt->u.dst) && 1280 (skb->len <= dst_mtu(&rt->dst) &&
1278 ip_dont_fragment(sk, &rt->u.dst))) 1281 ip_dont_fragment(sk, &rt->dst)))
1279 df = htons(IP_DF); 1282 df = htons(IP_DF);
1280 1283
1281 if (inet->cork.flags & IPCORK_OPT) 1284 if (inet->cork.flags & IPCORK_OPT)
@@ -1284,7 +1287,7 @@ int ip_push_pending_frames(struct sock *sk)
1284 if (rt->rt_type == RTN_MULTICAST) 1287 if (rt->rt_type == RTN_MULTICAST)
1285 ttl = inet->mc_ttl; 1288 ttl = inet->mc_ttl;
1286 else 1289 else
1287 ttl = ip_select_ttl(inet, &rt->u.dst); 1290 ttl = ip_select_ttl(inet, &rt->dst);
1288 1291
1289 iph = (struct iphdr *)skb->data; 1292 iph = (struct iphdr *)skb->data;
1290 iph->version = 4; 1293 iph->version = 4;
@@ -1295,7 +1298,7 @@ int ip_push_pending_frames(struct sock *sk)
1295 } 1298 }
1296 iph->tos = inet->tos; 1299 iph->tos = inet->tos;
1297 iph->frag_off = df; 1300 iph->frag_off = df;
1298 ip_select_ident(iph, &rt->u.dst, sk); 1301 ip_select_ident(iph, &rt->dst, sk);
1299 iph->ttl = ttl; 1302 iph->ttl = ttl;
1300 iph->protocol = sk->sk_protocol; 1303 iph->protocol = sk->sk_protocol;
1301 iph->saddr = rt->rt_src; 1304 iph->saddr = rt->rt_src;
@@ -1308,7 +1311,7 @@ int ip_push_pending_frames(struct sock *sk)
1308 * on dst refcount 1311 * on dst refcount
1309 */ 1312 */
1310 inet->cork.dst = NULL; 1313 inet->cork.dst = NULL;
1311 skb_dst_set(skb, &rt->u.dst); 1314 skb_dst_set(skb, &rt->dst);
1312 1315
1313 if (iph->protocol == IPPROTO_ICMP) 1316 if (iph->protocol == IPPROTO_ICMP)
1314 icmp_out_count(net, ((struct icmphdr *) 1317 icmp_out_count(net, ((struct icmphdr *)
@@ -1445,7 +1448,3 @@ void __init ip_init(void)
1445 igmp_mc_proc_init(); 1448 igmp_mc_proc_init();
1446#endif 1449#endif
1447} 1450}
1448
1449EXPORT_SYMBOL(ip_generic_getfrag);
1450EXPORT_SYMBOL(ip_queue_xmit);
1451EXPORT_SYMBOL(ip_send_check);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ce231780a2b1..6c40a8c46e79 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -239,7 +239,16 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
239 sent to multicast group to reach destination designated router. 239 sent to multicast group to reach destination designated router.
240 */ 240 */
241struct ip_ra_chain *ip_ra_chain; 241struct ip_ra_chain *ip_ra_chain;
242DEFINE_RWLOCK(ip_ra_lock); 242static DEFINE_SPINLOCK(ip_ra_lock);
243
244
245static void ip_ra_destroy_rcu(struct rcu_head *head)
246{
247 struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
248
249 sock_put(ra->saved_sk);
250 kfree(ra);
251}
243 252
244int ip_ra_control(struct sock *sk, unsigned char on, 253int ip_ra_control(struct sock *sk, unsigned char on,
245 void (*destructor)(struct sock *)) 254 void (*destructor)(struct sock *))
@@ -251,35 +260,42 @@ int ip_ra_control(struct sock *sk, unsigned char on,
251 260
252 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 261 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
253 262
254 write_lock_bh(&ip_ra_lock); 263 spin_lock_bh(&ip_ra_lock);
255 for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { 264 for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
256 if (ra->sk == sk) { 265 if (ra->sk == sk) {
257 if (on) { 266 if (on) {
258 write_unlock_bh(&ip_ra_lock); 267 spin_unlock_bh(&ip_ra_lock);
259 kfree(new_ra); 268 kfree(new_ra);
260 return -EADDRINUSE; 269 return -EADDRINUSE;
261 } 270 }
262 *rap = ra->next; 271 /* dont let ip_call_ra_chain() use sk again */
263 write_unlock_bh(&ip_ra_lock); 272 ra->sk = NULL;
273 rcu_assign_pointer(*rap, ra->next);
274 spin_unlock_bh(&ip_ra_lock);
264 275
265 if (ra->destructor) 276 if (ra->destructor)
266 ra->destructor(sk); 277 ra->destructor(sk);
267 sock_put(sk); 278 /*
268 kfree(ra); 279 * Delay sock_put(sk) and kfree(ra) after one rcu grace
280 * period. This guarantee ip_call_ra_chain() dont need
281 * to mess with socket refcounts.
282 */
283 ra->saved_sk = sk;
284 call_rcu(&ra->rcu, ip_ra_destroy_rcu);
269 return 0; 285 return 0;
270 } 286 }
271 } 287 }
272 if (new_ra == NULL) { 288 if (new_ra == NULL) {
273 write_unlock_bh(&ip_ra_lock); 289 spin_unlock_bh(&ip_ra_lock);
274 return -ENOBUFS; 290 return -ENOBUFS;
275 } 291 }
276 new_ra->sk = sk; 292 new_ra->sk = sk;
277 new_ra->destructor = destructor; 293 new_ra->destructor = destructor;
278 294
279 new_ra->next = ra; 295 new_ra->next = ra;
280 *rap = new_ra; 296 rcu_assign_pointer(*rap, new_ra);
281 sock_hold(sk); 297 sock_hold(sk);
282 write_unlock_bh(&ip_ra_lock); 298 spin_unlock_bh(&ip_ra_lock);
283 299
284 return 0; 300 return 0;
285} 301}
@@ -449,7 +465,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
449 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | 465 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
450 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | 466 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
451 (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | 467 (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
452 (1<<IP_MINTTL))) || 468 (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
453 optname == IP_MULTICAST_TTL || 469 optname == IP_MULTICAST_TTL ||
454 optname == IP_MULTICAST_ALL || 470 optname == IP_MULTICAST_ALL ||
455 optname == IP_MULTICAST_LOOP || 471 optname == IP_MULTICAST_LOOP ||
@@ -572,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
572 } 588 }
573 inet->hdrincl = val ? 1 : 0; 589 inet->hdrincl = val ? 1 : 0;
574 break; 590 break;
591 case IP_NODEFRAG:
592 if (sk->sk_type != SOCK_RAW) {
593 err = -ENOPROTOOPT;
594 break;
595 }
596 inet->nodefrag = val ? 1 : 0;
597 break;
575 case IP_MTU_DISCOVER: 598 case IP_MTU_DISCOVER:
576 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) 599 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
577 goto e_inval; 600 goto e_inval;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index b9d84e800cf4..3a6e1ec5e9ae 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -665,6 +665,13 @@ ic_dhcp_init_options(u8 *options)
665 memcpy(e, ic_req_params, sizeof(ic_req_params)); 665 memcpy(e, ic_req_params, sizeof(ic_req_params));
666 e += sizeof(ic_req_params); 666 e += sizeof(ic_req_params);
667 667
668 if (ic_host_name_set) {
669 *e++ = 12; /* host-name */
670 len = strlen(utsname()->nodename);
671 *e++ = len;
672 memcpy(e, utsname()->nodename, len);
673 e += len;
674 }
668 if (*vendor_class_identifier) { 675 if (*vendor_class_identifier) {
669 printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", 676 printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n",
670 vendor_class_identifier); 677 vendor_class_identifier);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 7fd636711037..ec036731a70b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -435,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
435 goto tx_error_icmp; 435 goto tx_error_icmp;
436 } 436 }
437 } 437 }
438 tdev = rt->u.dst.dev; 438 tdev = rt->dst.dev;
439 439
440 if (tdev == dev) { 440 if (tdev == dev) {
441 ip_rt_put(rt); 441 ip_rt_put(rt);
@@ -446,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
446 df |= old_iph->frag_off & htons(IP_DF); 446 df |= old_iph->frag_off & htons(IP_DF);
447 447
448 if (df) { 448 if (df) {
449 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); 449 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
450 450
451 if (mtu < 68) { 451 if (mtu < 68) {
452 stats->collisions++; 452 stats->collisions++;
@@ -503,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
503 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 503 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
504 IPSKB_REROUTED); 504 IPSKB_REROUTED);
505 skb_dst_drop(skb); 505 skb_dst_drop(skb);
506 skb_dst_set(skb, &rt->u.dst); 506 skb_dst_set(skb, &rt->dst);
507 507
508 /* 508 /*
509 * Push down and install the IPIP header. 509 * Push down and install the IPIP header.
@@ -552,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
552 .proto = IPPROTO_IPIP }; 552 .proto = IPPROTO_IPIP };
553 struct rtable *rt; 553 struct rtable *rt;
554 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 554 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
555 tdev = rt->u.dst.dev; 555 tdev = rt->dst.dev;
556 ip_rt_put(rt); 556 ip_rt_put(rt);
557 } 557 }
558 dev->flags |= IFF_POINTOPOINT; 558 dev->flags |= IFF_POINTOPOINT;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 45889103b3e2..179fcab866fc 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -267,8 +267,10 @@ static void __net_exit ipmr_rules_exit(struct net *net)
267{ 267{
268 struct mr_table *mrt, *next; 268 struct mr_table *mrt, *next;
269 269
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) 270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
271 list_del(&mrt->list);
271 kfree(mrt); 272 kfree(mrt);
273 }
272 fib_rules_unregister(net->ipv4.mr_rules_ops); 274 fib_rules_unregister(net->ipv4.mr_rules_ops);
273} 275}
274#else 276#else
@@ -440,8 +442,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
440 int err; 442 int err;
441 443
442 err = ipmr_fib_lookup(net, &fl, &mrt); 444 err = ipmr_fib_lookup(net, &fl, &mrt);
443 if (err < 0) 445 if (err < 0) {
446 kfree_skb(skb);
444 return err; 447 return err;
448 }
445 449
446 read_lock(&mrt_lock); 450 read_lock(&mrt_lock);
447 dev->stats.tx_bytes += skb->len; 451 dev->stats.tx_bytes += skb->len;
@@ -1551,9 +1555,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1551 goto out_free; 1555 goto out_free;
1552 } 1556 }
1553 1557
1554 dev = rt->u.dst.dev; 1558 dev = rt->dst.dev;
1555 1559
1556 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { 1560 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1557 /* Do not fragment multicasts. Alas, IPv4 does not 1561 /* Do not fragment multicasts. Alas, IPv4 does not
1558 allow to send ICMP, so that packets will disappear 1562 allow to send ICMP, so that packets will disappear
1559 to blackhole. 1563 to blackhole.
@@ -1564,7 +1568,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1564 goto out_free; 1568 goto out_free;
1565 } 1569 }
1566 1570
1567 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; 1571 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
1568 1572
1569 if (skb_cow(skb, encap)) { 1573 if (skb_cow(skb, encap)) {
1570 ip_rt_put(rt); 1574 ip_rt_put(rt);
@@ -1575,7 +1579,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1575 vif->bytes_out += skb->len; 1579 vif->bytes_out += skb->len;
1576 1580
1577 skb_dst_drop(skb); 1581 skb_dst_drop(skb);
1578 skb_dst_set(skb, &rt->u.dst); 1582 skb_dst_set(skb, &rt->dst);
1579 ip_decrease_ttl(ip_hdr(skb)); 1583 ip_decrease_ttl(ip_hdr(skb));
1580 1584
1581 /* FIXME: forward and output firewalls used to be called here. 1585 /* FIXME: forward and output firewalls used to be called here.
@@ -1726,8 +1730,10 @@ int ip_mr_input(struct sk_buff *skb)
1726 goto dont_forward; 1730 goto dont_forward;
1727 1731
1728 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); 1732 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1729 if (err < 0) 1733 if (err < 0) {
1734 kfree_skb(skb);
1730 return err; 1735 return err;
1736 }
1731 1737
1732 if (!local) { 1738 if (!local) {
1733 if (IPCB(skb)->opt.router_alert) { 1739 if (IPCB(skb)->opt.router_alert) {
@@ -1911,7 +1917,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1911 struct rtattr *mp_head; 1917 struct rtattr *mp_head;
1912 1918
1913 /* If cache is unresolved, don't try to parse IIF and OIF */ 1919 /* If cache is unresolved, don't try to parse IIF and OIF */
1914 if (c->mfc_parent > MAXVIFS) 1920 if (c->mfc_parent >= MAXVIFS)
1915 return -ENOENT; 1921 return -ENOENT;
1916 1922
1917 if (VIF_EXISTS(mrt, c->mfc_parent)) 1923 if (VIF_EXISTS(mrt, c->mfc_parent))
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 07de855e2175..d88a46c54fd1 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
43 43
44 /* Drop old route. */ 44 /* Drop old route. */
45 skb_dst_drop(skb); 45 skb_dst_drop(skb);
46 skb_dst_set(skb, &rt->u.dst); 46 skb_dst_set(skb, &rt->dst);
47 } else { 47 } else {
48 /* non-local src, find valid iif to satisfy 48 /* non-local src, find valid iif to satisfy
49 * rp-filter when calling ip_route_input. */ 49 * rp-filter when calling ip_route_input. */
@@ -53,11 +53,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
53 53
54 orefdst = skb->_skb_refdst; 54 orefdst = skb->_skb_refdst;
55 if (ip_route_input(skb, iph->daddr, iph->saddr, 55 if (ip_route_input(skb, iph->daddr, iph->saddr,
56 RT_TOS(iph->tos), rt->u.dst.dev) != 0) { 56 RT_TOS(iph->tos), rt->dst.dev) != 0) {
57 dst_release(&rt->u.dst); 57 dst_release(&rt->dst);
58 return -1; 58 return -1;
59 } 59 }
60 dst_release(&rt->u.dst); 60 dst_release(&rt->dst);
61 refdst_drop(orefdst); 61 refdst_drop(orefdst);
62 } 62 }
63 63
@@ -212,9 +212,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
212 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, 212 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
213 skb->len - dataoff, 0); 213 skb->len - dataoff, 0);
214 skb->ip_summed = CHECKSUM_NONE; 214 skb->ip_summed = CHECKSUM_NONE;
215 csum = __skb_checksum_complete_head(skb, dataoff + len); 215 return __skb_checksum_complete_head(skb, dataoff + len);
216 if (!csum)
217 skb->ip_summed = CHECKSUM_UNNECESSARY;
218 } 216 }
219 return csum; 217 return csum;
220} 218}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 1ac01b128621..16c0ba0a2728 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -758,7 +758,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
758 * about). 758 * about).
759 */ 759 */
760 countersize = sizeof(struct xt_counters) * private->number; 760 countersize = sizeof(struct xt_counters) * private->number;
761 counters = vmalloc_node(countersize, numa_node_id()); 761 counters = vmalloc(countersize);
762 762
763 if (counters == NULL) 763 if (counters == NULL)
764 return ERR_PTR(-ENOMEM); 764 return ERR_PTR(-ENOMEM);
@@ -1005,8 +1005,7 @@ static int __do_replace(struct net *net, const char *name,
1005 struct arpt_entry *iter; 1005 struct arpt_entry *iter;
1006 1006
1007 ret = 0; 1007 ret = 0;
1008 counters = vmalloc_node(num_counters * sizeof(struct xt_counters), 1008 counters = vmalloc(num_counters * sizeof(struct xt_counters));
1009 numa_node_id());
1010 if (!counters) { 1009 if (!counters) {
1011 ret = -ENOMEM; 1010 ret = -ENOMEM;
1012 goto out; 1011 goto out;
@@ -1159,7 +1158,7 @@ static int do_add_counters(struct net *net, const void __user *user,
1159 if (len != size + num_counters * sizeof(struct xt_counters)) 1158 if (len != size + num_counters * sizeof(struct xt_counters))
1160 return -EINVAL; 1159 return -EINVAL;
1161 1160
1162 paddc = vmalloc_node(len - size, numa_node_id()); 1161 paddc = vmalloc(len - size);
1163 if (!paddc) 1162 if (!paddc)
1164 return -ENOMEM; 1163 return -ENOMEM;
1165 1164
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index a4e5fc5df4bf..d2c1311cb28d 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -42,7 +42,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
42 42
43static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; 43static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
44static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; 44static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
45static DEFINE_RWLOCK(queue_lock); 45static DEFINE_SPINLOCK(queue_lock);
46static int peer_pid __read_mostly; 46static int peer_pid __read_mostly;
47static unsigned int copy_range __read_mostly; 47static unsigned int copy_range __read_mostly;
48static unsigned int queue_total; 48static unsigned int queue_total;
@@ -72,10 +72,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
72 break; 72 break;
73 73
74 case IPQ_COPY_PACKET: 74 case IPQ_COPY_PACKET:
75 copy_mode = mode; 75 if (range > 0xFFFF)
76 range = 0xFFFF;
76 copy_range = range; 77 copy_range = range;
77 if (copy_range > 0xFFFF) 78 copy_mode = mode;
78 copy_range = 0xFFFF;
79 break; 79 break;
80 80
81 default: 81 default:
@@ -101,7 +101,7 @@ ipq_find_dequeue_entry(unsigned long id)
101{ 101{
102 struct nf_queue_entry *entry = NULL, *i; 102 struct nf_queue_entry *entry = NULL, *i;
103 103
104 write_lock_bh(&queue_lock); 104 spin_lock_bh(&queue_lock);
105 105
106 list_for_each_entry(i, &queue_list, list) { 106 list_for_each_entry(i, &queue_list, list) {
107 if ((unsigned long)i == id) { 107 if ((unsigned long)i == id) {
@@ -115,7 +115,7 @@ ipq_find_dequeue_entry(unsigned long id)
115 queue_total--; 115 queue_total--;
116 } 116 }
117 117
118 write_unlock_bh(&queue_lock); 118 spin_unlock_bh(&queue_lock);
119 return entry; 119 return entry;
120} 120}
121 121
@@ -136,9 +136,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
136static void 136static void
137ipq_flush(ipq_cmpfn cmpfn, unsigned long data) 137ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
138{ 138{
139 write_lock_bh(&queue_lock); 139 spin_lock_bh(&queue_lock);
140 __ipq_flush(cmpfn, data); 140 __ipq_flush(cmpfn, data);
141 write_unlock_bh(&queue_lock); 141 spin_unlock_bh(&queue_lock);
142} 142}
143 143
144static struct sk_buff * 144static struct sk_buff *
@@ -152,9 +152,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
152 struct nlmsghdr *nlh; 152 struct nlmsghdr *nlh;
153 struct timeval tv; 153 struct timeval tv;
154 154
155 read_lock_bh(&queue_lock); 155 switch (ACCESS_ONCE(copy_mode)) {
156
157 switch (copy_mode) {
158 case IPQ_COPY_META: 156 case IPQ_COPY_META:
159 case IPQ_COPY_NONE: 157 case IPQ_COPY_NONE:
160 size = NLMSG_SPACE(sizeof(*pmsg)); 158 size = NLMSG_SPACE(sizeof(*pmsg));
@@ -162,26 +160,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
162 160
163 case IPQ_COPY_PACKET: 161 case IPQ_COPY_PACKET:
164 if (entry->skb->ip_summed == CHECKSUM_PARTIAL && 162 if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
165 (*errp = skb_checksum_help(entry->skb))) { 163 (*errp = skb_checksum_help(entry->skb)))
166 read_unlock_bh(&queue_lock);
167 return NULL; 164 return NULL;
168 } 165
169 if (copy_range == 0 || copy_range > entry->skb->len) 166 data_len = ACCESS_ONCE(copy_range);
167 if (data_len == 0 || data_len > entry->skb->len)
170 data_len = entry->skb->len; 168 data_len = entry->skb->len;
171 else
172 data_len = copy_range;
173 169
174 size = NLMSG_SPACE(sizeof(*pmsg) + data_len); 170 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
175 break; 171 break;
176 172
177 default: 173 default:
178 *errp = -EINVAL; 174 *errp = -EINVAL;
179 read_unlock_bh(&queue_lock);
180 return NULL; 175 return NULL;
181 } 176 }
182 177
183 read_unlock_bh(&queue_lock);
184
185 skb = alloc_skb(size, GFP_ATOMIC); 178 skb = alloc_skb(size, GFP_ATOMIC);
186 if (!skb) 179 if (!skb)
187 goto nlmsg_failure; 180 goto nlmsg_failure;
@@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
242 if (nskb == NULL) 235 if (nskb == NULL)
243 return status; 236 return status;
244 237
245 write_lock_bh(&queue_lock); 238 spin_lock_bh(&queue_lock);
246 239
247 if (!peer_pid) 240 if (!peer_pid)
248 goto err_out_free_nskb; 241 goto err_out_free_nskb;
@@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
266 259
267 __ipq_enqueue_entry(entry); 260 __ipq_enqueue_entry(entry);
268 261
269 write_unlock_bh(&queue_lock); 262 spin_unlock_bh(&queue_lock);
270 return status; 263 return status;
271 264
272err_out_free_nskb: 265err_out_free_nskb:
273 kfree_skb(nskb); 266 kfree_skb(nskb);
274 267
275err_out_unlock: 268err_out_unlock:
276 write_unlock_bh(&queue_lock); 269 spin_unlock_bh(&queue_lock);
277 return status; 270 return status;
278} 271}
279 272
@@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range)
342{ 335{
343 int status; 336 int status;
344 337
345 write_lock_bh(&queue_lock); 338 spin_lock_bh(&queue_lock);
346 status = __ipq_set_mode(mode, range); 339 status = __ipq_set_mode(mode, range);
347 write_unlock_bh(&queue_lock); 340 spin_unlock_bh(&queue_lock);
348 return status; 341 return status;
349} 342}
350 343
@@ -440,11 +433,11 @@ __ipq_rcv_skb(struct sk_buff *skb)
440 if (security_netlink_recv(skb, CAP_NET_ADMIN)) 433 if (security_netlink_recv(skb, CAP_NET_ADMIN))
441 RCV_SKB_FAIL(-EPERM); 434 RCV_SKB_FAIL(-EPERM);
442 435
443 write_lock_bh(&queue_lock); 436 spin_lock_bh(&queue_lock);
444 437
445 if (peer_pid) { 438 if (peer_pid) {
446 if (peer_pid != pid) { 439 if (peer_pid != pid) {
447 write_unlock_bh(&queue_lock); 440 spin_unlock_bh(&queue_lock);
448 RCV_SKB_FAIL(-EBUSY); 441 RCV_SKB_FAIL(-EBUSY);
449 } 442 }
450 } else { 443 } else {
@@ -452,7 +445,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
452 peer_pid = pid; 445 peer_pid = pid;
453 } 446 }
454 447
455 write_unlock_bh(&queue_lock); 448 spin_unlock_bh(&queue_lock);
456 449
457 status = ipq_receive_peer(NLMSG_DATA(nlh), type, 450 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
458 nlmsglen - NLMSG_LENGTH(0)); 451 nlmsglen - NLMSG_LENGTH(0));
@@ -497,10 +490,10 @@ ipq_rcv_nl_event(struct notifier_block *this,
497 struct netlink_notify *n = ptr; 490 struct netlink_notify *n = ptr;
498 491
499 if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { 492 if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
500 write_lock_bh(&queue_lock); 493 spin_lock_bh(&queue_lock);
501 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) 494 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
502 __ipq_reset(); 495 __ipq_reset();
503 write_unlock_bh(&queue_lock); 496 spin_unlock_bh(&queue_lock);
504 } 497 }
505 return NOTIFY_DONE; 498 return NOTIFY_DONE;
506} 499}
@@ -527,7 +520,7 @@ static ctl_table ipq_table[] = {
527#ifdef CONFIG_PROC_FS 520#ifdef CONFIG_PROC_FS
528static int ip_queue_show(struct seq_file *m, void *v) 521static int ip_queue_show(struct seq_file *m, void *v)
529{ 522{
530 read_lock_bh(&queue_lock); 523 spin_lock_bh(&queue_lock);
531 524
532 seq_printf(m, 525 seq_printf(m,
533 "Peer PID : %d\n" 526 "Peer PID : %d\n"
@@ -545,7 +538,7 @@ static int ip_queue_show(struct seq_file *m, void *v)
545 queue_dropped, 538 queue_dropped,
546 queue_user_dropped); 539 queue_user_dropped);
547 540
548 read_unlock_bh(&queue_lock); 541 spin_unlock_bh(&queue_lock);
549 return 0; 542 return 0;
550} 543}
551 544
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 63958f3394a5..b38c11810c65 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -336,7 +336,7 @@ ipt_do_table(struct sk_buff *skb,
336 cpu = smp_processor_id(); 336 cpu = smp_processor_id();
337 table_base = private->entries[cpu]; 337 table_base = private->entries[cpu];
338 jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; 338 jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
339 stackptr = &private->stackptr[cpu]; 339 stackptr = per_cpu_ptr(private->stackptr, cpu);
340 origptr = *stackptr; 340 origptr = *stackptr;
341 341
342 e = get_entry(table_base, private->hook_entry[hook]); 342 e = get_entry(table_base, private->hook_entry[hook]);
@@ -928,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
928 (other than comefrom, which userspace doesn't care 928 (other than comefrom, which userspace doesn't care
929 about). */ 929 about). */
930 countersize = sizeof(struct xt_counters) * private->number; 930 countersize = sizeof(struct xt_counters) * private->number;
931 counters = vmalloc_node(countersize, numa_node_id()); 931 counters = vmalloc(countersize);
932 932
933 if (counters == NULL) 933 if (counters == NULL)
934 return ERR_PTR(-ENOMEM); 934 return ERR_PTR(-ENOMEM);
@@ -1352,7 +1352,7 @@ do_add_counters(struct net *net, const void __user *user,
1352 if (len != size + num_counters * sizeof(struct xt_counters)) 1352 if (len != size + num_counters * sizeof(struct xt_counters))
1353 return -EINVAL; 1353 return -EINVAL;
1354 1354
1355 paddc = vmalloc_node(len - size, numa_node_id()); 1355 paddc = vmalloc(len - size);
1356 if (!paddc) 1356 if (!paddc)
1357 return -ENOMEM; 1357 return -ENOMEM;
1358 1358
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index f91c94b9a790..64d0875f5192 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -53,12 +53,13 @@ struct clusterip_config {
53#endif 53#endif
54 enum clusterip_hashmode hash_mode; /* which hashing mode */ 54 enum clusterip_hashmode hash_mode; /* which hashing mode */
55 u_int32_t hash_initval; /* hash initialization */ 55 u_int32_t hash_initval; /* hash initialization */
56 struct rcu_head rcu;
56}; 57};
57 58
58static LIST_HEAD(clusterip_configs); 59static LIST_HEAD(clusterip_configs);
59 60
60/* clusterip_lock protects the clusterip_configs list */ 61/* clusterip_lock protects the clusterip_configs list */
61static DEFINE_RWLOCK(clusterip_lock); 62static DEFINE_SPINLOCK(clusterip_lock);
62 63
63#ifdef CONFIG_PROC_FS 64#ifdef CONFIG_PROC_FS
64static const struct file_operations clusterip_proc_fops; 65static const struct file_operations clusterip_proc_fops;
@@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c)
71 atomic_inc(&c->refcount); 72 atomic_inc(&c->refcount);
72} 73}
73 74
75
76static void clusterip_config_rcu_free(struct rcu_head *head)
77{
78 kfree(container_of(head, struct clusterip_config, rcu));
79}
80
74static inline void 81static inline void
75clusterip_config_put(struct clusterip_config *c) 82clusterip_config_put(struct clusterip_config *c)
76{ 83{
77 if (atomic_dec_and_test(&c->refcount)) 84 if (atomic_dec_and_test(&c->refcount))
78 kfree(c); 85 call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
79} 86}
80 87
81/* decrease the count of entries using/referencing this config. If last 88/* decrease the count of entries using/referencing this config. If last
@@ -84,10 +91,11 @@ clusterip_config_put(struct clusterip_config *c)
84static inline void 91static inline void
85clusterip_config_entry_put(struct clusterip_config *c) 92clusterip_config_entry_put(struct clusterip_config *c)
86{ 93{
87 write_lock_bh(&clusterip_lock); 94 local_bh_disable();
88 if (atomic_dec_and_test(&c->entries)) { 95 if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) {
89 list_del(&c->list); 96 list_del_rcu(&c->list);
90 write_unlock_bh(&clusterip_lock); 97 spin_unlock(&clusterip_lock);
98 local_bh_enable();
91 99
92 dev_mc_del(c->dev, c->clustermac); 100 dev_mc_del(c->dev, c->clustermac);
93 dev_put(c->dev); 101 dev_put(c->dev);
@@ -100,7 +108,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
100#endif 108#endif
101 return; 109 return;
102 } 110 }
103 write_unlock_bh(&clusterip_lock); 111 local_bh_enable();
104} 112}
105 113
106static struct clusterip_config * 114static struct clusterip_config *
@@ -108,7 +116,7 @@ __clusterip_config_find(__be32 clusterip)
108{ 116{
109 struct clusterip_config *c; 117 struct clusterip_config *c;
110 118
111 list_for_each_entry(c, &clusterip_configs, list) { 119 list_for_each_entry_rcu(c, &clusterip_configs, list) {
112 if (c->clusterip == clusterip) 120 if (c->clusterip == clusterip)
113 return c; 121 return c;
114 } 122 }
@@ -121,16 +129,15 @@ clusterip_config_find_get(__be32 clusterip, int entry)
121{ 129{
122 struct clusterip_config *c; 130 struct clusterip_config *c;
123 131
124 read_lock_bh(&clusterip_lock); 132 rcu_read_lock_bh();
125 c = __clusterip_config_find(clusterip); 133 c = __clusterip_config_find(clusterip);
126 if (!c) { 134 if (c) {
127 read_unlock_bh(&clusterip_lock); 135 if (unlikely(!atomic_inc_not_zero(&c->refcount)))
128 return NULL; 136 c = NULL;
137 else if (entry)
138 atomic_inc(&c->entries);
129 } 139 }
130 atomic_inc(&c->refcount); 140 rcu_read_unlock_bh();
131 if (entry)
132 atomic_inc(&c->entries);
133 read_unlock_bh(&clusterip_lock);
134 141
135 return c; 142 return c;
136} 143}
@@ -181,9 +188,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
181 } 188 }
182#endif 189#endif
183 190
184 write_lock_bh(&clusterip_lock); 191 spin_lock_bh(&clusterip_lock);
185 list_add(&c->list, &clusterip_configs); 192 list_add_rcu(&c->list, &clusterip_configs);
186 write_unlock_bh(&clusterip_lock); 193 spin_unlock_bh(&clusterip_lock);
187 194
188 return c; 195 return c;
189} 196}
@@ -733,6 +740,9 @@ static void __exit clusterip_tg_exit(void)
733#endif 740#endif
734 nf_unregister_hook(&cip_arp_ops); 741 nf_unregister_hook(&cip_arp_ops);
735 xt_unregister_target(&clusterip_tg_reg); 742 xt_unregister_target(&clusterip_tg_reg);
743
744 /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
745 rcu_barrier_bh();
736} 746}
737 747
738module_init(clusterip_tg_init); 748module_init(clusterip_tg_init);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 5234f4f3499a..915fc17d7ce2 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/skbuff.h> 15#include <linux/skbuff.h>
16#include <linux/if_arp.h>
16#include <linux/ip.h> 17#include <linux/ip.h>
17#include <net/icmp.h> 18#include <net/icmp.h>
18#include <net/udp.h> 19#include <net/udp.h>
@@ -363,6 +364,42 @@ static void dump_packet(const struct nf_loginfo *info,
363 /* maxlen = 230+ 91 + 230 + 252 = 803 */ 364 /* maxlen = 230+ 91 + 230 + 252 = 803 */
364} 365}
365 366
367static void dump_mac_header(const struct nf_loginfo *info,
368 const struct sk_buff *skb)
369{
370 struct net_device *dev = skb->dev;
371 unsigned int logflags = 0;
372
373 if (info->type == NF_LOG_TYPE_LOG)
374 logflags = info->u.log.logflags;
375
376 if (!(logflags & IPT_LOG_MACDECODE))
377 goto fallback;
378
379 switch (dev->type) {
380 case ARPHRD_ETHER:
381 printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
382 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
383 ntohs(eth_hdr(skb)->h_proto));
384 return;
385 default:
386 break;
387 }
388
389fallback:
390 printk("MAC=");
391 if (dev->hard_header_len &&
392 skb->mac_header != skb->network_header) {
393 const unsigned char *p = skb_mac_header(skb);
394 unsigned int i;
395
396 printk("%02x", *p++);
397 for (i = 1; i < dev->hard_header_len; i++, p++)
398 printk(":%02x", *p);
399 }
400 printk(" ");
401}
402
366static struct nf_loginfo default_loginfo = { 403static struct nf_loginfo default_loginfo = {
367 .type = NF_LOG_TYPE_LOG, 404 .type = NF_LOG_TYPE_LOG,
368 .u = { 405 .u = {
@@ -404,20 +441,9 @@ ipt_log_packet(u_int8_t pf,
404 } 441 }
405#endif 442#endif
406 443
407 if (in && !out) { 444 /* MAC logging for input path only. */
408 /* MAC logging for input chain only. */ 445 if (in && !out)
409 printk("MAC="); 446 dump_mac_header(loginfo, skb);
410 if (skb->dev && skb->dev->hard_header_len &&
411 skb->mac_header != skb->network_header) {
412 int i;
413 const unsigned char *p = skb_mac_header(skb);
414 for (i = 0; i < skb->dev->hard_header_len; i++,p++)
415 printk("%02x%c", *p,
416 i==skb->dev->hard_header_len - 1
417 ? ' ':':');
418 } else
419 printk(" ");
420 }
421 447
422 dump_packet(loginfo, skb, 0); 448 dump_packet(loginfo, skb, 0);
423 printk("\n"); 449 printk("\n");
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index f43867d1697f..6cdb298f1035 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
48 48
49 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || 49 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
50 par->hooknum == NF_INET_POST_ROUTING || 50 par->hooknum == NF_INET_POST_ROUTING ||
51 par->hooknum == NF_INET_LOCAL_OUT); 51 par->hooknum == NF_INET_LOCAL_OUT ||
52 par->hooknum == NF_INET_LOCAL_IN);
52 ct = nf_ct_get(skb, &ctinfo); 53 ct = nf_ct_get(skb, &ctinfo);
53 54
54 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); 55 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
@@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = {
77 .table = "nat", 78 .table = "nat",
78 .hooks = (1 << NF_INET_PRE_ROUTING) | 79 .hooks = (1 << NF_INET_PRE_ROUTING) |
79 (1 << NF_INET_POST_ROUTING) | 80 (1 << NF_INET_POST_ROUTING) |
80 (1 << NF_INET_LOCAL_OUT), 81 (1 << NF_INET_LOCAL_OUT) |
82 (1 << NF_INET_LOCAL_IN),
81 .checkentry = netmap_tg_check, 83 .checkentry = netmap_tg_check,
82 .me = THIS_MODULE 84 .me = THIS_MODULE
83}; 85};
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f5f4a888e4ec..bbbd2736c549 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -109,7 +109,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
109 addr_type = RTN_LOCAL; 109 addr_type = RTN_LOCAL;
110 110
111 /* ip_route_me_harder expects skb->dst to be set */ 111 /* ip_route_me_harder expects skb->dst to be set */
112 skb_dst_set(nskb, dst_clone(skb_dst(oldskb))); 112 skb_dst_set_noref(nskb, skb_dst(oldskb));
113 113
114 if (ip_route_me_harder(nskb, addr_type)) 114 if (ip_route_me_harder(nskb, addr_type))
115 goto free_nskb; 115 goto free_nskb;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index cb763ae9ed90..eab8de32f200 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
66 const struct net_device *out, 66 const struct net_device *out,
67 int (*okfn)(struct sk_buff *)) 67 int (*okfn)(struct sk_buff *))
68{ 68{
69 struct inet_sock *inet = inet_sk(skb->sk);
70
71 if (inet && inet->nodefrag)
72 return NF_ACCEPT;
73
69#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 74#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
70#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) 75#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
71 /* Previously seen (loopback)? Ignore. Do this before 76 /* Previously seen (loopback)? Ignore. Do this before
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 4f8bddb760c9..c7719b283ada 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -742,7 +742,7 @@ static int __init nf_nat_init(void)
742 spin_unlock_bh(&nf_nat_lock); 742 spin_unlock_bh(&nf_nat_lock);
743 743
744 /* Initialize fake conntrack so that NAT will skip it */ 744 /* Initialize fake conntrack so that NAT will skip it */
745 nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; 745 nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
746 746
747 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); 747 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
748 748
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 98ed78281aee..ebbd319f62f5 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -28,7 +28,8 @@
28 28
29#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ 29#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
30 (1 << NF_INET_POST_ROUTING) | \ 30 (1 << NF_INET_POST_ROUTING) | \
31 (1 << NF_INET_LOCAL_OUT)) 31 (1 << NF_INET_LOCAL_OUT) | \
32 (1 << NF_INET_LOCAL_IN))
32 33
33static const struct xt_table nat_table = { 34static const struct xt_table nat_table = {
34 .name = "nat", 35 .name = "nat",
@@ -45,7 +46,8 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
45 enum ip_conntrack_info ctinfo; 46 enum ip_conntrack_info ctinfo;
46 const struct nf_nat_multi_range_compat *mr = par->targinfo; 47 const struct nf_nat_multi_range_compat *mr = par->targinfo;
47 48
48 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); 49 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
50 par->hooknum == NF_INET_LOCAL_IN);
49 51
50 ct = nf_ct_get(skb, &ctinfo); 52 ct = nf_ct_get(skb, &ctinfo);
51 53
@@ -99,7 +101,7 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
99 return 0; 101 return 0;
100} 102}
101 103
102unsigned int 104static unsigned int
103alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) 105alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
104{ 106{
105 /* Force range to this IP; let proto decide mapping for 107 /* Force range to this IP; let proto decide mapping for
@@ -141,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
141 .target = ipt_snat_target, 143 .target = ipt_snat_target,
142 .targetsize = sizeof(struct nf_nat_multi_range_compat), 144 .targetsize = sizeof(struct nf_nat_multi_range_compat),
143 .table = "nat", 145 .table = "nat",
144 .hooks = 1 << NF_INET_POST_ROUTING, 146 .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
145 .checkentry = ipt_snat_checkentry, 147 .checkentry = ipt_snat_checkentry,
146 .family = AF_INET, 148 .family = AF_INET,
147}; 149};
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index beb25819c9c9..95481fee8bdb 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum,
98 return NF_ACCEPT; 98 return NF_ACCEPT;
99 99
100 /* Don't try to NAT if this packet is not conntracked */ 100 /* Don't try to NAT if this packet is not conntracked */
101 if (ct == &nf_conntrack_untracked) 101 if (nf_ct_is_untracked(ct))
102 return NF_ACCEPT; 102 return NF_ACCEPT;
103 103
104 nat = nfct_nat(ct); 104 nat = nfct_nat(ct);
@@ -131,13 +131,7 @@ nf_nat_fn(unsigned int hooknum,
131 if (!nf_nat_initialized(ct, maniptype)) { 131 if (!nf_nat_initialized(ct, maniptype)) {
132 unsigned int ret; 132 unsigned int ret;
133 133
134 if (hooknum == NF_INET_LOCAL_IN) 134 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
135 /* LOCAL_IN hook doesn't have a chain! */
136 ret = alloc_null_binding(ct, hooknum);
137 else
138 ret = nf_nat_rule_find(skb, hooknum, in, out,
139 ct);
140
141 if (ret != NF_ACCEPT) 135 if (ret != NF_ACCEPT)
142 return ret; 136 return ret;
143 } else 137 } else
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3dc9914c1dce..4ae1f203f7cb 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -252,6 +252,7 @@ static const struct snmp_mib snmp4_net_list[] = {
252 SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), 252 SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), 253 SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), 254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
255 SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
255 SNMP_MIB_SENTINEL 256 SNMP_MIB_SENTINEL
256}; 257};
257 258
@@ -342,10 +343,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
342 IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, 343 IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
343 sysctl_ip_default_ttl); 344 sysctl_ip_default_ttl);
344 345
346 BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
345 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) 347 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
346 seq_printf(seq, " %lu", 348 seq_printf(seq, " %llu",
347 snmp_fold_field((void __percpu **)net->mib.ip_statistics, 349 snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
348 snmp4_ipstats_list[i].entry)); 350 snmp4_ipstats_list[i].entry,
351 offsetof(struct ipstats_mib, syncp)));
349 352
350 icmp_put(seq); /* RFC 2011 compatibility */ 353 icmp_put(seq); /* RFC 2011 compatibility */
351 icmpmsg_put(seq); 354 icmpmsg_put(seq);
@@ -431,9 +434,10 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
431 434
432 seq_puts(seq, "\nIpExt:"); 435 seq_puts(seq, "\nIpExt:");
433 for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) 436 for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
434 seq_printf(seq, " %lu", 437 seq_printf(seq, " %llu",
435 snmp_fold_field((void __percpu **)net->mib.ip_statistics, 438 snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
436 snmp4_ipextstats_list[i].entry)); 439 snmp4_ipextstats_list[i].entry,
440 offsetof(struct ipstats_mib, syncp)));
437 441
438 seq_putc(seq, '\n'); 442 seq_putc(seq, '\n');
439 return 0; 443 return 0;
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 542f22fc98b3..f2d297351405 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -52,6 +52,7 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
52 52
53 return ret; 53 return ret;
54} 54}
55EXPORT_SYMBOL(inet_add_protocol);
55 56
56/* 57/*
57 * Remove a protocol from the hash tables. 58 * Remove a protocol from the hash tables.
@@ -76,6 +77,4 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
76 77
77 return ret; 78 return ret;
78} 79}
79
80EXPORT_SYMBOL(inet_add_protocol);
81EXPORT_SYMBOL(inet_del_protocol); 80EXPORT_SYMBOL(inet_del_protocol);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2c7a1639388a..009a7b2aa1ef 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -314,7 +314,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
314} 314}
315 315
316static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, 316static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
317 struct rtable *rt, 317 struct rtable **rtp,
318 unsigned int flags) 318 unsigned int flags)
319{ 319{
320 struct inet_sock *inet = inet_sk(sk); 320 struct inet_sock *inet = inet_sk(sk);
@@ -323,25 +323,27 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
323 struct sk_buff *skb; 323 struct sk_buff *skb;
324 unsigned int iphlen; 324 unsigned int iphlen;
325 int err; 325 int err;
326 struct rtable *rt = *rtp;
326 327
327 if (length > rt->u.dst.dev->mtu) { 328 if (length > rt->dst.dev->mtu) {
328 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, 329 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
329 rt->u.dst.dev->mtu); 330 rt->dst.dev->mtu);
330 return -EMSGSIZE; 331 return -EMSGSIZE;
331 } 332 }
332 if (flags&MSG_PROBE) 333 if (flags&MSG_PROBE)
333 goto out; 334 goto out;
334 335
335 skb = sock_alloc_send_skb(sk, 336 skb = sock_alloc_send_skb(sk,
336 length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, 337 length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
337 flags & MSG_DONTWAIT, &err); 338 flags & MSG_DONTWAIT, &err);
338 if (skb == NULL) 339 if (skb == NULL)
339 goto error; 340 goto error;
340 skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); 341 skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
341 342
342 skb->priority = sk->sk_priority; 343 skb->priority = sk->sk_priority;
343 skb->mark = sk->sk_mark; 344 skb->mark = sk->sk_mark;
344 skb_dst_set(skb, dst_clone(&rt->u.dst)); 345 skb_dst_set(skb, &rt->dst);
346 *rtp = NULL;
345 347
346 skb_reset_network_header(skb); 348 skb_reset_network_header(skb);
347 iph = ip_hdr(skb); 349 iph = ip_hdr(skb);
@@ -373,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
373 iph->check = 0; 375 iph->check = 0;
374 iph->tot_len = htons(length); 376 iph->tot_len = htons(length);
375 if (!iph->id) 377 if (!iph->id)
376 ip_select_ident(iph, &rt->u.dst, NULL); 378 ip_select_ident(iph, &rt->dst, NULL);
377 379
378 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 380 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
379 } 381 }
@@ -382,7 +384,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
382 skb_transport_header(skb))->type); 384 skb_transport_header(skb))->type);
383 385
384 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, 386 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
385 rt->u.dst.dev, dst_output); 387 rt->dst.dev, dst_output);
386 if (err > 0) 388 if (err > 0)
387 err = net_xmit_errno(err); 389 err = net_xmit_errno(err);
388 if (err) 390 if (err)
@@ -576,7 +578,7 @@ back_from_confirm:
576 578
577 if (inet->hdrincl) 579 if (inet->hdrincl)
578 err = raw_send_hdrinc(sk, msg->msg_iov, len, 580 err = raw_send_hdrinc(sk, msg->msg_iov, len,
579 rt, msg->msg_flags); 581 &rt, msg->msg_flags);
580 582
581 else { 583 else {
582 if (!ipc.addr) 584 if (!ipc.addr)
@@ -604,7 +606,7 @@ out:
604 return len; 606 return len;
605 607
606do_confirm: 608do_confirm:
607 dst_confirm(&rt->u.dst); 609 dst_confirm(&rt->dst);
608 if (!(msg->msg_flags & MSG_PROBE) || len) 610 if (!(msg->msg_flags & MSG_PROBE) || len)
609 goto back_from_confirm; 611 goto back_from_confirm;
610 err = 0; 612 err = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 560acc677ce4..562ce92de2a6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -253,8 +253,7 @@ static unsigned rt_hash_mask __read_mostly;
253static unsigned int rt_hash_log __read_mostly; 253static unsigned int rt_hash_log __read_mostly;
254 254
255static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 255static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
256#define RT_CACHE_STAT_INC(field) \ 256#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
257 (__raw_get_cpu_var(rt_cache_stat).field++)
258 257
259static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, 258static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
260 int genid) 259 int genid)
@@ -287,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
287 rcu_read_lock_bh(); 286 rcu_read_lock_bh();
288 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); 287 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
289 while (r) { 288 while (r) {
290 if (dev_net(r->u.dst.dev) == seq_file_net(seq) && 289 if (dev_net(r->dst.dev) == seq_file_net(seq) &&
291 r->rt_genid == st->genid) 290 r->rt_genid == st->genid)
292 return r; 291 return r;
293 r = rcu_dereference_bh(r->u.dst.rt_next); 292 r = rcu_dereference_bh(r->dst.rt_next);
294 } 293 }
295 rcu_read_unlock_bh(); 294 rcu_read_unlock_bh();
296 } 295 }
@@ -302,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
302{ 301{
303 struct rt_cache_iter_state *st = seq->private; 302 struct rt_cache_iter_state *st = seq->private;
304 303
305 r = r->u.dst.rt_next; 304 r = r->dst.rt_next;
306 while (!r) { 305 while (!r) {
307 rcu_read_unlock_bh(); 306 rcu_read_unlock_bh();
308 do { 307 do {
@@ -320,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq,
320{ 319{
321 struct rt_cache_iter_state *st = seq->private; 320 struct rt_cache_iter_state *st = seq->private;
322 while ((r = __rt_cache_get_next(seq, r)) != NULL) { 321 while ((r = __rt_cache_get_next(seq, r)) != NULL) {
323 if (dev_net(r->u.dst.dev) != seq_file_net(seq)) 322 if (dev_net(r->dst.dev) != seq_file_net(seq))
324 continue; 323 continue;
325 if (r->rt_genid == st->genid) 324 if (r->rt_genid == st->genid)
326 break; 325 break;
@@ -378,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
378 377
379 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" 378 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
380 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", 379 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
381 r->u.dst.dev ? r->u.dst.dev->name : "*", 380 r->dst.dev ? r->dst.dev->name : "*",
382 (__force u32)r->rt_dst, 381 (__force u32)r->rt_dst,
383 (__force u32)r->rt_gateway, 382 (__force u32)r->rt_gateway,
384 r->rt_flags, atomic_read(&r->u.dst.__refcnt), 383 r->rt_flags, atomic_read(&r->dst.__refcnt),
385 r->u.dst.__use, 0, (__force u32)r->rt_src, 384 r->dst.__use, 0, (__force u32)r->rt_src,
386 (dst_metric(&r->u.dst, RTAX_ADVMSS) ? 385 (dst_metric(&r->dst, RTAX_ADVMSS) ?
387 (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), 386 (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
388 dst_metric(&r->u.dst, RTAX_WINDOW), 387 dst_metric(&r->dst, RTAX_WINDOW),
389 (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + 388 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
390 dst_metric(&r->u.dst, RTAX_RTTVAR)), 389 dst_metric(&r->dst, RTAX_RTTVAR)),
391 r->fl.fl4_tos, 390 r->fl.fl4_tos,
392 r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, 391 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
393 r->u.dst.hh ? (r->u.dst.hh->hh_output == 392 r->dst.hh ? (r->dst.hh->hh_output ==
394 dev_queue_xmit) : 0, 393 dev_queue_xmit) : 0,
395 r->rt_spec_dst, &len); 394 r->rt_spec_dst, &len);
396 395
@@ -609,13 +608,13 @@ static inline int ip_rt_proc_init(void)
609 608
610static inline void rt_free(struct rtable *rt) 609static inline void rt_free(struct rtable *rt)
611{ 610{
612 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 611 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
613} 612}
614 613
615static inline void rt_drop(struct rtable *rt) 614static inline void rt_drop(struct rtable *rt)
616{ 615{
617 ip_rt_put(rt); 616 ip_rt_put(rt);
618 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 617 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
619} 618}
620 619
621static inline int rt_fast_clean(struct rtable *rth) 620static inline int rt_fast_clean(struct rtable *rth)
@@ -623,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth)
623 /* Kill broadcast/multicast entries very aggresively, if they 622 /* Kill broadcast/multicast entries very aggresively, if they
624 collide in hash table with more useful entries */ 623 collide in hash table with more useful entries */
625 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && 624 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
626 rth->fl.iif && rth->u.dst.rt_next; 625 rth->fl.iif && rth->dst.rt_next;
627} 626}
628 627
629static inline int rt_valuable(struct rtable *rth) 628static inline int rt_valuable(struct rtable *rth)
630{ 629{
631 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 630 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
632 rth->u.dst.expires; 631 rth->dst.expires;
633} 632}
634 633
635static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) 634static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -637,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
637 unsigned long age; 636 unsigned long age;
638 int ret = 0; 637 int ret = 0;
639 638
640 if (atomic_read(&rth->u.dst.__refcnt)) 639 if (atomic_read(&rth->dst.__refcnt))
641 goto out; 640 goto out;
642 641
643 ret = 1; 642 ret = 1;
644 if (rth->u.dst.expires && 643 if (rth->dst.expires &&
645 time_after_eq(jiffies, rth->u.dst.expires)) 644 time_after_eq(jiffies, rth->dst.expires))
646 goto out; 645 goto out;
647 646
648 age = jiffies - rth->u.dst.lastuse; 647 age = jiffies - rth->dst.lastuse;
649 ret = 0; 648 ret = 0;
650 if ((age <= tmo1 && !rt_fast_clean(rth)) || 649 if ((age <= tmo1 && !rt_fast_clean(rth)) ||
651 (age <= tmo2 && rt_valuable(rth))) 650 (age <= tmo2 && rt_valuable(rth)))
@@ -661,7 +660,7 @@ out: return ret;
661 */ 660 */
662static inline u32 rt_score(struct rtable *rt) 661static inline u32 rt_score(struct rtable *rt)
663{ 662{
664 u32 score = jiffies - rt->u.dst.lastuse; 663 u32 score = jiffies - rt->dst.lastuse;
665 664
666 score = ~score & ~(3<<30); 665 score = ~score & ~(3<<30);
667 666
@@ -701,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
701 700
702static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 701static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
703{ 702{
704 return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); 703 return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev));
705} 704}
706 705
707static inline int rt_is_expired(struct rtable *rth) 706static inline int rt_is_expired(struct rtable *rth)
708{ 707{
709 return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); 708 return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
710} 709}
711 710
712/* 711/*
@@ -735,7 +734,7 @@ static void rt_do_flush(int process_context)
735 rth = rt_hash_table[i].chain; 734 rth = rt_hash_table[i].chain;
736 735
737 /* defer releasing the head of the list after spin_unlock */ 736 /* defer releasing the head of the list after spin_unlock */
738 for (tail = rth; tail; tail = tail->u.dst.rt_next) 737 for (tail = rth; tail; tail = tail->dst.rt_next)
739 if (!rt_is_expired(tail)) 738 if (!rt_is_expired(tail))
740 break; 739 break;
741 if (rth != tail) 740 if (rth != tail)
@@ -744,9 +743,9 @@ static void rt_do_flush(int process_context)
744 /* call rt_free on entries after the tail requiring flush */ 743 /* call rt_free on entries after the tail requiring flush */
745 prev = &rt_hash_table[i].chain; 744 prev = &rt_hash_table[i].chain;
746 for (p = *prev; p; p = next) { 745 for (p = *prev; p; p = next) {
747 next = p->u.dst.rt_next; 746 next = p->dst.rt_next;
748 if (!rt_is_expired(p)) { 747 if (!rt_is_expired(p)) {
749 prev = &p->u.dst.rt_next; 748 prev = &p->dst.rt_next;
750 } else { 749 } else {
751 *prev = next; 750 *prev = next;
752 rt_free(p); 751 rt_free(p);
@@ -761,7 +760,7 @@ static void rt_do_flush(int process_context)
761 spin_unlock_bh(rt_hash_lock_addr(i)); 760 spin_unlock_bh(rt_hash_lock_addr(i));
762 761
763 for (; rth != tail; rth = next) { 762 for (; rth != tail; rth = next) {
764 next = rth->u.dst.rt_next; 763 next = rth->dst.rt_next;
765 rt_free(rth); 764 rt_free(rth);
766 } 765 }
767 } 766 }
@@ -792,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
792 while (aux != rth) { 791 while (aux != rth) {
793 if (compare_hash_inputs(&aux->fl, &rth->fl)) 792 if (compare_hash_inputs(&aux->fl, &rth->fl))
794 return 0; 793 return 0;
795 aux = aux->u.dst.rt_next; 794 aux = aux->dst.rt_next;
796 } 795 }
797 return ONE; 796 return ONE;
798} 797}
@@ -832,18 +831,18 @@ static void rt_check_expire(void)
832 length = 0; 831 length = 0;
833 spin_lock_bh(rt_hash_lock_addr(i)); 832 spin_lock_bh(rt_hash_lock_addr(i));
834 while ((rth = *rthp) != NULL) { 833 while ((rth = *rthp) != NULL) {
835 prefetch(rth->u.dst.rt_next); 834 prefetch(rth->dst.rt_next);
836 if (rt_is_expired(rth)) { 835 if (rt_is_expired(rth)) {
837 *rthp = rth->u.dst.rt_next; 836 *rthp = rth->dst.rt_next;
838 rt_free(rth); 837 rt_free(rth);
839 continue; 838 continue;
840 } 839 }
841 if (rth->u.dst.expires) { 840 if (rth->dst.expires) {
842 /* Entry is expired even if it is in use */ 841 /* Entry is expired even if it is in use */
843 if (time_before_eq(jiffies, rth->u.dst.expires)) { 842 if (time_before_eq(jiffies, rth->dst.expires)) {
844nofree: 843nofree:
845 tmo >>= 1; 844 tmo >>= 1;
846 rthp = &rth->u.dst.rt_next; 845 rthp = &rth->dst.rt_next;
847 /* 846 /*
848 * We only count entries on 847 * We only count entries on
849 * a chain with equal hash inputs once 848 * a chain with equal hash inputs once
@@ -859,7 +858,7 @@ nofree:
859 goto nofree; 858 goto nofree;
860 859
861 /* Cleanup aged off entries. */ 860 /* Cleanup aged off entries. */
862 *rthp = rth->u.dst.rt_next; 861 *rthp = rth->dst.rt_next;
863 rt_free(rth); 862 rt_free(rth);
864 } 863 }
865 spin_unlock_bh(rt_hash_lock_addr(i)); 864 spin_unlock_bh(rt_hash_lock_addr(i));
@@ -1000,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops)
1000 if (!rt_is_expired(rth) && 999 if (!rt_is_expired(rth) &&
1001 !rt_may_expire(rth, tmo, expire)) { 1000 !rt_may_expire(rth, tmo, expire)) {
1002 tmo >>= 1; 1001 tmo >>= 1;
1003 rthp = &rth->u.dst.rt_next; 1002 rthp = &rth->dst.rt_next;
1004 continue; 1003 continue;
1005 } 1004 }
1006 *rthp = rth->u.dst.rt_next; 1005 *rthp = rth->dst.rt_next;
1007 rt_free(rth); 1006 rt_free(rth);
1008 goal--; 1007 goal--;
1009 } 1008 }
@@ -1069,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head)
1069 1068
1070 while (rth) { 1069 while (rth) {
1071 length += has_noalias(head, rth); 1070 length += has_noalias(head, rth);
1072 rth = rth->u.dst.rt_next; 1071 rth = rth->dst.rt_next;
1073 } 1072 }
1074 return length >> FRACT_BITS; 1073 return length >> FRACT_BITS;
1075} 1074}
@@ -1091,7 +1090,7 @@ restart:
1091 candp = NULL; 1090 candp = NULL;
1092 now = jiffies; 1091 now = jiffies;
1093 1092
1094 if (!rt_caching(dev_net(rt->u.dst.dev))) { 1093 if (!rt_caching(dev_net(rt->dst.dev))) {
1095 /* 1094 /*
1096 * If we're not caching, just tell the caller we 1095 * If we're not caching, just tell the caller we
1097 * were successful and don't touch the route. The 1096 * were successful and don't touch the route. The
@@ -1109,7 +1108,7 @@ restart:
1109 */ 1108 */
1110 1109
1111 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1110 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1112 int err = arp_bind_neighbour(&rt->u.dst); 1111 int err = arp_bind_neighbour(&rt->dst);
1113 if (err) { 1112 if (err) {
1114 if (net_ratelimit()) 1113 if (net_ratelimit())
1115 printk(KERN_WARNING 1114 printk(KERN_WARNING
@@ -1128,19 +1127,19 @@ restart:
1128 spin_lock_bh(rt_hash_lock_addr(hash)); 1127 spin_lock_bh(rt_hash_lock_addr(hash));
1129 while ((rth = *rthp) != NULL) { 1128 while ((rth = *rthp) != NULL) {
1130 if (rt_is_expired(rth)) { 1129 if (rt_is_expired(rth)) {
1131 *rthp = rth->u.dst.rt_next; 1130 *rthp = rth->dst.rt_next;
1132 rt_free(rth); 1131 rt_free(rth);
1133 continue; 1132 continue;
1134 } 1133 }
1135 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { 1134 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
1136 /* Put it first */ 1135 /* Put it first */
1137 *rthp = rth->u.dst.rt_next; 1136 *rthp = rth->dst.rt_next;
1138 /* 1137 /*
1139 * Since lookup is lockfree, the deletion 1138 * Since lookup is lockfree, the deletion
1140 * must be visible to another weakly ordered CPU before 1139 * must be visible to another weakly ordered CPU before
1141 * the insertion at the start of the hash chain. 1140 * the insertion at the start of the hash chain.
1142 */ 1141 */
1143 rcu_assign_pointer(rth->u.dst.rt_next, 1142 rcu_assign_pointer(rth->dst.rt_next,
1144 rt_hash_table[hash].chain); 1143 rt_hash_table[hash].chain);
1145 /* 1144 /*
1146 * Since lookup is lockfree, the update writes 1145 * Since lookup is lockfree, the update writes
@@ -1148,18 +1147,18 @@ restart:
1148 */ 1147 */
1149 rcu_assign_pointer(rt_hash_table[hash].chain, rth); 1148 rcu_assign_pointer(rt_hash_table[hash].chain, rth);
1150 1149
1151 dst_use(&rth->u.dst, now); 1150 dst_use(&rth->dst, now);
1152 spin_unlock_bh(rt_hash_lock_addr(hash)); 1151 spin_unlock_bh(rt_hash_lock_addr(hash));
1153 1152
1154 rt_drop(rt); 1153 rt_drop(rt);
1155 if (rp) 1154 if (rp)
1156 *rp = rth; 1155 *rp = rth;
1157 else 1156 else
1158 skb_dst_set(skb, &rth->u.dst); 1157 skb_dst_set(skb, &rth->dst);
1159 return 0; 1158 return 0;
1160 } 1159 }
1161 1160
1162 if (!atomic_read(&rth->u.dst.__refcnt)) { 1161 if (!atomic_read(&rth->dst.__refcnt)) {
1163 u32 score = rt_score(rth); 1162 u32 score = rt_score(rth);
1164 1163
1165 if (score <= min_score) { 1164 if (score <= min_score) {
@@ -1171,7 +1170,7 @@ restart:
1171 1170
1172 chain_length++; 1171 chain_length++;
1173 1172
1174 rthp = &rth->u.dst.rt_next; 1173 rthp = &rth->dst.rt_next;
1175 } 1174 }
1176 1175
1177 if (cand) { 1176 if (cand) {
@@ -1182,17 +1181,17 @@ restart:
1182 * only 2 entries per bucket. We will see. 1181 * only 2 entries per bucket. We will see.
1183 */ 1182 */
1184 if (chain_length > ip_rt_gc_elasticity) { 1183 if (chain_length > ip_rt_gc_elasticity) {
1185 *candp = cand->u.dst.rt_next; 1184 *candp = cand->dst.rt_next;
1186 rt_free(cand); 1185 rt_free(cand);
1187 } 1186 }
1188 } else { 1187 } else {
1189 if (chain_length > rt_chain_length_max && 1188 if (chain_length > rt_chain_length_max &&
1190 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { 1189 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
1191 struct net *net = dev_net(rt->u.dst.dev); 1190 struct net *net = dev_net(rt->dst.dev);
1192 int num = ++net->ipv4.current_rt_cache_rebuild_count; 1191 int num = ++net->ipv4.current_rt_cache_rebuild_count;
1193 if (!rt_caching(net)) { 1192 if (!rt_caching(net)) {
1194 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", 1193 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
1195 rt->u.dst.dev->name, num); 1194 rt->dst.dev->name, num);
1196 } 1195 }
1197 rt_emergency_hash_rebuild(net); 1196 rt_emergency_hash_rebuild(net);
1198 spin_unlock_bh(rt_hash_lock_addr(hash)); 1197 spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1207,7 +1206,7 @@ restart:
1207 route or unicast forwarding path. 1206 route or unicast forwarding path.
1208 */ 1207 */
1209 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1208 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1210 int err = arp_bind_neighbour(&rt->u.dst); 1209 int err = arp_bind_neighbour(&rt->dst);
1211 if (err) { 1210 if (err) {
1212 spin_unlock_bh(rt_hash_lock_addr(hash)); 1211 spin_unlock_bh(rt_hash_lock_addr(hash));
1213 1212
@@ -1238,14 +1237,14 @@ restart:
1238 } 1237 }
1239 } 1238 }
1240 1239
1241 rt->u.dst.rt_next = rt_hash_table[hash].chain; 1240 rt->dst.rt_next = rt_hash_table[hash].chain;
1242 1241
1243#if RT_CACHE_DEBUG >= 2 1242#if RT_CACHE_DEBUG >= 2
1244 if (rt->u.dst.rt_next) { 1243 if (rt->dst.rt_next) {
1245 struct rtable *trt; 1244 struct rtable *trt;
1246 printk(KERN_DEBUG "rt_cache @%02x: %pI4", 1245 printk(KERN_DEBUG "rt_cache @%02x: %pI4",
1247 hash, &rt->rt_dst); 1246 hash, &rt->rt_dst);
1248 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) 1247 for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
1249 printk(" . %pI4", &trt->rt_dst); 1248 printk(" . %pI4", &trt->rt_dst);
1250 printk("\n"); 1249 printk("\n");
1251 } 1250 }
@@ -1263,7 +1262,7 @@ skip_hashing:
1263 if (rp) 1262 if (rp)
1264 *rp = rt; 1263 *rp = rt;
1265 else 1264 else
1266 skb_dst_set(skb, &rt->u.dst); 1265 skb_dst_set(skb, &rt->dst);
1267 return 0; 1266 return 0;
1268} 1267}
1269 1268
@@ -1325,6 +1324,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1325 1324
1326 ip_select_fb_ident(iph); 1325 ip_select_fb_ident(iph);
1327} 1326}
1327EXPORT_SYMBOL(__ip_select_ident);
1328 1328
1329static void rt_del(unsigned hash, struct rtable *rt) 1329static void rt_del(unsigned hash, struct rtable *rt)
1330{ 1330{
@@ -1335,20 +1335,21 @@ static void rt_del(unsigned hash, struct rtable *rt)
1335 ip_rt_put(rt); 1335 ip_rt_put(rt);
1336 while ((aux = *rthp) != NULL) { 1336 while ((aux = *rthp) != NULL) {
1337 if (aux == rt || rt_is_expired(aux)) { 1337 if (aux == rt || rt_is_expired(aux)) {
1338 *rthp = aux->u.dst.rt_next; 1338 *rthp = aux->dst.rt_next;
1339 rt_free(aux); 1339 rt_free(aux);
1340 continue; 1340 continue;
1341 } 1341 }
1342 rthp = &aux->u.dst.rt_next; 1342 rthp = &aux->dst.rt_next;
1343 } 1343 }
1344 spin_unlock_bh(rt_hash_lock_addr(hash)); 1344 spin_unlock_bh(rt_hash_lock_addr(hash));
1345} 1345}
1346 1346
1347/* called in rcu_read_lock() section */
1347void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, 1348void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1348 __be32 saddr, struct net_device *dev) 1349 __be32 saddr, struct net_device *dev)
1349{ 1350{
1350 int i, k; 1351 int i, k;
1351 struct in_device *in_dev = in_dev_get(dev); 1352 struct in_device *in_dev = __in_dev_get_rcu(dev);
1352 struct rtable *rth, **rthp; 1353 struct rtable *rth, **rthp;
1353 __be32 skeys[2] = { saddr, 0 }; 1354 __be32 skeys[2] = { saddr, 0 };
1354 int ikeys[2] = { dev->ifindex, 0 }; 1355 int ikeys[2] = { dev->ifindex, 0 };
@@ -1384,7 +1385,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1384 1385
1385 rthp=&rt_hash_table[hash].chain; 1386 rthp=&rt_hash_table[hash].chain;
1386 1387
1387 rcu_read_lock();
1388 while ((rth = rcu_dereference(*rthp)) != NULL) { 1388 while ((rth = rcu_dereference(*rthp)) != NULL) {
1389 struct rtable *rt; 1389 struct rtable *rt;
1390 1390
@@ -1393,44 +1393,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1393 rth->fl.oif != ikeys[k] || 1393 rth->fl.oif != ikeys[k] ||
1394 rth->fl.iif != 0 || 1394 rth->fl.iif != 0 ||
1395 rt_is_expired(rth) || 1395 rt_is_expired(rth) ||
1396 !net_eq(dev_net(rth->u.dst.dev), net)) { 1396 !net_eq(dev_net(rth->dst.dev), net)) {
1397 rthp = &rth->u.dst.rt_next; 1397 rthp = &rth->dst.rt_next;
1398 continue; 1398 continue;
1399 } 1399 }
1400 1400
1401 if (rth->rt_dst != daddr || 1401 if (rth->rt_dst != daddr ||
1402 rth->rt_src != saddr || 1402 rth->rt_src != saddr ||
1403 rth->u.dst.error || 1403 rth->dst.error ||
1404 rth->rt_gateway != old_gw || 1404 rth->rt_gateway != old_gw ||
1405 rth->u.dst.dev != dev) 1405 rth->dst.dev != dev)
1406 break; 1406 break;
1407 1407
1408 dst_hold(&rth->u.dst); 1408 dst_hold(&rth->dst);
1409 rcu_read_unlock();
1410 1409
1411 rt = dst_alloc(&ipv4_dst_ops); 1410 rt = dst_alloc(&ipv4_dst_ops);
1412 if (rt == NULL) { 1411 if (rt == NULL) {
1413 ip_rt_put(rth); 1412 ip_rt_put(rth);
1414 in_dev_put(in_dev);
1415 return; 1413 return;
1416 } 1414 }
1417 1415
1418 /* Copy all the information. */ 1416 /* Copy all the information. */
1419 *rt = *rth; 1417 *rt = *rth;
1420 rt->u.dst.__use = 1; 1418 rt->dst.__use = 1;
1421 atomic_set(&rt->u.dst.__refcnt, 1); 1419 atomic_set(&rt->dst.__refcnt, 1);
1422 rt->u.dst.child = NULL; 1420 rt->dst.child = NULL;
1423 if (rt->u.dst.dev) 1421 if (rt->dst.dev)
1424 dev_hold(rt->u.dst.dev); 1422 dev_hold(rt->dst.dev);
1425 if (rt->idev) 1423 if (rt->idev)
1426 in_dev_hold(rt->idev); 1424 in_dev_hold(rt->idev);
1427 rt->u.dst.obsolete = -1; 1425 rt->dst.obsolete = -1;
1428 rt->u.dst.lastuse = jiffies; 1426 rt->dst.lastuse = jiffies;
1429 rt->u.dst.path = &rt->u.dst; 1427 rt->dst.path = &rt->dst;
1430 rt->u.dst.neighbour = NULL; 1428 rt->dst.neighbour = NULL;
1431 rt->u.dst.hh = NULL; 1429 rt->dst.hh = NULL;
1432#ifdef CONFIG_XFRM 1430#ifdef CONFIG_XFRM
1433 rt->u.dst.xfrm = NULL; 1431 rt->dst.xfrm = NULL;
1434#endif 1432#endif
1435 rt->rt_genid = rt_genid(net); 1433 rt->rt_genid = rt_genid(net);
1436 rt->rt_flags |= RTCF_REDIRECTED; 1434 rt->rt_flags |= RTCF_REDIRECTED;
@@ -1439,23 +1437,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1439 rt->rt_gateway = new_gw; 1437 rt->rt_gateway = new_gw;
1440 1438
1441 /* Redirect received -> path was valid */ 1439 /* Redirect received -> path was valid */
1442 dst_confirm(&rth->u.dst); 1440 dst_confirm(&rth->dst);
1443 1441
1444 if (rt->peer) 1442 if (rt->peer)
1445 atomic_inc(&rt->peer->refcnt); 1443 atomic_inc(&rt->peer->refcnt);
1446 1444
1447 if (arp_bind_neighbour(&rt->u.dst) || 1445 if (arp_bind_neighbour(&rt->dst) ||
1448 !(rt->u.dst.neighbour->nud_state & 1446 !(rt->dst.neighbour->nud_state &
1449 NUD_VALID)) { 1447 NUD_VALID)) {
1450 if (rt->u.dst.neighbour) 1448 if (rt->dst.neighbour)
1451 neigh_event_send(rt->u.dst.neighbour, NULL); 1449 neigh_event_send(rt->dst.neighbour, NULL);
1452 ip_rt_put(rth); 1450 ip_rt_put(rth);
1453 rt_drop(rt); 1451 rt_drop(rt);
1454 goto do_next; 1452 goto do_next;
1455 } 1453 }
1456 1454
1457 netevent.old = &rth->u.dst; 1455 netevent.old = &rth->dst;
1458 netevent.new = &rt->u.dst; 1456 netevent.new = &rt->dst;
1459 call_netevent_notifiers(NETEVENT_REDIRECT, 1457 call_netevent_notifiers(NETEVENT_REDIRECT,
1460 &netevent); 1458 &netevent);
1461 1459
@@ -1464,12 +1462,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1464 ip_rt_put(rt); 1462 ip_rt_put(rt);
1465 goto do_next; 1463 goto do_next;
1466 } 1464 }
1467 rcu_read_unlock();
1468 do_next: 1465 do_next:
1469 ; 1466 ;
1470 } 1467 }
1471 } 1468 }
1472 in_dev_put(in_dev);
1473 return; 1469 return;
1474 1470
1475reject_redirect: 1471reject_redirect:
@@ -1480,7 +1476,7 @@ reject_redirect:
1480 &old_gw, dev->name, &new_gw, 1476 &old_gw, dev->name, &new_gw,
1481 &saddr, &daddr); 1477 &saddr, &daddr);
1482#endif 1478#endif
1483 in_dev_put(in_dev); 1479 ;
1484} 1480}
1485 1481
1486static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 1482static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
@@ -1493,8 +1489,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1493 ip_rt_put(rt); 1489 ip_rt_put(rt);
1494 ret = NULL; 1490 ret = NULL;
1495 } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1491 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
1496 (rt->u.dst.expires && 1492 (rt->dst.expires &&
1497 time_after_eq(jiffies, rt->u.dst.expires))) { 1493 time_after_eq(jiffies, rt->dst.expires))) {
1498 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, 1494 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1499 rt->fl.oif, 1495 rt->fl.oif,
1500 rt_genid(dev_net(dst->dev))); 1496 rt_genid(dev_net(dst->dev)));
@@ -1532,7 +1528,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1532 int log_martians; 1528 int log_martians;
1533 1529
1534 rcu_read_lock(); 1530 rcu_read_lock();
1535 in_dev = __in_dev_get_rcu(rt->u.dst.dev); 1531 in_dev = __in_dev_get_rcu(rt->dst.dev);
1536 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { 1532 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
1537 rcu_read_unlock(); 1533 rcu_read_unlock();
1538 return; 1534 return;
@@ -1543,30 +1539,30 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1543 /* No redirected packets during ip_rt_redirect_silence; 1539 /* No redirected packets during ip_rt_redirect_silence;
1544 * reset the algorithm. 1540 * reset the algorithm.
1545 */ 1541 */
1546 if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) 1542 if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
1547 rt->u.dst.rate_tokens = 0; 1543 rt->dst.rate_tokens = 0;
1548 1544
1549 /* Too many ignored redirects; do not send anything 1545 /* Too many ignored redirects; do not send anything
1550 * set u.dst.rate_last to the last seen redirected packet. 1546 * set dst.rate_last to the last seen redirected packet.
1551 */ 1547 */
1552 if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { 1548 if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
1553 rt->u.dst.rate_last = jiffies; 1549 rt->dst.rate_last = jiffies;
1554 return; 1550 return;
1555 } 1551 }
1556 1552
1557 /* Check for load limit; set rate_last to the latest sent 1553 /* Check for load limit; set rate_last to the latest sent
1558 * redirect. 1554 * redirect.
1559 */ 1555 */
1560 if (rt->u.dst.rate_tokens == 0 || 1556 if (rt->dst.rate_tokens == 0 ||
1561 time_after(jiffies, 1557 time_after(jiffies,
1562 (rt->u.dst.rate_last + 1558 (rt->dst.rate_last +
1563 (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { 1559 (ip_rt_redirect_load << rt->dst.rate_tokens)))) {
1564 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1560 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1565 rt->u.dst.rate_last = jiffies; 1561 rt->dst.rate_last = jiffies;
1566 ++rt->u.dst.rate_tokens; 1562 ++rt->dst.rate_tokens;
1567#ifdef CONFIG_IP_ROUTE_VERBOSE 1563#ifdef CONFIG_IP_ROUTE_VERBOSE
1568 if (log_martians && 1564 if (log_martians &&
1569 rt->u.dst.rate_tokens == ip_rt_redirect_number && 1565 rt->dst.rate_tokens == ip_rt_redirect_number &&
1570 net_ratelimit()) 1566 net_ratelimit())
1571 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1567 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
1572 &rt->rt_src, rt->rt_iif, 1568 &rt->rt_src, rt->rt_iif,
@@ -1581,7 +1577,7 @@ static int ip_error(struct sk_buff *skb)
1581 unsigned long now; 1577 unsigned long now;
1582 int code; 1578 int code;
1583 1579
1584 switch (rt->u.dst.error) { 1580 switch (rt->dst.error) {
1585 case EINVAL: 1581 case EINVAL:
1586 default: 1582 default:
1587 goto out; 1583 goto out;
@@ -1590,7 +1586,7 @@ static int ip_error(struct sk_buff *skb)
1590 break; 1586 break;
1591 case ENETUNREACH: 1587 case ENETUNREACH:
1592 code = ICMP_NET_UNREACH; 1588 code = ICMP_NET_UNREACH;
1593 IP_INC_STATS_BH(dev_net(rt->u.dst.dev), 1589 IP_INC_STATS_BH(dev_net(rt->dst.dev),
1594 IPSTATS_MIB_INNOROUTES); 1590 IPSTATS_MIB_INNOROUTES);
1595 break; 1591 break;
1596 case EACCES: 1592 case EACCES:
@@ -1599,12 +1595,12 @@ static int ip_error(struct sk_buff *skb)
1599 } 1595 }
1600 1596
1601 now = jiffies; 1597 now = jiffies;
1602 rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; 1598 rt->dst.rate_tokens += now - rt->dst.rate_last;
1603 if (rt->u.dst.rate_tokens > ip_rt_error_burst) 1599 if (rt->dst.rate_tokens > ip_rt_error_burst)
1604 rt->u.dst.rate_tokens = ip_rt_error_burst; 1600 rt->dst.rate_tokens = ip_rt_error_burst;
1605 rt->u.dst.rate_last = now; 1601 rt->dst.rate_last = now;
1606 if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { 1602 if (rt->dst.rate_tokens >= ip_rt_error_cost) {
1607 rt->u.dst.rate_tokens -= ip_rt_error_cost; 1603 rt->dst.rate_tokens -= ip_rt_error_cost;
1608 icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 1604 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1609 } 1605 }
1610 1606
@@ -1649,7 +1645,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1649 1645
1650 rcu_read_lock(); 1646 rcu_read_lock();
1651 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 1647 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
1652 rth = rcu_dereference(rth->u.dst.rt_next)) { 1648 rth = rcu_dereference(rth->dst.rt_next)) {
1653 unsigned short mtu = new_mtu; 1649 unsigned short mtu = new_mtu;
1654 1650
1655 if (rth->fl.fl4_dst != daddr || 1651 if (rth->fl.fl4_dst != daddr ||
@@ -1658,8 +1654,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1658 rth->rt_src != iph->saddr || 1654 rth->rt_src != iph->saddr ||
1659 rth->fl.oif != ikeys[k] || 1655 rth->fl.oif != ikeys[k] ||
1660 rth->fl.iif != 0 || 1656 rth->fl.iif != 0 ||
1661 dst_metric_locked(&rth->u.dst, RTAX_MTU) || 1657 dst_metric_locked(&rth->dst, RTAX_MTU) ||
1662 !net_eq(dev_net(rth->u.dst.dev), net) || 1658 !net_eq(dev_net(rth->dst.dev), net) ||
1663 rt_is_expired(rth)) 1659 rt_is_expired(rth))
1664 continue; 1660 continue;
1665 1661
@@ -1667,22 +1663,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1667 1663
1668 /* BSD 4.2 compatibility hack :-( */ 1664 /* BSD 4.2 compatibility hack :-( */
1669 if (mtu == 0 && 1665 if (mtu == 0 &&
1670 old_mtu >= dst_mtu(&rth->u.dst) && 1666 old_mtu >= dst_mtu(&rth->dst) &&
1671 old_mtu >= 68 + (iph->ihl << 2)) 1667 old_mtu >= 68 + (iph->ihl << 2))
1672 old_mtu -= iph->ihl << 2; 1668 old_mtu -= iph->ihl << 2;
1673 1669
1674 mtu = guess_mtu(old_mtu); 1670 mtu = guess_mtu(old_mtu);
1675 } 1671 }
1676 if (mtu <= dst_mtu(&rth->u.dst)) { 1672 if (mtu <= dst_mtu(&rth->dst)) {
1677 if (mtu < dst_mtu(&rth->u.dst)) { 1673 if (mtu < dst_mtu(&rth->dst)) {
1678 dst_confirm(&rth->u.dst); 1674 dst_confirm(&rth->dst);
1679 if (mtu < ip_rt_min_pmtu) { 1675 if (mtu < ip_rt_min_pmtu) {
1680 mtu = ip_rt_min_pmtu; 1676 mtu = ip_rt_min_pmtu;
1681 rth->u.dst.metrics[RTAX_LOCK-1] |= 1677 rth->dst.metrics[RTAX_LOCK-1] |=
1682 (1 << RTAX_MTU); 1678 (1 << RTAX_MTU);
1683 } 1679 }
1684 rth->u.dst.metrics[RTAX_MTU-1] = mtu; 1680 rth->dst.metrics[RTAX_MTU-1] = mtu;
1685 dst_set_expires(&rth->u.dst, 1681 dst_set_expires(&rth->dst,
1686 ip_rt_mtu_expires); 1682 ip_rt_mtu_expires);
1687 } 1683 }
1688 est_mtu = mtu; 1684 est_mtu = mtu;
@@ -1755,7 +1751,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
1755 1751
1756 rt = skb_rtable(skb); 1752 rt = skb_rtable(skb);
1757 if (rt) 1753 if (rt)
1758 dst_set_expires(&rt->u.dst, 0); 1754 dst_set_expires(&rt->dst, 0);
1759} 1755}
1760 1756
1761static int ip_rt_bug(struct sk_buff *skb) 1757static int ip_rt_bug(struct sk_buff *skb)
@@ -1783,11 +1779,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1783 1779
1784 if (rt->fl.iif == 0) 1780 if (rt->fl.iif == 0)
1785 src = rt->rt_src; 1781 src = rt->rt_src;
1786 else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { 1782 else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
1787 src = FIB_RES_PREFSRC(res); 1783 src = FIB_RES_PREFSRC(res);
1788 fib_res_put(&res); 1784 fib_res_put(&res);
1789 } else 1785 } else
1790 src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, 1786 src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
1791 RT_SCOPE_UNIVERSE); 1787 RT_SCOPE_UNIVERSE);
1792 memcpy(addr, &src, 4); 1788 memcpy(addr, &src, 4);
1793} 1789}
@@ -1795,10 +1791,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1795#ifdef CONFIG_NET_CLS_ROUTE 1791#ifdef CONFIG_NET_CLS_ROUTE
1796static void set_class_tag(struct rtable *rt, u32 tag) 1792static void set_class_tag(struct rtable *rt, u32 tag)
1797{ 1793{
1798 if (!(rt->u.dst.tclassid & 0xFFFF)) 1794 if (!(rt->dst.tclassid & 0xFFFF))
1799 rt->u.dst.tclassid |= tag & 0xFFFF; 1795 rt->dst.tclassid |= tag & 0xFFFF;
1800 if (!(rt->u.dst.tclassid & 0xFFFF0000)) 1796 if (!(rt->dst.tclassid & 0xFFFF0000))
1801 rt->u.dst.tclassid |= tag & 0xFFFF0000; 1797 rt->dst.tclassid |= tag & 0xFFFF0000;
1802} 1798}
1803#endif 1799#endif
1804 1800
@@ -1810,30 +1806,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1810 if (FIB_RES_GW(*res) && 1806 if (FIB_RES_GW(*res) &&
1811 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1807 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1812 rt->rt_gateway = FIB_RES_GW(*res); 1808 rt->rt_gateway = FIB_RES_GW(*res);
1813 memcpy(rt->u.dst.metrics, fi->fib_metrics, 1809 memcpy(rt->dst.metrics, fi->fib_metrics,
1814 sizeof(rt->u.dst.metrics)); 1810 sizeof(rt->dst.metrics));
1815 if (fi->fib_mtu == 0) { 1811 if (fi->fib_mtu == 0) {
1816 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; 1812 rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
1817 if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && 1813 if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
1818 rt->rt_gateway != rt->rt_dst && 1814 rt->rt_gateway != rt->rt_dst &&
1819 rt->u.dst.dev->mtu > 576) 1815 rt->dst.dev->mtu > 576)
1820 rt->u.dst.metrics[RTAX_MTU-1] = 576; 1816 rt->dst.metrics[RTAX_MTU-1] = 576;
1821 } 1817 }
1822#ifdef CONFIG_NET_CLS_ROUTE 1818#ifdef CONFIG_NET_CLS_ROUTE
1823 rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; 1819 rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
1824#endif 1820#endif
1825 } else 1821 } else
1826 rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; 1822 rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
1827 1823
1828 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) 1824 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1829 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; 1825 rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
1830 if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) 1826 if (dst_mtu(&rt->dst) > IP_MAX_MTU)
1831 rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; 1827 rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
1832 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) 1828 if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
1833 rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, 1829 rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
1834 ip_rt_min_advmss); 1830 ip_rt_min_advmss);
1835 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) 1831 if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
1836 rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; 1832 rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
1837 1833
1838#ifdef CONFIG_NET_CLS_ROUTE 1834#ifdef CONFIG_NET_CLS_ROUTE
1839#ifdef CONFIG_IP_MULTIPLE_TABLES 1835#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1844,14 +1840,16 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1844 rt->rt_type = res->type; 1840 rt->rt_type = res->type;
1845} 1841}
1846 1842
1843/* called in rcu_read_lock() section */
1847static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1844static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1848 u8 tos, struct net_device *dev, int our) 1845 u8 tos, struct net_device *dev, int our)
1849{ 1846{
1850 unsigned hash; 1847 unsigned int hash;
1851 struct rtable *rth; 1848 struct rtable *rth;
1852 __be32 spec_dst; 1849 __be32 spec_dst;
1853 struct in_device *in_dev = in_dev_get(dev); 1850 struct in_device *in_dev = __in_dev_get_rcu(dev);
1854 u32 itag = 0; 1851 u32 itag = 0;
1852 int err;
1855 1853
1856 /* Primary sanity checks. */ 1854 /* Primary sanity checks. */
1857 1855
@@ -1866,21 +1864,23 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1866 if (!ipv4_is_local_multicast(daddr)) 1864 if (!ipv4_is_local_multicast(daddr))
1867 goto e_inval; 1865 goto e_inval;
1868 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1866 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1869 } else if (fib_validate_source(saddr, 0, tos, 0, 1867 } else {
1870 dev, &spec_dst, &itag, 0) < 0) 1868 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
1871 goto e_inval; 1869 &itag, 0);
1872 1870 if (err < 0)
1871 goto e_err;
1872 }
1873 rth = dst_alloc(&ipv4_dst_ops); 1873 rth = dst_alloc(&ipv4_dst_ops);
1874 if (!rth) 1874 if (!rth)
1875 goto e_nobufs; 1875 goto e_nobufs;
1876 1876
1877 rth->u.dst.output = ip_rt_bug; 1877 rth->dst.output = ip_rt_bug;
1878 rth->u.dst.obsolete = -1; 1878 rth->dst.obsolete = -1;
1879 1879
1880 atomic_set(&rth->u.dst.__refcnt, 1); 1880 atomic_set(&rth->dst.__refcnt, 1);
1881 rth->u.dst.flags= DST_HOST; 1881 rth->dst.flags= DST_HOST;
1882 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 1882 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1883 rth->u.dst.flags |= DST_NOPOLICY; 1883 rth->dst.flags |= DST_NOPOLICY;
1884 rth->fl.fl4_dst = daddr; 1884 rth->fl.fl4_dst = daddr;
1885 rth->rt_dst = daddr; 1885 rth->rt_dst = daddr;
1886 rth->fl.fl4_tos = tos; 1886 rth->fl.fl4_tos = tos;
@@ -1888,13 +1888,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1888 rth->fl.fl4_src = saddr; 1888 rth->fl.fl4_src = saddr;
1889 rth->rt_src = saddr; 1889 rth->rt_src = saddr;
1890#ifdef CONFIG_NET_CLS_ROUTE 1890#ifdef CONFIG_NET_CLS_ROUTE
1891 rth->u.dst.tclassid = itag; 1891 rth->dst.tclassid = itag;
1892#endif 1892#endif
1893 rth->rt_iif = 1893 rth->rt_iif =
1894 rth->fl.iif = dev->ifindex; 1894 rth->fl.iif = dev->ifindex;
1895 rth->u.dst.dev = init_net.loopback_dev; 1895 rth->dst.dev = init_net.loopback_dev;
1896 dev_hold(rth->u.dst.dev); 1896 dev_hold(rth->dst.dev);
1897 rth->idev = in_dev_get(rth->u.dst.dev); 1897 rth->idev = in_dev_get(rth->dst.dev);
1898 rth->fl.oif = 0; 1898 rth->fl.oif = 0;
1899 rth->rt_gateway = daddr; 1899 rth->rt_gateway = daddr;
1900 rth->rt_spec_dst= spec_dst; 1900 rth->rt_spec_dst= spec_dst;
@@ -1902,27 +1902,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1902 rth->rt_flags = RTCF_MULTICAST; 1902 rth->rt_flags = RTCF_MULTICAST;
1903 rth->rt_type = RTN_MULTICAST; 1903 rth->rt_type = RTN_MULTICAST;
1904 if (our) { 1904 if (our) {
1905 rth->u.dst.input= ip_local_deliver; 1905 rth->dst.input= ip_local_deliver;
1906 rth->rt_flags |= RTCF_LOCAL; 1906 rth->rt_flags |= RTCF_LOCAL;
1907 } 1907 }
1908 1908
1909#ifdef CONFIG_IP_MROUTE 1909#ifdef CONFIG_IP_MROUTE
1910 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) 1910 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
1911 rth->u.dst.input = ip_mr_input; 1911 rth->dst.input = ip_mr_input;
1912#endif 1912#endif
1913 RT_CACHE_STAT_INC(in_slow_mc); 1913 RT_CACHE_STAT_INC(in_slow_mc);
1914 1914
1915 in_dev_put(in_dev);
1916 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); 1915 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1917 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); 1916 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex);
1918 1917
1919e_nobufs: 1918e_nobufs:
1920 in_dev_put(in_dev);
1921 return -ENOBUFS; 1919 return -ENOBUFS;
1922
1923e_inval: 1920e_inval:
1924 in_dev_put(in_dev);
1925 return -EINVAL; 1921 return -EINVAL;
1922e_err:
1923 return err;
1926} 1924}
1927 1925
1928 1926
@@ -1956,22 +1954,22 @@ static void ip_handle_martian_source(struct net_device *dev,
1956#endif 1954#endif
1957} 1955}
1958 1956
1957/* called in rcu_read_lock() section */
1959static int __mkroute_input(struct sk_buff *skb, 1958static int __mkroute_input(struct sk_buff *skb,
1960 struct fib_result *res, 1959 struct fib_result *res,
1961 struct in_device *in_dev, 1960 struct in_device *in_dev,
1962 __be32 daddr, __be32 saddr, u32 tos, 1961 __be32 daddr, __be32 saddr, u32 tos,
1963 struct rtable **result) 1962 struct rtable **result)
1964{ 1963{
1965
1966 struct rtable *rth; 1964 struct rtable *rth;
1967 int err; 1965 int err;
1968 struct in_device *out_dev; 1966 struct in_device *out_dev;
1969 unsigned flags = 0; 1967 unsigned int flags = 0;
1970 __be32 spec_dst; 1968 __be32 spec_dst;
1971 u32 itag; 1969 u32 itag;
1972 1970
1973 /* get a working reference to the output device */ 1971 /* get a working reference to the output device */
1974 out_dev = in_dev_get(FIB_RES_DEV(*res)); 1972 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
1975 if (out_dev == NULL) { 1973 if (out_dev == NULL) {
1976 if (net_ratelimit()) 1974 if (net_ratelimit())
1977 printk(KERN_CRIT "Bug in ip_route_input" \ 1975 printk(KERN_CRIT "Bug in ip_route_input" \
@@ -1986,7 +1984,6 @@ static int __mkroute_input(struct sk_buff *skb,
1986 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 1984 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
1987 saddr); 1985 saddr);
1988 1986
1989 err = -EINVAL;
1990 goto cleanup; 1987 goto cleanup;
1991 } 1988 }
1992 1989
@@ -2020,12 +2017,12 @@ static int __mkroute_input(struct sk_buff *skb,
2020 goto cleanup; 2017 goto cleanup;
2021 } 2018 }
2022 2019
2023 atomic_set(&rth->u.dst.__refcnt, 1); 2020 atomic_set(&rth->dst.__refcnt, 1);
2024 rth->u.dst.flags= DST_HOST; 2021 rth->dst.flags= DST_HOST;
2025 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2022 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2026 rth->u.dst.flags |= DST_NOPOLICY; 2023 rth->dst.flags |= DST_NOPOLICY;
2027 if (IN_DEV_CONF_GET(out_dev, NOXFRM)) 2024 if (IN_DEV_CONF_GET(out_dev, NOXFRM))
2028 rth->u.dst.flags |= DST_NOXFRM; 2025 rth->dst.flags |= DST_NOXFRM;
2029 rth->fl.fl4_dst = daddr; 2026 rth->fl.fl4_dst = daddr;
2030 rth->rt_dst = daddr; 2027 rth->rt_dst = daddr;
2031 rth->fl.fl4_tos = tos; 2028 rth->fl.fl4_tos = tos;
@@ -2035,16 +2032,16 @@ static int __mkroute_input(struct sk_buff *skb,
2035 rth->rt_gateway = daddr; 2032 rth->rt_gateway = daddr;
2036 rth->rt_iif = 2033 rth->rt_iif =
2037 rth->fl.iif = in_dev->dev->ifindex; 2034 rth->fl.iif = in_dev->dev->ifindex;
2038 rth->u.dst.dev = (out_dev)->dev; 2035 rth->dst.dev = (out_dev)->dev;
2039 dev_hold(rth->u.dst.dev); 2036 dev_hold(rth->dst.dev);
2040 rth->idev = in_dev_get(rth->u.dst.dev); 2037 rth->idev = in_dev_get(rth->dst.dev);
2041 rth->fl.oif = 0; 2038 rth->fl.oif = 0;
2042 rth->rt_spec_dst= spec_dst; 2039 rth->rt_spec_dst= spec_dst;
2043 2040
2044 rth->u.dst.obsolete = -1; 2041 rth->dst.obsolete = -1;
2045 rth->u.dst.input = ip_forward; 2042 rth->dst.input = ip_forward;
2046 rth->u.dst.output = ip_output; 2043 rth->dst.output = ip_output;
2047 rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); 2044 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2048 2045
2049 rt_set_nexthop(rth, res, itag); 2046 rt_set_nexthop(rth, res, itag);
2050 2047
@@ -2053,8 +2050,6 @@ static int __mkroute_input(struct sk_buff *skb,
2053 *result = rth; 2050 *result = rth;
2054 err = 0; 2051 err = 0;
2055 cleanup: 2052 cleanup:
2056 /* release the working reference to the output device */
2057 in_dev_put(out_dev);
2058 return err; 2053 return err;
2059} 2054}
2060 2055
@@ -2080,7 +2075,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
2080 2075
2081 /* put it into the cache */ 2076 /* put it into the cache */
2082 hash = rt_hash(daddr, saddr, fl->iif, 2077 hash = rt_hash(daddr, saddr, fl->iif,
2083 rt_genid(dev_net(rth->u.dst.dev))); 2078 rt_genid(dev_net(rth->dst.dev)));
2084 return rt_intern_hash(hash, rth, NULL, skb, fl->iif); 2079 return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
2085} 2080}
2086 2081
@@ -2098,7 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2098 u8 tos, struct net_device *dev) 2093 u8 tos, struct net_device *dev)
2099{ 2094{
2100 struct fib_result res; 2095 struct fib_result res;
2101 struct in_device *in_dev = in_dev_get(dev); 2096 struct in_device *in_dev = __in_dev_get_rcu(dev);
2102 struct flowi fl = { .nl_u = { .ip4_u = 2097 struct flowi fl = { .nl_u = { .ip4_u =
2103 { .daddr = daddr, 2098 { .daddr = daddr,
2104 .saddr = saddr, 2099 .saddr = saddr,
@@ -2158,13 +2153,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2158 goto brd_input; 2153 goto brd_input;
2159 2154
2160 if (res.type == RTN_LOCAL) { 2155 if (res.type == RTN_LOCAL) {
2161 int result; 2156 err = fib_validate_source(saddr, daddr, tos,
2162 result = fib_validate_source(saddr, daddr, tos,
2163 net->loopback_dev->ifindex, 2157 net->loopback_dev->ifindex,
2164 dev, &spec_dst, &itag, skb->mark); 2158 dev, &spec_dst, &itag, skb->mark);
2165 if (result < 0) 2159 if (err < 0)
2166 goto martian_source; 2160 goto martian_source_keep_err;
2167 if (result) 2161 if (err)
2168 flags |= RTCF_DIRECTSRC; 2162 flags |= RTCF_DIRECTSRC;
2169 spec_dst = daddr; 2163 spec_dst = daddr;
2170 goto local_input; 2164 goto local_input;
@@ -2177,7 +2171,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2177 2171
2178 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); 2172 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
2179done: 2173done:
2180 in_dev_put(in_dev);
2181 if (free_res) 2174 if (free_res)
2182 fib_res_put(&res); 2175 fib_res_put(&res);
2183out: return err; 2176out: return err;
@@ -2192,7 +2185,7 @@ brd_input:
2192 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 2185 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
2193 &itag, skb->mark); 2186 &itag, skb->mark);
2194 if (err < 0) 2187 if (err < 0)
2195 goto martian_source; 2188 goto martian_source_keep_err;
2196 if (err) 2189 if (err)
2197 flags |= RTCF_DIRECTSRC; 2190 flags |= RTCF_DIRECTSRC;
2198 } 2191 }
@@ -2205,14 +2198,14 @@ local_input:
2205 if (!rth) 2198 if (!rth)
2206 goto e_nobufs; 2199 goto e_nobufs;
2207 2200
2208 rth->u.dst.output= ip_rt_bug; 2201 rth->dst.output= ip_rt_bug;
2209 rth->u.dst.obsolete = -1; 2202 rth->dst.obsolete = -1;
2210 rth->rt_genid = rt_genid(net); 2203 rth->rt_genid = rt_genid(net);
2211 2204
2212 atomic_set(&rth->u.dst.__refcnt, 1); 2205 atomic_set(&rth->dst.__refcnt, 1);
2213 rth->u.dst.flags= DST_HOST; 2206 rth->dst.flags= DST_HOST;
2214 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2207 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2215 rth->u.dst.flags |= DST_NOPOLICY; 2208 rth->dst.flags |= DST_NOPOLICY;
2216 rth->fl.fl4_dst = daddr; 2209 rth->fl.fl4_dst = daddr;
2217 rth->rt_dst = daddr; 2210 rth->rt_dst = daddr;
2218 rth->fl.fl4_tos = tos; 2211 rth->fl.fl4_tos = tos;
@@ -2220,20 +2213,20 @@ local_input:
2220 rth->fl.fl4_src = saddr; 2213 rth->fl.fl4_src = saddr;
2221 rth->rt_src = saddr; 2214 rth->rt_src = saddr;
2222#ifdef CONFIG_NET_CLS_ROUTE 2215#ifdef CONFIG_NET_CLS_ROUTE
2223 rth->u.dst.tclassid = itag; 2216 rth->dst.tclassid = itag;
2224#endif 2217#endif
2225 rth->rt_iif = 2218 rth->rt_iif =
2226 rth->fl.iif = dev->ifindex; 2219 rth->fl.iif = dev->ifindex;
2227 rth->u.dst.dev = net->loopback_dev; 2220 rth->dst.dev = net->loopback_dev;
2228 dev_hold(rth->u.dst.dev); 2221 dev_hold(rth->dst.dev);
2229 rth->idev = in_dev_get(rth->u.dst.dev); 2222 rth->idev = in_dev_get(rth->dst.dev);
2230 rth->rt_gateway = daddr; 2223 rth->rt_gateway = daddr;
2231 rth->rt_spec_dst= spec_dst; 2224 rth->rt_spec_dst= spec_dst;
2232 rth->u.dst.input= ip_local_deliver; 2225 rth->dst.input= ip_local_deliver;
2233 rth->rt_flags = flags|RTCF_LOCAL; 2226 rth->rt_flags = flags|RTCF_LOCAL;
2234 if (res.type == RTN_UNREACHABLE) { 2227 if (res.type == RTN_UNREACHABLE) {
2235 rth->u.dst.input= ip_error; 2228 rth->dst.input= ip_error;
2236 rth->u.dst.error= -err; 2229 rth->dst.error= -err;
2237 rth->rt_flags &= ~RTCF_LOCAL; 2230 rth->rt_flags &= ~RTCF_LOCAL;
2238 } 2231 }
2239 rth->rt_type = res.type; 2232 rth->rt_type = res.type;
@@ -2273,8 +2266,10 @@ e_nobufs:
2273 goto done; 2266 goto done;
2274 2267
2275martian_source: 2268martian_source:
2269 err = -EINVAL;
2270martian_source_keep_err:
2276 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); 2271 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2277 goto e_inval; 2272 goto done;
2278} 2273}
2279 2274
2280int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, 2275int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2284,32 +2279,34 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2284 unsigned hash; 2279 unsigned hash;
2285 int iif = dev->ifindex; 2280 int iif = dev->ifindex;
2286 struct net *net; 2281 struct net *net;
2282 int res;
2287 2283
2288 net = dev_net(dev); 2284 net = dev_net(dev);
2289 2285
2286 rcu_read_lock();
2287
2290 if (!rt_caching(net)) 2288 if (!rt_caching(net))
2291 goto skip_cache; 2289 goto skip_cache;
2292 2290
2293 tos &= IPTOS_RT_MASK; 2291 tos &= IPTOS_RT_MASK;
2294 hash = rt_hash(daddr, saddr, iif, rt_genid(net)); 2292 hash = rt_hash(daddr, saddr, iif, rt_genid(net));
2295 2293
2296 rcu_read_lock();
2297 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2294 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2298 rth = rcu_dereference(rth->u.dst.rt_next)) { 2295 rth = rcu_dereference(rth->dst.rt_next)) {
2299 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | 2296 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
2300 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | 2297 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
2301 (rth->fl.iif ^ iif) | 2298 (rth->fl.iif ^ iif) |
2302 rth->fl.oif | 2299 rth->fl.oif |
2303 (rth->fl.fl4_tos ^ tos)) == 0 && 2300 (rth->fl.fl4_tos ^ tos)) == 0 &&
2304 rth->fl.mark == skb->mark && 2301 rth->fl.mark == skb->mark &&
2305 net_eq(dev_net(rth->u.dst.dev), net) && 2302 net_eq(dev_net(rth->dst.dev), net) &&
2306 !rt_is_expired(rth)) { 2303 !rt_is_expired(rth)) {
2307 if (noref) { 2304 if (noref) {
2308 dst_use_noref(&rth->u.dst, jiffies); 2305 dst_use_noref(&rth->dst, jiffies);
2309 skb_dst_set_noref(skb, &rth->u.dst); 2306 skb_dst_set_noref(skb, &rth->dst);
2310 } else { 2307 } else {
2311 dst_use(&rth->u.dst, jiffies); 2308 dst_use(&rth->dst, jiffies);
2312 skb_dst_set(skb, &rth->u.dst); 2309 skb_dst_set(skb, &rth->dst);
2313 } 2310 }
2314 RT_CACHE_STAT_INC(in_hit); 2311 RT_CACHE_STAT_INC(in_hit);
2315 rcu_read_unlock(); 2312 rcu_read_unlock();
@@ -2317,7 +2314,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2317 } 2314 }
2318 RT_CACHE_STAT_INC(in_hlist_search); 2315 RT_CACHE_STAT_INC(in_hlist_search);
2319 } 2316 }
2320 rcu_read_unlock();
2321 2317
2322skip_cache: 2318skip_cache:
2323 /* Multicast recognition logic is moved from route cache to here. 2319 /* Multicast recognition logic is moved from route cache to here.
@@ -2332,12 +2328,11 @@ skip_cache:
2332 route cache entry is created eventually. 2328 route cache entry is created eventually.
2333 */ 2329 */
2334 if (ipv4_is_multicast(daddr)) { 2330 if (ipv4_is_multicast(daddr)) {
2335 struct in_device *in_dev; 2331 struct in_device *in_dev = __in_dev_get_rcu(dev);
2336 2332
2337 rcu_read_lock(); 2333 if (in_dev) {
2338 if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
2339 int our = ip_check_mc(in_dev, daddr, saddr, 2334 int our = ip_check_mc(in_dev, daddr, saddr,
2340 ip_hdr(skb)->protocol); 2335 ip_hdr(skb)->protocol);
2341 if (our 2336 if (our
2342#ifdef CONFIG_IP_MROUTE 2337#ifdef CONFIG_IP_MROUTE
2343 || 2338 ||
@@ -2345,15 +2340,18 @@ skip_cache:
2345 IN_DEV_MFORWARD(in_dev)) 2340 IN_DEV_MFORWARD(in_dev))
2346#endif 2341#endif
2347 ) { 2342 ) {
2343 int res = ip_route_input_mc(skb, daddr, saddr,
2344 tos, dev, our);
2348 rcu_read_unlock(); 2345 rcu_read_unlock();
2349 return ip_route_input_mc(skb, daddr, saddr, 2346 return res;
2350 tos, dev, our);
2351 } 2347 }
2352 } 2348 }
2353 rcu_read_unlock(); 2349 rcu_read_unlock();
2354 return -EINVAL; 2350 return -EINVAL;
2355 } 2351 }
2356 return ip_route_input_slow(skb, daddr, saddr, tos, dev); 2352 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
2353 rcu_read_unlock();
2354 return res;
2357} 2355}
2358EXPORT_SYMBOL(ip_route_input_common); 2356EXPORT_SYMBOL(ip_route_input_common);
2359 2357
@@ -2415,12 +2413,12 @@ static int __mkroute_output(struct rtable **result,
2415 goto cleanup; 2413 goto cleanup;
2416 } 2414 }
2417 2415
2418 atomic_set(&rth->u.dst.__refcnt, 1); 2416 atomic_set(&rth->dst.__refcnt, 1);
2419 rth->u.dst.flags= DST_HOST; 2417 rth->dst.flags= DST_HOST;
2420 if (IN_DEV_CONF_GET(in_dev, NOXFRM)) 2418 if (IN_DEV_CONF_GET(in_dev, NOXFRM))
2421 rth->u.dst.flags |= DST_NOXFRM; 2419 rth->dst.flags |= DST_NOXFRM;
2422 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2420 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2423 rth->u.dst.flags |= DST_NOPOLICY; 2421 rth->dst.flags |= DST_NOPOLICY;
2424 2422
2425 rth->fl.fl4_dst = oldflp->fl4_dst; 2423 rth->fl.fl4_dst = oldflp->fl4_dst;
2426 rth->fl.fl4_tos = tos; 2424 rth->fl.fl4_tos = tos;
@@ -2432,35 +2430,35 @@ static int __mkroute_output(struct rtable **result,
2432 rth->rt_iif = oldflp->oif ? : dev_out->ifindex; 2430 rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
2433 /* get references to the devices that are to be hold by the routing 2431 /* get references to the devices that are to be hold by the routing
2434 cache entry */ 2432 cache entry */
2435 rth->u.dst.dev = dev_out; 2433 rth->dst.dev = dev_out;
2436 dev_hold(dev_out); 2434 dev_hold(dev_out);
2437 rth->idev = in_dev_get(dev_out); 2435 rth->idev = in_dev_get(dev_out);
2438 rth->rt_gateway = fl->fl4_dst; 2436 rth->rt_gateway = fl->fl4_dst;
2439 rth->rt_spec_dst= fl->fl4_src; 2437 rth->rt_spec_dst= fl->fl4_src;
2440 2438
2441 rth->u.dst.output=ip_output; 2439 rth->dst.output=ip_output;
2442 rth->u.dst.obsolete = -1; 2440 rth->dst.obsolete = -1;
2443 rth->rt_genid = rt_genid(dev_net(dev_out)); 2441 rth->rt_genid = rt_genid(dev_net(dev_out));
2444 2442
2445 RT_CACHE_STAT_INC(out_slow_tot); 2443 RT_CACHE_STAT_INC(out_slow_tot);
2446 2444
2447 if (flags & RTCF_LOCAL) { 2445 if (flags & RTCF_LOCAL) {
2448 rth->u.dst.input = ip_local_deliver; 2446 rth->dst.input = ip_local_deliver;
2449 rth->rt_spec_dst = fl->fl4_dst; 2447 rth->rt_spec_dst = fl->fl4_dst;
2450 } 2448 }
2451 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 2449 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2452 rth->rt_spec_dst = fl->fl4_src; 2450 rth->rt_spec_dst = fl->fl4_src;
2453 if (flags & RTCF_LOCAL && 2451 if (flags & RTCF_LOCAL &&
2454 !(dev_out->flags & IFF_LOOPBACK)) { 2452 !(dev_out->flags & IFF_LOOPBACK)) {
2455 rth->u.dst.output = ip_mc_output; 2453 rth->dst.output = ip_mc_output;
2456 RT_CACHE_STAT_INC(out_slow_mc); 2454 RT_CACHE_STAT_INC(out_slow_mc);
2457 } 2455 }
2458#ifdef CONFIG_IP_MROUTE 2456#ifdef CONFIG_IP_MROUTE
2459 if (res->type == RTN_MULTICAST) { 2457 if (res->type == RTN_MULTICAST) {
2460 if (IN_DEV_MFORWARD(in_dev) && 2458 if (IN_DEV_MFORWARD(in_dev) &&
2461 !ipv4_is_local_multicast(oldflp->fl4_dst)) { 2459 !ipv4_is_local_multicast(oldflp->fl4_dst)) {
2462 rth->u.dst.input = ip_mr_input; 2460 rth->dst.input = ip_mr_input;
2463 rth->u.dst.output = ip_mc_output; 2461 rth->dst.output = ip_mc_output;
2464 } 2462 }
2465 } 2463 }
2466#endif 2464#endif
@@ -2715,7 +2713,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2715 2713
2716 rcu_read_lock_bh(); 2714 rcu_read_lock_bh();
2717 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; 2715 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
2718 rth = rcu_dereference_bh(rth->u.dst.rt_next)) { 2716 rth = rcu_dereference_bh(rth->dst.rt_next)) {
2719 if (rth->fl.fl4_dst == flp->fl4_dst && 2717 if (rth->fl.fl4_dst == flp->fl4_dst &&
2720 rth->fl.fl4_src == flp->fl4_src && 2718 rth->fl.fl4_src == flp->fl4_src &&
2721 rth->fl.iif == 0 && 2719 rth->fl.iif == 0 &&
@@ -2723,9 +2721,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2723 rth->fl.mark == flp->mark && 2721 rth->fl.mark == flp->mark &&
2724 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2722 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2725 (IPTOS_RT_MASK | RTO_ONLINK)) && 2723 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2726 net_eq(dev_net(rth->u.dst.dev), net) && 2724 net_eq(dev_net(rth->dst.dev), net) &&
2727 !rt_is_expired(rth)) { 2725 !rt_is_expired(rth)) {
2728 dst_use(&rth->u.dst, jiffies); 2726 dst_use(&rth->dst, jiffies);
2729 RT_CACHE_STAT_INC(out_hit); 2727 RT_CACHE_STAT_INC(out_hit);
2730 rcu_read_unlock_bh(); 2728 rcu_read_unlock_bh();
2731 *rp = rth; 2729 *rp = rth;
@@ -2738,7 +2736,6 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2738slow_output: 2736slow_output:
2739 return ip_route_output_slow(net, rp, flp); 2737 return ip_route_output_slow(net, rp, flp);
2740} 2738}
2741
2742EXPORT_SYMBOL_GPL(__ip_route_output_key); 2739EXPORT_SYMBOL_GPL(__ip_route_output_key);
2743 2740
2744static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 2741static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -2762,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2762 dst_alloc(&ipv4_dst_blackhole_ops); 2759 dst_alloc(&ipv4_dst_blackhole_ops);
2763 2760
2764 if (rt) { 2761 if (rt) {
2765 struct dst_entry *new = &rt->u.dst; 2762 struct dst_entry *new = &rt->dst;
2766 2763
2767 atomic_set(&new->__refcnt, 1); 2764 atomic_set(&new->__refcnt, 1);
2768 new->__use = 1; 2765 new->__use = 1;
2769 new->input = dst_discard; 2766 new->input = dst_discard;
2770 new->output = dst_discard; 2767 new->output = dst_discard;
2771 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 2768 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
2772 2769
2773 new->dev = ort->u.dst.dev; 2770 new->dev = ort->dst.dev;
2774 if (new->dev) 2771 if (new->dev)
2775 dev_hold(new->dev); 2772 dev_hold(new->dev);
2776 2773
@@ -2794,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2794 dst_free(new); 2791 dst_free(new);
2795 } 2792 }
2796 2793
2797 dst_release(&(*rp)->u.dst); 2794 dst_release(&(*rp)->dst);
2798 *rp = rt; 2795 *rp = rt;
2799 return (rt ? 0 : -ENOMEM); 2796 return (rt ? 0 : -ENOMEM);
2800} 2797}
@@ -2822,13 +2819,13 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2822 2819
2823 return 0; 2820 return 0;
2824} 2821}
2825
2826EXPORT_SYMBOL_GPL(ip_route_output_flow); 2822EXPORT_SYMBOL_GPL(ip_route_output_flow);
2827 2823
2828int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) 2824int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2829{ 2825{
2830 return ip_route_output_flow(net, rp, flp, NULL, 0); 2826 return ip_route_output_flow(net, rp, flp, NULL, 0);
2831} 2827}
2828EXPORT_SYMBOL(ip_route_output_key);
2832 2829
2833static int rt_fill_info(struct net *net, 2830static int rt_fill_info(struct net *net,
2834 struct sk_buff *skb, u32 pid, u32 seq, int event, 2831 struct sk_buff *skb, u32 pid, u32 seq, int event,
@@ -2864,11 +2861,11 @@ static int rt_fill_info(struct net *net,
2864 r->rtm_src_len = 32; 2861 r->rtm_src_len = 32;
2865 NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); 2862 NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
2866 } 2863 }
2867 if (rt->u.dst.dev) 2864 if (rt->dst.dev)
2868 NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); 2865 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2869#ifdef CONFIG_NET_CLS_ROUTE 2866#ifdef CONFIG_NET_CLS_ROUTE
2870 if (rt->u.dst.tclassid) 2867 if (rt->dst.tclassid)
2871 NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); 2868 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
2872#endif 2869#endif
2873 if (rt->fl.iif) 2870 if (rt->fl.iif)
2874 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); 2871 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
@@ -2878,12 +2875,13 @@ static int rt_fill_info(struct net *net,
2878 if (rt->rt_dst != rt->rt_gateway) 2875 if (rt->rt_dst != rt->rt_gateway)
2879 NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); 2876 NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
2880 2877
2881 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2878 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
2882 goto nla_put_failure; 2879 goto nla_put_failure;
2883 2880
2884 error = rt->u.dst.error; 2881 error = rt->dst.error;
2885 expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; 2882 expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
2886 if (rt->peer) { 2883 if (rt->peer) {
2884 inet_peer_refcheck(rt->peer);
2887 id = atomic_read(&rt->peer->ip_id_count) & 0xffff; 2885 id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
2888 if (rt->peer->tcp_ts_stamp) { 2886 if (rt->peer->tcp_ts_stamp) {
2889 ts = rt->peer->tcp_ts; 2887 ts = rt->peer->tcp_ts;
@@ -2914,7 +2912,7 @@ static int rt_fill_info(struct net *net,
2914 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); 2912 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
2915 } 2913 }
2916 2914
2917 if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, 2915 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
2918 expires, error) < 0) 2916 expires, error) < 0)
2919 goto nla_put_failure; 2917 goto nla_put_failure;
2920 2918
@@ -2979,8 +2977,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2979 local_bh_enable(); 2977 local_bh_enable();
2980 2978
2981 rt = skb_rtable(skb); 2979 rt = skb_rtable(skb);
2982 if (err == 0 && rt->u.dst.error) 2980 if (err == 0 && rt->dst.error)
2983 err = -rt->u.dst.error; 2981 err = -rt->dst.error;
2984 } else { 2982 } else {
2985 struct flowi fl = { 2983 struct flowi fl = {
2986 .nl_u = { 2984 .nl_u = {
@@ -2998,7 +2996,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2998 if (err) 2996 if (err)
2999 goto errout_free; 2997 goto errout_free;
3000 2998
3001 skb_dst_set(skb, &rt->u.dst); 2999 skb_dst_set(skb, &rt->dst);
3002 if (rtm->rtm_flags & RTM_F_NOTIFY) 3000 if (rtm->rtm_flags & RTM_F_NOTIFY)
3003 rt->rt_flags |= RTCF_NOTIFY; 3001 rt->rt_flags |= RTCF_NOTIFY;
3004 3002
@@ -3034,12 +3032,12 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
3034 continue; 3032 continue;
3035 rcu_read_lock_bh(); 3033 rcu_read_lock_bh();
3036 for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; 3034 for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
3037 rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { 3035 rt = rcu_dereference_bh(rt->dst.rt_next), idx++) {
3038 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) 3036 if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx)
3039 continue; 3037 continue;
3040 if (rt_is_expired(rt)) 3038 if (rt_is_expired(rt))
3041 continue; 3039 continue;
3042 skb_dst_set_noref(skb, &rt->u.dst); 3040 skb_dst_set_noref(skb, &rt->dst);
3043 if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, 3041 if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
3044 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 3042 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
3045 1, NLM_F_MULTI) <= 0) { 3043 1, NLM_F_MULTI) <= 0) {
@@ -3365,6 +3363,3 @@ void __init ip_static_sysctl_init(void)
3365 register_sysctl_paths(ipv4_path, ipv4_skeleton); 3363 register_sysctl_paths(ipv4_path, ipv4_skeleton);
3366} 3364}
3367#endif 3365#endif
3368
3369EXPORT_SYMBOL(__ip_select_ident);
3370EXPORT_SYMBOL(ip_route_output_key);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5c24db4a3c91..650cace2180d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -18,8 +18,8 @@
18#include <net/tcp.h> 18#include <net/tcp.h>
19#include <net/route.h> 19#include <net/route.h>
20 20
21/* Timestamps: lowest 9 bits store TCP options */ 21/* Timestamps: lowest bits store TCP options */
22#define TSBITS 9 22#define TSBITS 6
23#define TSMASK (((__u32)1 << TSBITS) - 1) 23#define TSMASK (((__u32)1 << TSBITS) - 1)
24 24
25extern int sysctl_tcp_syncookies; 25extern int sysctl_tcp_syncookies;
@@ -58,7 +58,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
58 58
59/* 59/*
60 * when syncookies are in effect and tcp timestamps are enabled we encode 60 * when syncookies are in effect and tcp timestamps are enabled we encode
61 * tcp options in the lowest 9 bits of the timestamp value that will be 61 * tcp options in the lower bits of the timestamp value that will be
62 * sent in the syn-ack. 62 * sent in the syn-ack.
63 * Since subsequent timestamps use the normal tcp_time_stamp value, we 63 * Since subsequent timestamps use the normal tcp_time_stamp value, we
64 * must make sure that the resulting initial timestamp is <= tcp_time_stamp. 64 * must make sure that the resulting initial timestamp is <= tcp_time_stamp.
@@ -70,11 +70,10 @@ __u32 cookie_init_timestamp(struct request_sock *req)
70 u32 options = 0; 70 u32 options = 0;
71 71
72 ireq = inet_rsk(req); 72 ireq = inet_rsk(req);
73 if (ireq->wscale_ok) { 73
74 options = ireq->snd_wscale; 74 options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
75 options |= ireq->rcv_wscale << 4; 75 options |= ireq->sack_ok << 4;
76 } 76 options |= ireq->ecn_ok << 5;
77 options |= ireq->sack_ok << 8;
78 77
79 ts = ts_now & ~TSMASK; 78 ts = ts_now & ~TSMASK;
80 ts |= options; 79 ts |= options;
@@ -138,23 +137,23 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr,
138} 137}
139 138
140/* 139/*
141 * This table has to be sorted and terminated with (__u16)-1. 140 * MSS Values are taken from the 2009 paper
142 * XXX generate a better table. 141 * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson:
143 * Unresolved Issues: HIPPI with a 64k MSS is not well supported. 142 * - values 1440 to 1460 accounted for 80% of observed mss values
143 * - values outside the 536-1460 range are rare (<0.2%).
144 *
145 * Table must be sorted.
144 */ 146 */
145static __u16 const msstab[] = { 147static __u16 const msstab[] = {
146 64 - 1, 148 64,
147 256 - 1, 149 512,
148 512 - 1, 150 536,
149 536 - 1, 151 1024,
150 1024 - 1, 152 1440,
151 1440 - 1, 153 1460,
152 1460 - 1, 154 4312,
153 4312 - 1, 155 8960,
154 (__u16)-1
155}; 156};
156/* The number doesn't include the -1 terminator */
157#define NUM_MSS (ARRAY_SIZE(msstab) - 1)
158 157
159/* 158/*
160 * Generate a syncookie. mssp points to the mss, which is returned 159 * Generate a syncookie. mssp points to the mss, which is returned
@@ -169,10 +168,10 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
169 168
170 tcp_synq_overflow(sk); 169 tcp_synq_overflow(sk);
171 170
172 /* XXX sort msstab[] by probability? Binary search? */ 171 for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
173 for (mssind = 0; mss > msstab[mssind + 1]; mssind++) 172 if (mss >= msstab[mssind])
174 ; 173 break;
175 *mssp = msstab[mssind] + 1; 174 *mssp = msstab[mssind];
176 175
177 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); 176 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
178 177
@@ -202,7 +201,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
202 jiffies / (HZ * 60), 201 jiffies / (HZ * 60),
203 COUNTER_TRIES); 202 COUNTER_TRIES);
204 203
205 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; 204 return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
206} 205}
207 206
208static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, 207static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
@@ -227,26 +226,38 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
227 * additional tcp options in the timestamp. 226 * additional tcp options in the timestamp.
228 * This extracts these options from the timestamp echo. 227 * This extracts these options from the timestamp echo.
229 * 228 *
230 * The lowest 4 bits are for snd_wscale 229 * The lowest 4 bits store snd_wscale.
231 * The next 4 lsb are for rcv_wscale 230 * next 2 bits indicate SACK and ECN support.
232 * The next lsb is for sack_ok 231 *
232 * return false if we decode an option that should not be.
233 */ 233 */
234void cookie_check_timestamp(struct tcp_options_received *tcp_opt) 234bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
235{ 235{
236 /* echoed timestamp, 9 lowest bits contain options */ 236 /* echoed timestamp, lowest bits contain options */
237 u32 options = tcp_opt->rcv_tsecr & TSMASK; 237 u32 options = tcp_opt->rcv_tsecr & TSMASK;
238 238
239 tcp_opt->snd_wscale = options & 0xf; 239 if (!tcp_opt->saw_tstamp) {
240 options >>= 4; 240 tcp_clear_options(tcp_opt);
241 tcp_opt->rcv_wscale = options & 0xf; 241 return true;
242 }
243
244 if (!sysctl_tcp_timestamps)
245 return false;
242 246
243 tcp_opt->sack_ok = (options >> 4) & 0x1; 247 tcp_opt->sack_ok = (options >> 4) & 0x1;
248 *ecn_ok = (options >> 5) & 1;
249 if (*ecn_ok && !sysctl_tcp_ecn)
250 return false;
251
252 if (tcp_opt->sack_ok && !sysctl_tcp_sack)
253 return false;
244 254
245 if (tcp_opt->sack_ok) 255 if ((options & 0xf) == 0xf)
246 tcp_sack_reset(tcp_opt); 256 return true; /* no window scaling */
247 257
248 if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) 258 tcp_opt->wscale_ok = 1;
249 tcp_opt->wscale_ok = 1; 259 tcp_opt->snd_wscale = options & 0xf;
260 return sysctl_tcp_window_scaling != 0;
250} 261}
251EXPORT_SYMBOL(cookie_check_timestamp); 262EXPORT_SYMBOL(cookie_check_timestamp);
252 263
@@ -265,8 +276,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
265 int mss; 276 int mss;
266 struct rtable *rt; 277 struct rtable *rt;
267 __u8 rcv_wscale; 278 __u8 rcv_wscale;
279 bool ecn_ok;
268 280
269 if (!sysctl_tcp_syncookies || !th->ack) 281 if (!sysctl_tcp_syncookies || !th->ack || th->rst)
270 goto out; 282 goto out;
271 283
272 if (tcp_synq_no_recent_overflow(sk) || 284 if (tcp_synq_no_recent_overflow(sk) ||
@@ -281,8 +293,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
281 memset(&tcp_opt, 0, sizeof(tcp_opt)); 293 memset(&tcp_opt, 0, sizeof(tcp_opt));
282 tcp_parse_options(skb, &tcp_opt, &hash_location, 0); 294 tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
283 295
284 if (tcp_opt.saw_tstamp) 296 if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
285 cookie_check_timestamp(&tcp_opt); 297 goto out;
286 298
287 ret = NULL; 299 ret = NULL;
288 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ 300 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
@@ -298,9 +310,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
298 ireq->rmt_port = th->source; 310 ireq->rmt_port = th->source;
299 ireq->loc_addr = ip_hdr(skb)->daddr; 311 ireq->loc_addr = ip_hdr(skb)->daddr;
300 ireq->rmt_addr = ip_hdr(skb)->saddr; 312 ireq->rmt_addr = ip_hdr(skb)->saddr;
301 ireq->ecn_ok = 0; 313 ireq->ecn_ok = ecn_ok;
302 ireq->snd_wscale = tcp_opt.snd_wscale; 314 ireq->snd_wscale = tcp_opt.snd_wscale;
303 ireq->rcv_wscale = tcp_opt.rcv_wscale;
304 ireq->sack_ok = tcp_opt.sack_ok; 315 ireq->sack_ok = tcp_opt.sack_ok;
305 ireq->wscale_ok = tcp_opt.wscale_ok; 316 ireq->wscale_ok = tcp_opt.wscale_ok;
306 ireq->tstamp_ok = tcp_opt.saw_tstamp; 317 ireq->tstamp_ok = tcp_opt.saw_tstamp;
@@ -347,22 +358,22 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
347 { .sport = th->dest, 358 { .sport = th->dest,
348 .dport = th->source } } }; 359 .dport = th->source } } };
349 security_req_classify_flow(req, &fl); 360 security_req_classify_flow(req, &fl);
350 if (ip_route_output_key(&init_net, &rt, &fl)) { 361 if (ip_route_output_key(sock_net(sk), &rt, &fl)) {
351 reqsk_free(req); 362 reqsk_free(req);
352 goto out; 363 goto out;
353 } 364 }
354 } 365 }
355 366
356 /* Try to redo what tcp_v4_send_synack did. */ 367 /* Try to redo what tcp_v4_send_synack did. */
357 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); 368 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
358 369
359 tcp_select_initial_window(tcp_full_space(sk), req->mss, 370 tcp_select_initial_window(tcp_full_space(sk), req->mss,
360 &req->rcv_wnd, &req->window_clamp, 371 &req->rcv_wnd, &req->window_clamp,
361 ireq->wscale_ok, &rcv_wscale, 372 ireq->wscale_ok, &rcv_wscale,
362 dst_metric(&rt->u.dst, RTAX_INITRWND)); 373 dst_metric(&rt->dst, RTAX_INITRWND));
363 374
364 ireq->rcv_wscale = rcv_wscale; 375 ireq->rcv_wscale = rcv_wscale;
365 376
366 ret = get_cookie_sock(sk, skb, req, &rt->u.dst); 377 ret = get_cookie_sock(sk, skb, req, &rt->dst);
367out: return ret; 378out: return ret;
368} 379}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6596b4feeddc..86b9f67abede 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -315,7 +315,6 @@ struct tcp_splice_state {
315 * is strict, actions are advisory and have some latency. 315 * is strict, actions are advisory and have some latency.
316 */ 316 */
317int tcp_memory_pressure __read_mostly; 317int tcp_memory_pressure __read_mostly;
318
319EXPORT_SYMBOL(tcp_memory_pressure); 318EXPORT_SYMBOL(tcp_memory_pressure);
320 319
321void tcp_enter_memory_pressure(struct sock *sk) 320void tcp_enter_memory_pressure(struct sock *sk)
@@ -325,7 +324,6 @@ void tcp_enter_memory_pressure(struct sock *sk)
325 tcp_memory_pressure = 1; 324 tcp_memory_pressure = 1;
326 } 325 }
327} 326}
328
329EXPORT_SYMBOL(tcp_enter_memory_pressure); 327EXPORT_SYMBOL(tcp_enter_memory_pressure);
330 328
331/* Convert seconds to retransmits based on initial and max timeout */ 329/* Convert seconds to retransmits based on initial and max timeout */
@@ -460,6 +458,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
460 } 458 }
461 return mask; 459 return mask;
462} 460}
461EXPORT_SYMBOL(tcp_poll);
463 462
464int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) 463int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
465{ 464{
@@ -508,10 +507,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
508 507
509 return put_user(answ, (int __user *)arg); 508 return put_user(answ, (int __user *)arg);
510} 509}
510EXPORT_SYMBOL(tcp_ioctl);
511 511
512static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) 512static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
513{ 513{
514 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 514 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
515 tp->pushed_seq = tp->write_seq; 515 tp->pushed_seq = tp->write_seq;
516} 516}
517 517
@@ -527,7 +527,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
527 527
528 skb->csum = 0; 528 skb->csum = 0;
529 tcb->seq = tcb->end_seq = tp->write_seq; 529 tcb->seq = tcb->end_seq = tp->write_seq;
530 tcb->flags = TCPCB_FLAG_ACK; 530 tcb->flags = TCPHDR_ACK;
531 tcb->sacked = 0; 531 tcb->sacked = 0;
532 skb_header_release(skb); 532 skb_header_release(skb);
533 tcp_add_write_queue_tail(sk, skb); 533 tcp_add_write_queue_tail(sk, skb);
@@ -608,6 +608,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
608 ssize_t spliced; 608 ssize_t spliced;
609 int ret; 609 int ret;
610 610
611 sock_rps_record_flow(sk);
611 /* 612 /*
612 * We can't seek on a socket input 613 * We can't seek on a socket input
613 */ 614 */
@@ -675,6 +676,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
675 676
676 return ret; 677 return ret;
677} 678}
679EXPORT_SYMBOL(tcp_splice_read);
678 680
679struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) 681struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
680{ 682{
@@ -815,7 +817,7 @@ new_segment:
815 skb_shinfo(skb)->gso_segs = 0; 817 skb_shinfo(skb)->gso_segs = 0;
816 818
817 if (!copied) 819 if (!copied)
818 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; 820 TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
819 821
820 copied += copy; 822 copied += copy;
821 poffset += copy; 823 poffset += copy;
@@ -856,15 +858,15 @@ out_err:
856 return sk_stream_error(sk, flags, err); 858 return sk_stream_error(sk, flags, err);
857} 859}
858 860
859ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, 861int tcp_sendpage(struct sock *sk, struct page *page, int offset,
860 size_t size, int flags) 862 size_t size, int flags)
861{ 863{
862 ssize_t res; 864 ssize_t res;
863 struct sock *sk = sock->sk;
864 865
865 if (!(sk->sk_route_caps & NETIF_F_SG) || 866 if (!(sk->sk_route_caps & NETIF_F_SG) ||
866 !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) 867 !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
867 return sock_no_sendpage(sock, page, offset, size, flags); 868 return sock_no_sendpage(sk->sk_socket, page, offset, size,
869 flags);
868 870
869 lock_sock(sk); 871 lock_sock(sk);
870 TCP_CHECK_TIMER(sk); 872 TCP_CHECK_TIMER(sk);
@@ -873,6 +875,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
873 release_sock(sk); 875 release_sock(sk);
874 return res; 876 return res;
875} 877}
878EXPORT_SYMBOL(tcp_sendpage);
876 879
877#define TCP_PAGE(sk) (sk->sk_sndmsg_page) 880#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
878#define TCP_OFF(sk) (sk->sk_sndmsg_off) 881#define TCP_OFF(sk) (sk->sk_sndmsg_off)
@@ -897,10 +900,9 @@ static inline int select_size(struct sock *sk, int sg)
897 return tmp; 900 return tmp;
898} 901}
899 902
900int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, 903int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
901 size_t size) 904 size_t size)
902{ 905{
903 struct sock *sk = sock->sk;
904 struct iovec *iov; 906 struct iovec *iov;
905 struct tcp_sock *tp = tcp_sk(sk); 907 struct tcp_sock *tp = tcp_sk(sk);
906 struct sk_buff *skb; 908 struct sk_buff *skb;
@@ -1061,7 +1063,7 @@ new_segment:
1061 } 1063 }
1062 1064
1063 if (!copied) 1065 if (!copied)
1064 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; 1066 TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
1065 1067
1066 tp->write_seq += copy; 1068 tp->write_seq += copy;
1067 TCP_SKB_CB(skb)->end_seq += copy; 1069 TCP_SKB_CB(skb)->end_seq += copy;
@@ -1121,6 +1123,7 @@ out_err:
1121 release_sock(sk); 1123 release_sock(sk);
1122 return err; 1124 return err;
1123} 1125}
1126EXPORT_SYMBOL(tcp_sendmsg);
1124 1127
1125/* 1128/*
1126 * Handle reading urgent data. BSD has very simple semantics for 1129 * Handle reading urgent data. BSD has very simple semantics for
@@ -1380,6 +1383,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1380 tcp_cleanup_rbuf(sk, copied); 1383 tcp_cleanup_rbuf(sk, copied);
1381 return copied; 1384 return copied;
1382} 1385}
1386EXPORT_SYMBOL(tcp_read_sock);
1383 1387
1384/* 1388/*
1385 * This routine copies from a sock struct into the user buffer. 1389 * This routine copies from a sock struct into the user buffer.
@@ -1774,6 +1778,7 @@ recv_urg:
1774 err = tcp_recv_urg(sk, msg, len, flags); 1778 err = tcp_recv_urg(sk, msg, len, flags);
1775 goto out; 1779 goto out;
1776} 1780}
1781EXPORT_SYMBOL(tcp_recvmsg);
1777 1782
1778void tcp_set_state(struct sock *sk, int state) 1783void tcp_set_state(struct sock *sk, int state)
1779{ 1784{
@@ -1866,6 +1871,7 @@ void tcp_shutdown(struct sock *sk, int how)
1866 tcp_send_fin(sk); 1871 tcp_send_fin(sk);
1867 } 1872 }
1868} 1873}
1874EXPORT_SYMBOL(tcp_shutdown);
1869 1875
1870void tcp_close(struct sock *sk, long timeout) 1876void tcp_close(struct sock *sk, long timeout)
1871{ 1877{
@@ -1898,6 +1904,10 @@ void tcp_close(struct sock *sk, long timeout)
1898 1904
1899 sk_mem_reclaim(sk); 1905 sk_mem_reclaim(sk);
1900 1906
1907 /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
1908 if (sk->sk_state == TCP_CLOSE)
1909 goto adjudge_to_death;
1910
1901 /* As outlined in RFC 2525, section 2.17, we send a RST here because 1911 /* As outlined in RFC 2525, section 2.17, we send a RST here because
1902 * data was lost. To witness the awful effects of the old behavior of 1912 * data was lost. To witness the awful effects of the old behavior of
1903 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk 1913 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
@@ -2025,6 +2035,7 @@ out:
2025 local_bh_enable(); 2035 local_bh_enable();
2026 sock_put(sk); 2036 sock_put(sk);
2027} 2037}
2038EXPORT_SYMBOL(tcp_close);
2028 2039
2029/* These states need RST on ABORT according to RFC793 */ 2040/* These states need RST on ABORT according to RFC793 */
2030 2041
@@ -2098,6 +2109,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2098 sk->sk_error_report(sk); 2109 sk->sk_error_report(sk);
2099 return err; 2110 return err;
2100} 2111}
2112EXPORT_SYMBOL(tcp_disconnect);
2101 2113
2102/* 2114/*
2103 * Socket option code for TCP. 2115 * Socket option code for TCP.
@@ -2396,6 +2408,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
2396 optval, optlen); 2408 optval, optlen);
2397 return do_tcp_setsockopt(sk, level, optname, optval, optlen); 2409 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2398} 2410}
2411EXPORT_SYMBOL(tcp_setsockopt);
2399 2412
2400#ifdef CONFIG_COMPAT 2413#ifdef CONFIG_COMPAT
2401int compat_tcp_setsockopt(struct sock *sk, int level, int optname, 2414int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
@@ -2406,7 +2419,6 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
2406 optval, optlen); 2419 optval, optlen);
2407 return do_tcp_setsockopt(sk, level, optname, optval, optlen); 2420 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2408} 2421}
2409
2410EXPORT_SYMBOL(compat_tcp_setsockopt); 2422EXPORT_SYMBOL(compat_tcp_setsockopt);
2411#endif 2423#endif
2412 2424
@@ -2472,7 +2484,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2472 2484
2473 info->tcpi_total_retrans = tp->total_retrans; 2485 info->tcpi_total_retrans = tp->total_retrans;
2474} 2486}
2475
2476EXPORT_SYMBOL_GPL(tcp_get_info); 2487EXPORT_SYMBOL_GPL(tcp_get_info);
2477 2488
2478static int do_tcp_getsockopt(struct sock *sk, int level, 2489static int do_tcp_getsockopt(struct sock *sk, int level,
@@ -2611,6 +2622,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2611 optval, optlen); 2622 optval, optlen);
2612 return do_tcp_getsockopt(sk, level, optname, optval, optlen); 2623 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2613} 2624}
2625EXPORT_SYMBOL(tcp_getsockopt);
2614 2626
2615#ifdef CONFIG_COMPAT 2627#ifdef CONFIG_COMPAT
2616int compat_tcp_getsockopt(struct sock *sk, int level, int optname, 2628int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
@@ -2621,7 +2633,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
2621 optval, optlen); 2633 optval, optlen);
2622 return do_tcp_getsockopt(sk, level, optname, optval, optlen); 2634 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2623} 2635}
2624
2625EXPORT_SYMBOL(compat_tcp_getsockopt); 2636EXPORT_SYMBOL(compat_tcp_getsockopt);
2626#endif 2637#endif
2627 2638
@@ -2858,7 +2869,6 @@ void tcp_free_md5sig_pool(void)
2858 if (pool) 2869 if (pool)
2859 __tcp_free_md5sig_pool(pool); 2870 __tcp_free_md5sig_pool(pool);
2860} 2871}
2861
2862EXPORT_SYMBOL(tcp_free_md5sig_pool); 2872EXPORT_SYMBOL(tcp_free_md5sig_pool);
2863 2873
2864static struct tcp_md5sig_pool * __percpu * 2874static struct tcp_md5sig_pool * __percpu *
@@ -2934,7 +2944,6 @@ retry:
2934 } 2944 }
2935 return pool; 2945 return pool;
2936} 2946}
2937
2938EXPORT_SYMBOL(tcp_alloc_md5sig_pool); 2947EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
2939 2948
2940 2949
@@ -2958,7 +2967,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
2958 spin_unlock(&tcp_md5sig_pool_lock); 2967 spin_unlock(&tcp_md5sig_pool_lock);
2959 2968
2960 if (p) 2969 if (p)
2961 return *per_cpu_ptr(p, smp_processor_id()); 2970 return *this_cpu_ptr(p);
2962 2971
2963 local_bh_enable(); 2972 local_bh_enable();
2964 return NULL; 2973 return NULL;
@@ -2986,7 +2995,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
2986 th->check = old_checksum; 2995 th->check = old_checksum;
2987 return err; 2996 return err;
2988} 2997}
2989
2990EXPORT_SYMBOL(tcp_md5_hash_header); 2998EXPORT_SYMBOL(tcp_md5_hash_header);
2991 2999
2992int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, 3000int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
@@ -2999,6 +3007,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
2999 const unsigned head_data_len = skb_headlen(skb) > header_len ? 3007 const unsigned head_data_len = skb_headlen(skb) > header_len ?
3000 skb_headlen(skb) - header_len : 0; 3008 skb_headlen(skb) - header_len : 0;
3001 const struct skb_shared_info *shi = skb_shinfo(skb); 3009 const struct skb_shared_info *shi = skb_shinfo(skb);
3010 struct sk_buff *frag_iter;
3002 3011
3003 sg_init_table(&sg, 1); 3012 sg_init_table(&sg, 1);
3004 3013
@@ -3013,9 +3022,12 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3013 return 1; 3022 return 1;
3014 } 3023 }
3015 3024
3025 skb_walk_frags(skb, frag_iter)
3026 if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
3027 return 1;
3028
3016 return 0; 3029 return 0;
3017} 3030}
3018
3019EXPORT_SYMBOL(tcp_md5_hash_skb_data); 3031EXPORT_SYMBOL(tcp_md5_hash_skb_data);
3020 3032
3021int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) 3033int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
@@ -3025,7 +3037,6 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
3025 sg_init_one(&sg, key->key, key->keylen); 3037 sg_init_one(&sg, key->key, key->keylen);
3026 return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); 3038 return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
3027} 3039}
3028
3029EXPORT_SYMBOL(tcp_md5_hash_key); 3040EXPORT_SYMBOL(tcp_md5_hash_key);
3030 3041
3031#endif 3042#endif
@@ -3297,16 +3308,3 @@ void __init tcp_init(void)
3297 tcp_secret_retiring = &tcp_secret_two; 3308 tcp_secret_retiring = &tcp_secret_two;
3298 tcp_secret_secondary = &tcp_secret_two; 3309 tcp_secret_secondary = &tcp_secret_two;
3299} 3310}
3300
3301EXPORT_SYMBOL(tcp_close);
3302EXPORT_SYMBOL(tcp_disconnect);
3303EXPORT_SYMBOL(tcp_getsockopt);
3304EXPORT_SYMBOL(tcp_ioctl);
3305EXPORT_SYMBOL(tcp_poll);
3306EXPORT_SYMBOL(tcp_read_sock);
3307EXPORT_SYMBOL(tcp_recvmsg);
3308EXPORT_SYMBOL(tcp_sendmsg);
3309EXPORT_SYMBOL(tcp_splice_read);
3310EXPORT_SYMBOL(tcp_sendpage);
3311EXPORT_SYMBOL(tcp_setsockopt);
3312EXPORT_SYMBOL(tcp_shutdown);
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index c209e054a634..377bc9349371 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -126,8 +126,8 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
126 * calculate 2^fract in a <<7 value. 126 * calculate 2^fract in a <<7 value.
127 */ 127 */
128 is_slowstart = 1; 128 is_slowstart = 1;
129 increment = ((1 << ca->rho) * hybla_fraction(rho_fractions)) 129 increment = ((1 << min(ca->rho, 16U)) *
130 - 128; 130 hybla_fraction(rho_fractions)) - 128;
131 } else { 131 } else {
132 /* 132 /*
133 * congestion avoidance 133 * congestion avoidance
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3e6dafcb1071..3c426cb318e7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,10 +78,13 @@ int sysctl_tcp_window_scaling __read_mostly = 1;
78int sysctl_tcp_sack __read_mostly = 1; 78int sysctl_tcp_sack __read_mostly = 1;
79int sysctl_tcp_fack __read_mostly = 1; 79int sysctl_tcp_fack __read_mostly = 1;
80int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; 80int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
81EXPORT_SYMBOL(sysctl_tcp_reordering);
81int sysctl_tcp_ecn __read_mostly = 2; 82int sysctl_tcp_ecn __read_mostly = 2;
83EXPORT_SYMBOL(sysctl_tcp_ecn);
82int sysctl_tcp_dsack __read_mostly = 1; 84int sysctl_tcp_dsack __read_mostly = 1;
83int sysctl_tcp_app_win __read_mostly = 31; 85int sysctl_tcp_app_win __read_mostly = 31;
84int sysctl_tcp_adv_win_scale __read_mostly = 2; 86int sysctl_tcp_adv_win_scale __read_mostly = 2;
87EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
85 88
86int sysctl_tcp_stdurg __read_mostly; 89int sysctl_tcp_stdurg __read_mostly;
87int sysctl_tcp_rfc1337 __read_mostly; 90int sysctl_tcp_rfc1337 __read_mostly;
@@ -419,6 +422,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
419 422
420 inet_csk(sk)->icsk_ack.rcv_mss = hint; 423 inet_csk(sk)->icsk_ack.rcv_mss = hint;
421} 424}
425EXPORT_SYMBOL(tcp_initialize_rcv_mss);
422 426
423/* Receiver "autotuning" code. 427/* Receiver "autotuning" code.
424 * 428 *
@@ -2639,7 +2643,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
2639 if (sk->sk_family == AF_INET) { 2643 if (sk->sk_family == AF_INET) {
2640 printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", 2644 printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2641 msg, 2645 msg,
2642 &inet->daddr, ntohs(inet->dport), 2646 &inet->inet_daddr, ntohs(inet->inet_dport),
2643 tp->snd_cwnd, tcp_left_out(tp), 2647 tp->snd_cwnd, tcp_left_out(tp),
2644 tp->snd_ssthresh, tp->prior_ssthresh, 2648 tp->snd_ssthresh, tp->prior_ssthresh,
2645 tp->packets_out); 2649 tp->packets_out);
@@ -2649,7 +2653,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
2649 struct ipv6_pinfo *np = inet6_sk(sk); 2653 struct ipv6_pinfo *np = inet6_sk(sk);
2650 printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", 2654 printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2651 msg, 2655 msg,
2652 &np->daddr, ntohs(inet->dport), 2656 &np->daddr, ntohs(inet->inet_dport),
2653 tp->snd_cwnd, tcp_left_out(tp), 2657 tp->snd_cwnd, tcp_left_out(tp),
2654 tp->snd_ssthresh, tp->prior_ssthresh, 2658 tp->snd_ssthresh, tp->prior_ssthresh,
2655 tp->packets_out); 2659 tp->packets_out);
@@ -2938,6 +2942,7 @@ void tcp_simple_retransmit(struct sock *sk)
2938 } 2942 }
2939 tcp_xmit_retransmit_queue(sk); 2943 tcp_xmit_retransmit_queue(sk);
2940} 2944}
2945EXPORT_SYMBOL(tcp_simple_retransmit);
2941 2946
2942/* Process an event, which can update packets-in-flight not trivially. 2947/* Process an event, which can update packets-in-flight not trivially.
2943 * Main goal of this function is to calculate new estimate for left_out, 2948 * Main goal of this function is to calculate new estimate for left_out,
@@ -3286,7 +3291,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3286 * connection startup slow start one packet too 3291 * connection startup slow start one packet too
3287 * quickly. This is severely frowned upon behavior. 3292 * quickly. This is severely frowned upon behavior.
3288 */ 3293 */
3289 if (!(scb->flags & TCPCB_FLAG_SYN)) { 3294 if (!(scb->flags & TCPHDR_SYN)) {
3290 flag |= FLAG_DATA_ACKED; 3295 flag |= FLAG_DATA_ACKED;
3291 } else { 3296 } else {
3292 flag |= FLAG_SYN_ACKED; 3297 flag |= FLAG_SYN_ACKED;
@@ -3858,6 +3863,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3858 } 3863 }
3859 } 3864 }
3860} 3865}
3866EXPORT_SYMBOL(tcp_parse_options);
3861 3867
3862static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) 3868static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
3863{ 3869{
@@ -3931,6 +3937,7 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th)
3931 } 3937 }
3932 return NULL; 3938 return NULL;
3933} 3939}
3940EXPORT_SYMBOL(tcp_parse_md5sig_option);
3934#endif 3941#endif
3935 3942
3936static inline void tcp_store_ts_recent(struct tcp_sock *tp) 3943static inline void tcp_store_ts_recent(struct tcp_sock *tp)
@@ -5432,6 +5439,7 @@ discard:
5432 __kfree_skb(skb); 5439 __kfree_skb(skb);
5433 return 0; 5440 return 0;
5434} 5441}
5442EXPORT_SYMBOL(tcp_rcv_established);
5435 5443
5436static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5444static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5437 struct tcphdr *th, unsigned len) 5445 struct tcphdr *th, unsigned len)
@@ -5931,14 +5939,4 @@ discard:
5931 } 5939 }
5932 return 0; 5940 return 0;
5933} 5941}
5934
5935EXPORT_SYMBOL(sysctl_tcp_ecn);
5936EXPORT_SYMBOL(sysctl_tcp_reordering);
5937EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
5938EXPORT_SYMBOL(tcp_parse_options);
5939#ifdef CONFIG_TCP_MD5SIG
5940EXPORT_SYMBOL(tcp_parse_md5sig_option);
5941#endif
5942EXPORT_SYMBOL(tcp_rcv_established);
5943EXPORT_SYMBOL(tcp_rcv_state_process); 5942EXPORT_SYMBOL(tcp_rcv_state_process);
5944EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 202cf09c4cd4..020766292bb0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -84,6 +84,7 @@
84 84
85int sysctl_tcp_tw_reuse __read_mostly; 85int sysctl_tcp_tw_reuse __read_mostly;
86int sysctl_tcp_low_latency __read_mostly; 86int sysctl_tcp_low_latency __read_mostly;
87EXPORT_SYMBOL(sysctl_tcp_low_latency);
87 88
88 89
89#ifdef CONFIG_TCP_MD5SIG 90#ifdef CONFIG_TCP_MD5SIG
@@ -100,6 +101,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
100#endif 101#endif
101 102
102struct inet_hashinfo tcp_hashinfo; 103struct inet_hashinfo tcp_hashinfo;
104EXPORT_SYMBOL(tcp_hashinfo);
103 105
104static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) 106static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
105{ 107{
@@ -139,7 +141,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
139 141
140 return 0; 142 return 0;
141} 143}
142
143EXPORT_SYMBOL_GPL(tcp_twsk_unique); 144EXPORT_SYMBOL_GPL(tcp_twsk_unique);
144 145
145/* This will initiate an outgoing connection. */ 146/* This will initiate an outgoing connection. */
@@ -204,10 +205,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
204 * TIME-WAIT * and initialize rx_opt.ts_recent from it, 205 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
205 * when trying new connection. 206 * when trying new connection.
206 */ 207 */
207 if (peer != NULL && 208 if (peer) {
208 (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { 209 inet_peer_refcheck(peer);
209 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; 210 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
210 tp->rx_opt.ts_recent = peer->tcp_ts; 211 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
212 tp->rx_opt.ts_recent = peer->tcp_ts;
213 }
211 } 214 }
212 } 215 }
213 216
@@ -237,7 +240,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
237 240
238 /* OK, now commit destination to socket. */ 241 /* OK, now commit destination to socket. */
239 sk->sk_gso_type = SKB_GSO_TCPV4; 242 sk->sk_gso_type = SKB_GSO_TCPV4;
240 sk_setup_caps(sk, &rt->u.dst); 243 sk_setup_caps(sk, &rt->dst);
241 244
242 if (!tp->write_seq) 245 if (!tp->write_seq)
243 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 246 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
@@ -265,6 +268,7 @@ failure:
265 inet->inet_dport = 0; 268 inet->inet_dport = 0;
266 return err; 269 return err;
267} 270}
271EXPORT_SYMBOL(tcp_v4_connect);
268 272
269/* 273/*
270 * This routine does path mtu discovery as defined in RFC1191. 274 * This routine does path mtu discovery as defined in RFC1191.
@@ -543,6 +547,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
543 547
544 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); 548 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
545} 549}
550EXPORT_SYMBOL(tcp_v4_send_check);
546 551
547int tcp_v4_gso_send_check(struct sk_buff *skb) 552int tcp_v4_gso_send_check(struct sk_buff *skb)
548{ 553{
@@ -793,19 +798,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
793 kfree(inet_rsk(req)->opt); 798 kfree(inet_rsk(req)->opt);
794} 799}
795 800
796#ifdef CONFIG_SYN_COOKIES 801static void syn_flood_warning(const struct sk_buff *skb)
797static void syn_flood_warning(struct sk_buff *skb)
798{ 802{
799 static unsigned long warntime; 803 const char *msg;
800 804
801 if (time_after(jiffies, (warntime + HZ * 60))) { 805#ifdef CONFIG_SYN_COOKIES
802 warntime = jiffies; 806 if (sysctl_tcp_syncookies)
803 printk(KERN_INFO 807 msg = "Sending cookies";
804 "possible SYN flooding on port %d. Sending cookies.\n", 808 else
805 ntohs(tcp_hdr(skb)->dest));
806 }
807}
808#endif 809#endif
810 msg = "Dropping request";
811
812 pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
813 ntohs(tcp_hdr(skb)->dest), msg);
814}
809 815
810/* 816/*
811 * Save and compile IPv4 options into the request_sock if needed. 817 * Save and compile IPv4 options into the request_sock if needed.
@@ -857,7 +863,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
857{ 863{
858 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); 864 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
859} 865}
860
861EXPORT_SYMBOL(tcp_v4_md5_lookup); 866EXPORT_SYMBOL(tcp_v4_md5_lookup);
862 867
863static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, 868static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
@@ -924,7 +929,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
924 } 929 }
925 return 0; 930 return 0;
926} 931}
927
928EXPORT_SYMBOL(tcp_v4_md5_do_add); 932EXPORT_SYMBOL(tcp_v4_md5_do_add);
929 933
930static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, 934static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
@@ -962,7 +966,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
962 } 966 }
963 return -ENOENT; 967 return -ENOENT;
964} 968}
965
966EXPORT_SYMBOL(tcp_v4_md5_do_del); 969EXPORT_SYMBOL(tcp_v4_md5_do_del);
967 970
968static void tcp_v4_clear_md5_list(struct sock *sk) 971static void tcp_v4_clear_md5_list(struct sock *sk)
@@ -1135,7 +1138,6 @@ clear_hash_noput:
1135 memset(md5_hash, 0, 16); 1138 memset(md5_hash, 0, 16);
1136 return 1; 1139 return 1;
1137} 1140}
1138
1139EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1141EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1140 1142
1141static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) 1143static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
@@ -1243,6 +1245,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1243 * evidently real one. 1245 * evidently real one.
1244 */ 1246 */
1245 if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 1247 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1248 if (net_ratelimit())
1249 syn_flood_warning(skb);
1246#ifdef CONFIG_SYN_COOKIES 1250#ifdef CONFIG_SYN_COOKIES
1247 if (sysctl_tcp_syncookies) { 1251 if (sysctl_tcp_syncookies) {
1248 want_cookie = 1; 1252 want_cookie = 1;
@@ -1323,15 +1327,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1323 if (security_inet_conn_request(sk, skb, req)) 1327 if (security_inet_conn_request(sk, skb, req))
1324 goto drop_and_free; 1328 goto drop_and_free;
1325 1329
1326 if (!want_cookie) 1330 if (!want_cookie || tmp_opt.tstamp_ok)
1327 TCP_ECN_create_request(req, tcp_hdr(skb)); 1331 TCP_ECN_create_request(req, tcp_hdr(skb));
1328 1332
1329 if (want_cookie) { 1333 if (want_cookie) {
1330#ifdef CONFIG_SYN_COOKIES
1331 syn_flood_warning(skb);
1332 req->cookie_ts = tmp_opt.tstamp_ok;
1333#endif
1334 isn = cookie_v4_init_sequence(sk, skb, &req->mss); 1334 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1335 req->cookie_ts = tmp_opt.tstamp_ok;
1335 } else if (!isn) { 1336 } else if (!isn) {
1336 struct inet_peer *peer = NULL; 1337 struct inet_peer *peer = NULL;
1337 1338
@@ -1349,6 +1350,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1349 (dst = inet_csk_route_req(sk, req)) != NULL && 1350 (dst = inet_csk_route_req(sk, req)) != NULL &&
1350 (peer = rt_get_peer((struct rtable *)dst)) != NULL && 1351 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1351 peer->v4daddr == saddr) { 1352 peer->v4daddr == saddr) {
1353 inet_peer_refcheck(peer);
1352 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && 1354 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1353 (s32)(peer->tcp_ts - req->ts_recent) > 1355 (s32)(peer->tcp_ts - req->ts_recent) >
1354 TCP_PAWS_WINDOW) { 1356 TCP_PAWS_WINDOW) {
@@ -1393,6 +1395,7 @@ drop_and_free:
1393drop: 1395drop:
1394 return 0; 1396 return 0;
1395} 1397}
1398EXPORT_SYMBOL(tcp_v4_conn_request);
1396 1399
1397 1400
1398/* 1401/*
@@ -1478,6 +1481,7 @@ exit:
1478 dst_release(dst); 1481 dst_release(dst);
1479 return NULL; 1482 return NULL;
1480} 1483}
1484EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1481 1485
1482static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 1486static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1483{ 1487{
@@ -1504,7 +1508,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1504 } 1508 }
1505 1509
1506#ifdef CONFIG_SYN_COOKIES 1510#ifdef CONFIG_SYN_COOKIES
1507 if (!th->rst && !th->syn && th->ack) 1511 if (!th->syn)
1508 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); 1512 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1509#endif 1513#endif
1510 return sk; 1514 return sk;
@@ -1555,6 +1559,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1555#endif 1559#endif
1556 1560
1557 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1561 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1562 sock_rps_save_rxhash(sk, skb->rxhash);
1558 TCP_CHECK_TIMER(sk); 1563 TCP_CHECK_TIMER(sk);
1559 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { 1564 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1560 rsk = sk; 1565 rsk = sk;
@@ -1579,7 +1584,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1579 } 1584 }
1580 return 0; 1585 return 0;
1581 } 1586 }
1582 } 1587 } else
1588 sock_rps_save_rxhash(sk, skb->rxhash);
1589
1583 1590
1584 TCP_CHECK_TIMER(sk); 1591 TCP_CHECK_TIMER(sk);
1585 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { 1592 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
@@ -1604,6 +1611,7 @@ csum_err:
1604 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 1611 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1605 goto discard; 1612 goto discard;
1606} 1613}
1614EXPORT_SYMBOL(tcp_v4_do_rcv);
1607 1615
1608/* 1616/*
1609 * From tcp_input.c 1617 * From tcp_input.c
@@ -1672,8 +1680,6 @@ process:
1672 1680
1673 skb->dev = NULL; 1681 skb->dev = NULL;
1674 1682
1675 sock_rps_save_rxhash(sk, skb->rxhash);
1676
1677 bh_lock_sock_nested(sk); 1683 bh_lock_sock_nested(sk);
1678 ret = 0; 1684 ret = 0;
1679 if (!sock_owned_by_user(sk)) { 1685 if (!sock_owned_by_user(sk)) {
@@ -1792,6 +1798,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
1792 1798
1793 return 0; 1799 return 0;
1794} 1800}
1801EXPORT_SYMBOL(tcp_v4_remember_stamp);
1795 1802
1796int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) 1803int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1797{ 1804{
@@ -1831,6 +1838,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
1831 .compat_getsockopt = compat_ip_getsockopt, 1838 .compat_getsockopt = compat_ip_getsockopt,
1832#endif 1839#endif
1833}; 1840};
1841EXPORT_SYMBOL(ipv4_specific);
1834 1842
1835#ifdef CONFIG_TCP_MD5SIG 1843#ifdef CONFIG_TCP_MD5SIG
1836static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1844static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
@@ -1959,7 +1967,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
1959 1967
1960 percpu_counter_dec(&tcp_sockets_allocated); 1968 percpu_counter_dec(&tcp_sockets_allocated);
1961} 1969}
1962
1963EXPORT_SYMBOL(tcp_v4_destroy_sock); 1970EXPORT_SYMBOL(tcp_v4_destroy_sock);
1964 1971
1965#ifdef CONFIG_PROC_FS 1972#ifdef CONFIG_PROC_FS
@@ -1977,6 +1984,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1977 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 1984 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1978} 1985}
1979 1986
1987/*
1988 * Get next listener socket follow cur. If cur is NULL, get first socket
1989 * starting from bucket given in st->bucket; when st->bucket is zero the
1990 * very first socket in the hash table is returned.
1991 */
1980static void *listening_get_next(struct seq_file *seq, void *cur) 1992static void *listening_get_next(struct seq_file *seq, void *cur)
1981{ 1993{
1982 struct inet_connection_sock *icsk; 1994 struct inet_connection_sock *icsk;
@@ -1987,14 +1999,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
1987 struct net *net = seq_file_net(seq); 1999 struct net *net = seq_file_net(seq);
1988 2000
1989 if (!sk) { 2001 if (!sk) {
1990 st->bucket = 0; 2002 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1991 ilb = &tcp_hashinfo.listening_hash[0];
1992 spin_lock_bh(&ilb->lock); 2003 spin_lock_bh(&ilb->lock);
1993 sk = sk_nulls_head(&ilb->head); 2004 sk = sk_nulls_head(&ilb->head);
2005 st->offset = 0;
1994 goto get_sk; 2006 goto get_sk;
1995 } 2007 }
1996 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2008 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1997 ++st->num; 2009 ++st->num;
2010 ++st->offset;
1998 2011
1999 if (st->state == TCP_SEQ_STATE_OPENREQ) { 2012 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2000 struct request_sock *req = cur; 2013 struct request_sock *req = cur;
@@ -2009,6 +2022,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2009 } 2022 }
2010 req = req->dl_next; 2023 req = req->dl_next;
2011 } 2024 }
2025 st->offset = 0;
2012 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 2026 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2013 break; 2027 break;
2014get_req: 2028get_req:
@@ -2044,6 +2058,7 @@ start_req:
2044 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2058 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2045 } 2059 }
2046 spin_unlock_bh(&ilb->lock); 2060 spin_unlock_bh(&ilb->lock);
2061 st->offset = 0;
2047 if (++st->bucket < INET_LHTABLE_SIZE) { 2062 if (++st->bucket < INET_LHTABLE_SIZE) {
2048 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2063 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2049 spin_lock_bh(&ilb->lock); 2064 spin_lock_bh(&ilb->lock);
@@ -2057,7 +2072,12 @@ out:
2057 2072
2058static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 2073static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2059{ 2074{
2060 void *rc = listening_get_next(seq, NULL); 2075 struct tcp_iter_state *st = seq->private;
2076 void *rc;
2077
2078 st->bucket = 0;
2079 st->offset = 0;
2080 rc = listening_get_next(seq, NULL);
2061 2081
2062 while (rc && *pos) { 2082 while (rc && *pos) {
2063 rc = listening_get_next(seq, rc); 2083 rc = listening_get_next(seq, rc);
@@ -2072,13 +2092,18 @@ static inline int empty_bucket(struct tcp_iter_state *st)
2072 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); 2092 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2073} 2093}
2074 2094
2095/*
2096 * Get first established socket starting from bucket given in st->bucket.
2097 * If st->bucket is zero, the very first socket in the hash is returned.
2098 */
2075static void *established_get_first(struct seq_file *seq) 2099static void *established_get_first(struct seq_file *seq)
2076{ 2100{
2077 struct tcp_iter_state *st = seq->private; 2101 struct tcp_iter_state *st = seq->private;
2078 struct net *net = seq_file_net(seq); 2102 struct net *net = seq_file_net(seq);
2079 void *rc = NULL; 2103 void *rc = NULL;
2080 2104
2081 for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 2105 st->offset = 0;
2106 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2082 struct sock *sk; 2107 struct sock *sk;
2083 struct hlist_nulls_node *node; 2108 struct hlist_nulls_node *node;
2084 struct inet_timewait_sock *tw; 2109 struct inet_timewait_sock *tw;
@@ -2123,6 +2148,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
2123 struct net *net = seq_file_net(seq); 2148 struct net *net = seq_file_net(seq);
2124 2149
2125 ++st->num; 2150 ++st->num;
2151 ++st->offset;
2126 2152
2127 if (st->state == TCP_SEQ_STATE_TIME_WAIT) { 2153 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2128 tw = cur; 2154 tw = cur;
@@ -2139,6 +2165,7 @@ get_tw:
2139 st->state = TCP_SEQ_STATE_ESTABLISHED; 2165 st->state = TCP_SEQ_STATE_ESTABLISHED;
2140 2166
2141 /* Look for next non empty bucket */ 2167 /* Look for next non empty bucket */
2168 st->offset = 0;
2142 while (++st->bucket <= tcp_hashinfo.ehash_mask && 2169 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2143 empty_bucket(st)) 2170 empty_bucket(st))
2144 ; 2171 ;
@@ -2166,7 +2193,11 @@ out:
2166 2193
2167static void *established_get_idx(struct seq_file *seq, loff_t pos) 2194static void *established_get_idx(struct seq_file *seq, loff_t pos)
2168{ 2195{
2169 void *rc = established_get_first(seq); 2196 struct tcp_iter_state *st = seq->private;
2197 void *rc;
2198
2199 st->bucket = 0;
2200 rc = established_get_first(seq);
2170 2201
2171 while (rc && pos) { 2202 while (rc && pos) {
2172 rc = established_get_next(seq, rc); 2203 rc = established_get_next(seq, rc);
@@ -2191,24 +2222,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2191 return rc; 2222 return rc;
2192} 2223}
2193 2224
2225static void *tcp_seek_last_pos(struct seq_file *seq)
2226{
2227 struct tcp_iter_state *st = seq->private;
2228 int offset = st->offset;
2229 int orig_num = st->num;
2230 void *rc = NULL;
2231
2232 switch (st->state) {
2233 case TCP_SEQ_STATE_OPENREQ:
2234 case TCP_SEQ_STATE_LISTENING:
2235 if (st->bucket >= INET_LHTABLE_SIZE)
2236 break;
2237 st->state = TCP_SEQ_STATE_LISTENING;
2238 rc = listening_get_next(seq, NULL);
2239 while (offset-- && rc)
2240 rc = listening_get_next(seq, rc);
2241 if (rc)
2242 break;
2243 st->bucket = 0;
2244 /* Fallthrough */
2245 case TCP_SEQ_STATE_ESTABLISHED:
2246 case TCP_SEQ_STATE_TIME_WAIT:
2247 st->state = TCP_SEQ_STATE_ESTABLISHED;
2248 if (st->bucket > tcp_hashinfo.ehash_mask)
2249 break;
2250 rc = established_get_first(seq);
2251 while (offset-- && rc)
2252 rc = established_get_next(seq, rc);
2253 }
2254
2255 st->num = orig_num;
2256
2257 return rc;
2258}
2259
2194static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 2260static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2195{ 2261{
2196 struct tcp_iter_state *st = seq->private; 2262 struct tcp_iter_state *st = seq->private;
2263 void *rc;
2264
2265 if (*pos && *pos == st->last_pos) {
2266 rc = tcp_seek_last_pos(seq);
2267 if (rc)
2268 goto out;
2269 }
2270
2197 st->state = TCP_SEQ_STATE_LISTENING; 2271 st->state = TCP_SEQ_STATE_LISTENING;
2198 st->num = 0; 2272 st->num = 0;
2199 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2273 st->bucket = 0;
2274 st->offset = 0;
2275 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2276
2277out:
2278 st->last_pos = *pos;
2279 return rc;
2200} 2280}
2201 2281
2202static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2282static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2203{ 2283{
2284 struct tcp_iter_state *st = seq->private;
2204 void *rc = NULL; 2285 void *rc = NULL;
2205 struct tcp_iter_state *st;
2206 2286
2207 if (v == SEQ_START_TOKEN) { 2287 if (v == SEQ_START_TOKEN) {
2208 rc = tcp_get_idx(seq, 0); 2288 rc = tcp_get_idx(seq, 0);
2209 goto out; 2289 goto out;
2210 } 2290 }
2211 st = seq->private;
2212 2291
2213 switch (st->state) { 2292 switch (st->state) {
2214 case TCP_SEQ_STATE_OPENREQ: 2293 case TCP_SEQ_STATE_OPENREQ:
@@ -2216,6 +2295,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2216 rc = listening_get_next(seq, v); 2295 rc = listening_get_next(seq, v);
2217 if (!rc) { 2296 if (!rc) {
2218 st->state = TCP_SEQ_STATE_ESTABLISHED; 2297 st->state = TCP_SEQ_STATE_ESTABLISHED;
2298 st->bucket = 0;
2299 st->offset = 0;
2219 rc = established_get_first(seq); 2300 rc = established_get_first(seq);
2220 } 2301 }
2221 break; 2302 break;
@@ -2226,6 +2307,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2226 } 2307 }
2227out: 2308out:
2228 ++*pos; 2309 ++*pos;
2310 st->last_pos = *pos;
2229 return rc; 2311 return rc;
2230} 2312}
2231 2313
@@ -2264,6 +2346,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
2264 2346
2265 s = ((struct seq_file *)file->private_data)->private; 2347 s = ((struct seq_file *)file->private_data)->private;
2266 s->family = afinfo->family; 2348 s->family = afinfo->family;
2349 s->last_pos = 0;
2267 return 0; 2350 return 0;
2268} 2351}
2269 2352
@@ -2287,11 +2370,13 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2287 rc = -ENOMEM; 2370 rc = -ENOMEM;
2288 return rc; 2371 return rc;
2289} 2372}
2373EXPORT_SYMBOL(tcp_proc_register);
2290 2374
2291void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) 2375void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2292{ 2376{
2293 proc_net_remove(net, afinfo->name); 2377 proc_net_remove(net, afinfo->name);
2294} 2378}
2379EXPORT_SYMBOL(tcp_proc_unregister);
2295 2380
2296static void get_openreq4(struct sock *sk, struct request_sock *req, 2381static void get_openreq4(struct sock *sk, struct request_sock *req,
2297 struct seq_file *f, int i, int uid, int *len) 2382 struct seq_file *f, int i, int uid, int *len)
@@ -2515,6 +2600,8 @@ struct proto tcp_prot = {
2515 .setsockopt = tcp_setsockopt, 2600 .setsockopt = tcp_setsockopt,
2516 .getsockopt = tcp_getsockopt, 2601 .getsockopt = tcp_getsockopt,
2517 .recvmsg = tcp_recvmsg, 2602 .recvmsg = tcp_recvmsg,
2603 .sendmsg = tcp_sendmsg,
2604 .sendpage = tcp_sendpage,
2518 .backlog_rcv = tcp_v4_do_rcv, 2605 .backlog_rcv = tcp_v4_do_rcv,
2519 .hash = inet_hash, 2606 .hash = inet_hash,
2520 .unhash = inet_unhash, 2607 .unhash = inet_unhash,
@@ -2533,11 +2620,13 @@ struct proto tcp_prot = {
2533 .twsk_prot = &tcp_timewait_sock_ops, 2620 .twsk_prot = &tcp_timewait_sock_ops,
2534 .rsk_prot = &tcp_request_sock_ops, 2621 .rsk_prot = &tcp_request_sock_ops,
2535 .h.hashinfo = &tcp_hashinfo, 2622 .h.hashinfo = &tcp_hashinfo,
2623 .no_autobind = true,
2536#ifdef CONFIG_COMPAT 2624#ifdef CONFIG_COMPAT
2537 .compat_setsockopt = compat_tcp_setsockopt, 2625 .compat_setsockopt = compat_tcp_setsockopt,
2538 .compat_getsockopt = compat_tcp_getsockopt, 2626 .compat_getsockopt = compat_tcp_getsockopt,
2539#endif 2627#endif
2540}; 2628};
2629EXPORT_SYMBOL(tcp_prot);
2541 2630
2542 2631
2543static int __net_init tcp_sk_init(struct net *net) 2632static int __net_init tcp_sk_init(struct net *net)
@@ -2568,20 +2657,3 @@ void __init tcp_v4_init(void)
2568 if (register_pernet_subsys(&tcp_sk_ops)) 2657 if (register_pernet_subsys(&tcp_sk_ops))
2569 panic("Failed to create the TCP control socket.\n"); 2658 panic("Failed to create the TCP control socket.\n");
2570} 2659}
2571
2572EXPORT_SYMBOL(ipv4_specific);
2573EXPORT_SYMBOL(tcp_hashinfo);
2574EXPORT_SYMBOL(tcp_prot);
2575EXPORT_SYMBOL(tcp_v4_conn_request);
2576EXPORT_SYMBOL(tcp_v4_connect);
2577EXPORT_SYMBOL(tcp_v4_do_rcv);
2578EXPORT_SYMBOL(tcp_v4_remember_stamp);
2579EXPORT_SYMBOL(tcp_v4_send_check);
2580EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2581
2582#ifdef CONFIG_PROC_FS
2583EXPORT_SYMBOL(tcp_proc_register);
2584EXPORT_SYMBOL(tcp_proc_unregister);
2585#endif
2586EXPORT_SYMBOL(sysctl_tcp_low_latency);
2587
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 794c2e122a41..f25b56cb85cb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -47,7 +47,6 @@ struct inet_timewait_death_row tcp_death_row = {
47 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, 47 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
48 (unsigned long)&tcp_death_row), 48 (unsigned long)&tcp_death_row),
49}; 49};
50
51EXPORT_SYMBOL_GPL(tcp_death_row); 50EXPORT_SYMBOL_GPL(tcp_death_row);
52 51
53static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) 52static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
@@ -262,6 +261,7 @@ kill:
262 inet_twsk_put(tw); 261 inet_twsk_put(tw);
263 return TCP_TW_SUCCESS; 262 return TCP_TW_SUCCESS;
264} 263}
264EXPORT_SYMBOL(tcp_timewait_state_process);
265 265
266/* 266/*
267 * Move a socket to time-wait or dead fin-wait-2 state. 267 * Move a socket to time-wait or dead fin-wait-2 state.
@@ -362,7 +362,6 @@ void tcp_twsk_destructor(struct sock *sk)
362 tcp_free_md5sig_pool(); 362 tcp_free_md5sig_pool();
363#endif 363#endif
364} 364}
365
366EXPORT_SYMBOL_GPL(tcp_twsk_destructor); 365EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
367 366
368static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, 367static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
@@ -510,6 +509,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
510 } 509 }
511 return newsk; 510 return newsk;
512} 511}
512EXPORT_SYMBOL(tcp_create_openreq_child);
513 513
514/* 514/*
515 * Process an incoming packet for SYN_RECV sockets represented 515 * Process an incoming packet for SYN_RECV sockets represented
@@ -706,6 +706,7 @@ embryonic_reset:
706 inet_csk_reqsk_queue_drop(sk, req, prev); 706 inet_csk_reqsk_queue_drop(sk, req, prev);
707 return NULL; 707 return NULL;
708} 708}
709EXPORT_SYMBOL(tcp_check_req);
709 710
710/* 711/*
711 * Queue segment on the new socket if the new socket is active, 712 * Queue segment on the new socket if the new socket is active,
@@ -737,8 +738,4 @@ int tcp_child_process(struct sock *parent, struct sock *child,
737 sock_put(child); 738 sock_put(child);
738 return ret; 739 return ret;
739} 740}
740
741EXPORT_SYMBOL(tcp_check_req);
742EXPORT_SYMBOL(tcp_child_process); 741EXPORT_SYMBOL(tcp_child_process);
743EXPORT_SYMBOL(tcp_create_openreq_child);
744EXPORT_SYMBOL(tcp_timewait_state_process);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4ed957f201a..de3bd8458588 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -247,6 +247,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
247 /* Set the clamp no higher than max representable value */ 247 /* Set the clamp no higher than max representable value */
248 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); 248 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
249} 249}
250EXPORT_SYMBOL(tcp_select_initial_window);
250 251
251/* Chose a new window to advertise, update state in tcp_sock for the 252/* Chose a new window to advertise, update state in tcp_sock for the
252 * socket, and return result with RFC1323 scaling applied. The return 253 * socket, and return result with RFC1323 scaling applied. The return
@@ -294,9 +295,9 @@ static u16 tcp_select_window(struct sock *sk)
294/* Packet ECN state for a SYN-ACK */ 295/* Packet ECN state for a SYN-ACK */
295static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) 296static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
296{ 297{
297 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; 298 TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR;
298 if (!(tp->ecn_flags & TCP_ECN_OK)) 299 if (!(tp->ecn_flags & TCP_ECN_OK))
299 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; 300 TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE;
300} 301}
301 302
302/* Packet ECN state for a SYN. */ 303/* Packet ECN state for a SYN. */
@@ -306,7 +307,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
306 307
307 tp->ecn_flags = 0; 308 tp->ecn_flags = 0;
308 if (sysctl_tcp_ecn == 1) { 309 if (sysctl_tcp_ecn == 1) {
309 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR; 310 TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR;
310 tp->ecn_flags = TCP_ECN_OK; 311 tp->ecn_flags = TCP_ECN_OK;
311 } 312 }
312} 313}
@@ -361,7 +362,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
361 skb_shinfo(skb)->gso_type = 0; 362 skb_shinfo(skb)->gso_type = 0;
362 363
363 TCP_SKB_CB(skb)->seq = seq; 364 TCP_SKB_CB(skb)->seq = seq;
364 if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN)) 365 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
365 seq++; 366 seq++;
366 TCP_SKB_CB(skb)->end_seq = seq; 367 TCP_SKB_CB(skb)->end_seq = seq;
367} 368}
@@ -820,7 +821,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
820 tcb = TCP_SKB_CB(skb); 821 tcb = TCP_SKB_CB(skb);
821 memset(&opts, 0, sizeof(opts)); 822 memset(&opts, 0, sizeof(opts));
822 823
823 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) 824 if (unlikely(tcb->flags & TCPHDR_SYN))
824 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); 825 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
825 else 826 else
826 tcp_options_size = tcp_established_options(sk, skb, &opts, 827 tcp_options_size = tcp_established_options(sk, skb, &opts,
@@ -843,7 +844,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
843 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | 844 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
844 tcb->flags); 845 tcb->flags);
845 846
846 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { 847 if (unlikely(tcb->flags & TCPHDR_SYN)) {
847 /* RFC1323: The window in SYN & SYN/ACK segments 848 /* RFC1323: The window in SYN & SYN/ACK segments
848 * is never scaled. 849 * is never scaled.
849 */ 850 */
@@ -866,7 +867,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
866 } 867 }
867 868
868 tcp_options_write((__be32 *)(th + 1), tp, &opts); 869 tcp_options_write((__be32 *)(th + 1), tp, &opts);
869 if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) 870 if (likely((tcb->flags & TCPHDR_SYN) == 0))
870 TCP_ECN_send(sk, skb, tcp_header_size); 871 TCP_ECN_send(sk, skb, tcp_header_size);
871 872
872#ifdef CONFIG_TCP_MD5SIG 873#ifdef CONFIG_TCP_MD5SIG
@@ -880,7 +881,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
880 881
881 icsk->icsk_af_ops->send_check(sk, skb); 882 icsk->icsk_af_ops->send_check(sk, skb);
882 883
883 if (likely(tcb->flags & TCPCB_FLAG_ACK)) 884 if (likely(tcb->flags & TCPHDR_ACK))
884 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 885 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
885 886
886 if (skb->len != tcp_header_size) 887 if (skb->len != tcp_header_size)
@@ -1023,7 +1024,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1023 1024
1024 /* PSH and FIN should only be set in the second packet. */ 1025 /* PSH and FIN should only be set in the second packet. */
1025 flags = TCP_SKB_CB(skb)->flags; 1026 flags = TCP_SKB_CB(skb)->flags;
1026 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); 1027 TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1027 TCP_SKB_CB(buff)->flags = flags; 1028 TCP_SKB_CB(buff)->flags = flags;
1028 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; 1029 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1029 1030
@@ -1189,6 +1190,7 @@ void tcp_mtup_init(struct sock *sk)
1189 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); 1190 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
1190 icsk->icsk_mtup.probe_size = 0; 1191 icsk->icsk_mtup.probe_size = 0;
1191} 1192}
1193EXPORT_SYMBOL(tcp_mtup_init);
1192 1194
1193/* This function synchronize snd mss to current pmtu/exthdr set. 1195/* This function synchronize snd mss to current pmtu/exthdr set.
1194 1196
@@ -1232,6 +1234,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1232 1234
1233 return mss_now; 1235 return mss_now;
1234} 1236}
1237EXPORT_SYMBOL(tcp_sync_mss);
1235 1238
1236/* Compute the current effective MSS, taking SACKs and IP options, 1239/* Compute the current effective MSS, taking SACKs and IP options,
1237 * and even PMTU discovery events into account. 1240 * and even PMTU discovery events into account.
@@ -1328,8 +1331,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
1328 u32 in_flight, cwnd; 1331 u32 in_flight, cwnd;
1329 1332
1330 /* Don't be strict about the congestion window for the final FIN. */ 1333 /* Don't be strict about the congestion window for the final FIN. */
1331 if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && 1334 if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1)
1332 tcp_skb_pcount(skb) == 1)
1333 return 1; 1335 return 1;
1334 1336
1335 in_flight = tcp_packets_in_flight(tp); 1337 in_flight = tcp_packets_in_flight(tp);
@@ -1398,7 +1400,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
1398 * Nagle can be ignored during F-RTO too (see RFC4138). 1400 * Nagle can be ignored during F-RTO too (see RFC4138).
1399 */ 1401 */
1400 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || 1402 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
1401 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) 1403 (TCP_SKB_CB(skb)->flags & TCPHDR_FIN))
1402 return 1; 1404 return 1;
1403 1405
1404 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) 1406 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1461,7 +1463,7 @@ int tcp_may_send_now(struct sock *sk)
1461 * packet has never been sent out before (and thus is not cloned). 1463 * packet has never been sent out before (and thus is not cloned).
1462 */ 1464 */
1463static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, 1465static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1464 unsigned int mss_now) 1466 unsigned int mss_now, gfp_t gfp)
1465{ 1467{
1466 struct sk_buff *buff; 1468 struct sk_buff *buff;
1467 int nlen = skb->len - len; 1469 int nlen = skb->len - len;
@@ -1471,7 +1473,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1471 if (skb->len != skb->data_len) 1473 if (skb->len != skb->data_len)
1472 return tcp_fragment(sk, skb, len, mss_now); 1474 return tcp_fragment(sk, skb, len, mss_now);
1473 1475
1474 buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC); 1476 buff = sk_stream_alloc_skb(sk, 0, gfp);
1475 if (unlikely(buff == NULL)) 1477 if (unlikely(buff == NULL))
1476 return -ENOMEM; 1478 return -ENOMEM;
1477 1479
@@ -1487,7 +1489,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1487 1489
1488 /* PSH and FIN should only be set in the second packet. */ 1490 /* PSH and FIN should only be set in the second packet. */
1489 flags = TCP_SKB_CB(skb)->flags; 1491 flags = TCP_SKB_CB(skb)->flags;
1490 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); 1492 TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1491 TCP_SKB_CB(buff)->flags = flags; 1493 TCP_SKB_CB(buff)->flags = flags;
1492 1494
1493 /* This packet was never sent out yet, so no SACK bits. */ 1495 /* This packet was never sent out yet, so no SACK bits. */
@@ -1518,7 +1520,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1518 const struct inet_connection_sock *icsk = inet_csk(sk); 1520 const struct inet_connection_sock *icsk = inet_csk(sk);
1519 u32 send_win, cong_win, limit, in_flight; 1521 u32 send_win, cong_win, limit, in_flight;
1520 1522
1521 if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) 1523 if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
1522 goto send_now; 1524 goto send_now;
1523 1525
1524 if (icsk->icsk_ca_state != TCP_CA_Open) 1526 if (icsk->icsk_ca_state != TCP_CA_Open)
@@ -1644,7 +1646,7 @@ static int tcp_mtu_probe(struct sock *sk)
1644 1646
1645 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; 1647 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1646 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; 1648 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1647 TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; 1649 TCP_SKB_CB(nskb)->flags = TCPHDR_ACK;
1648 TCP_SKB_CB(nskb)->sacked = 0; 1650 TCP_SKB_CB(nskb)->sacked = 0;
1649 nskb->csum = 0; 1651 nskb->csum = 0;
1650 nskb->ip_summed = skb->ip_summed; 1652 nskb->ip_summed = skb->ip_summed;
@@ -1669,7 +1671,7 @@ static int tcp_mtu_probe(struct sock *sk)
1669 sk_wmem_free_skb(sk, skb); 1671 sk_wmem_free_skb(sk, skb);
1670 } else { 1672 } else {
1671 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & 1673 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
1672 ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 1674 ~(TCPHDR_FIN|TCPHDR_PSH);
1673 if (!skb_shinfo(skb)->nr_frags) { 1675 if (!skb_shinfo(skb)->nr_frags) {
1674 skb_pull(skb, copy); 1676 skb_pull(skb, copy);
1675 if (skb->ip_summed != CHECKSUM_PARTIAL) 1677 if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -1769,7 +1771,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1769 cwnd_quota); 1771 cwnd_quota);
1770 1772
1771 if (skb->len > limit && 1773 if (skb->len > limit &&
1772 unlikely(tso_fragment(sk, skb, limit, mss_now))) 1774 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
1773 break; 1775 break;
1774 1776
1775 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1777 TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2020,7 +2022,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2020 2022
2021 if (!sysctl_tcp_retrans_collapse) 2023 if (!sysctl_tcp_retrans_collapse)
2022 return; 2024 return;
2023 if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) 2025 if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN)
2024 return; 2026 return;
2025 2027
2026 tcp_for_write_queue_from_safe(skb, tmp, sk) { 2028 tcp_for_write_queue_from_safe(skb, tmp, sk) {
@@ -2112,7 +2114,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2112 * since it is cheap to do so and saves bytes on the network. 2114 * since it is cheap to do so and saves bytes on the network.
2113 */ 2115 */
2114 if (skb->len > 0 && 2116 if (skb->len > 0 &&
2115 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && 2117 (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) &&
2116 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { 2118 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
2117 if (!pskb_trim(skb, 0)) { 2119 if (!pskb_trim(skb, 0)) {
2118 /* Reuse, even though it does some unnecessary work */ 2120 /* Reuse, even though it does some unnecessary work */
@@ -2208,6 +2210,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
2208 int mib_idx; 2210 int mib_idx;
2209 int fwd_rexmitting = 0; 2211 int fwd_rexmitting = 0;
2210 2212
2213 if (!tp->packets_out)
2214 return;
2215
2211 if (!tp->lost_out) 2216 if (!tp->lost_out)
2212 tp->retransmit_high = tp->snd_una; 2217 tp->retransmit_high = tp->snd_una;
2213 2218
@@ -2301,7 +2306,7 @@ void tcp_send_fin(struct sock *sk)
2301 mss_now = tcp_current_mss(sk); 2306 mss_now = tcp_current_mss(sk);
2302 2307
2303 if (tcp_send_head(sk) != NULL) { 2308 if (tcp_send_head(sk) != NULL) {
2304 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; 2309 TCP_SKB_CB(skb)->flags |= TCPHDR_FIN;
2305 TCP_SKB_CB(skb)->end_seq++; 2310 TCP_SKB_CB(skb)->end_seq++;
2306 tp->write_seq++; 2311 tp->write_seq++;
2307 } else { 2312 } else {
@@ -2318,7 +2323,7 @@ void tcp_send_fin(struct sock *sk)
2318 skb_reserve(skb, MAX_TCP_HEADER); 2323 skb_reserve(skb, MAX_TCP_HEADER);
2319 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ 2324 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
2320 tcp_init_nondata_skb(skb, tp->write_seq, 2325 tcp_init_nondata_skb(skb, tp->write_seq,
2321 TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); 2326 TCPHDR_ACK | TCPHDR_FIN);
2322 tcp_queue_skb(sk, skb); 2327 tcp_queue_skb(sk, skb);
2323 } 2328 }
2324 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); 2329 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2343,7 +2348,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2343 /* Reserve space for headers and prepare control bits. */ 2348 /* Reserve space for headers and prepare control bits. */
2344 skb_reserve(skb, MAX_TCP_HEADER); 2349 skb_reserve(skb, MAX_TCP_HEADER);
2345 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), 2350 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2346 TCPCB_FLAG_ACK | TCPCB_FLAG_RST); 2351 TCPHDR_ACK | TCPHDR_RST);
2347 /* Send it off. */ 2352 /* Send it off. */
2348 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2353 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2349 if (tcp_transmit_skb(sk, skb, 0, priority)) 2354 if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2363,11 +2368,11 @@ int tcp_send_synack(struct sock *sk)
2363 struct sk_buff *skb; 2368 struct sk_buff *skb;
2364 2369
2365 skb = tcp_write_queue_head(sk); 2370 skb = tcp_write_queue_head(sk);
2366 if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) { 2371 if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) {
2367 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); 2372 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2368 return -EFAULT; 2373 return -EFAULT;
2369 } 2374 }
2370 if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) { 2375 if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) {
2371 if (skb_cloned(skb)) { 2376 if (skb_cloned(skb)) {
2372 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 2377 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2373 if (nskb == NULL) 2378 if (nskb == NULL)
@@ -2381,7 +2386,7 @@ int tcp_send_synack(struct sock *sk)
2381 skb = nskb; 2386 skb = nskb;
2382 } 2387 }
2383 2388
2384 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK; 2389 TCP_SKB_CB(skb)->flags |= TCPHDR_ACK;
2385 TCP_ECN_send_synack(tcp_sk(sk), skb); 2390 TCP_ECN_send_synack(tcp_sk(sk), skb);
2386 } 2391 }
2387 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2392 TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2460,7 +2465,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2460 * not even correctly set) 2465 * not even correctly set)
2461 */ 2466 */
2462 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, 2467 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2463 TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); 2468 TCPHDR_SYN | TCPHDR_ACK);
2464 2469
2465 if (OPTION_COOKIE_EXTENSION & opts.options) { 2470 if (OPTION_COOKIE_EXTENSION & opts.options) {
2466 if (s_data_desired) { 2471 if (s_data_desired) {
@@ -2515,6 +2520,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2515 2520
2516 return skb; 2521 return skb;
2517} 2522}
2523EXPORT_SYMBOL(tcp_make_synack);
2518 2524
2519/* Do all connect socket setups that can be done AF independent. */ 2525/* Do all connect socket setups that can be done AF independent. */
2520static void tcp_connect_init(struct sock *sk) 2526static void tcp_connect_init(struct sock *sk)
@@ -2592,7 +2598,7 @@ int tcp_connect(struct sock *sk)
2592 skb_reserve(buff, MAX_TCP_HEADER); 2598 skb_reserve(buff, MAX_TCP_HEADER);
2593 2599
2594 tp->snd_nxt = tp->write_seq; 2600 tp->snd_nxt = tp->write_seq;
2595 tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN); 2601 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
2596 TCP_ECN_send_syn(sk, buff); 2602 TCP_ECN_send_syn(sk, buff);
2597 2603
2598 /* Send it off. */ 2604 /* Send it off. */
@@ -2617,6 +2623,7 @@ int tcp_connect(struct sock *sk)
2617 inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 2623 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
2618 return 0; 2624 return 0;
2619} 2625}
2626EXPORT_SYMBOL(tcp_connect);
2620 2627
2621/* Send out a delayed ack, the caller does the policy checking 2628/* Send out a delayed ack, the caller does the policy checking
2622 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() 2629 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
@@ -2698,7 +2705,7 @@ void tcp_send_ack(struct sock *sk)
2698 2705
2699 /* Reserve space for headers and prepare control bits. */ 2706 /* Reserve space for headers and prepare control bits. */
2700 skb_reserve(buff, MAX_TCP_HEADER); 2707 skb_reserve(buff, MAX_TCP_HEADER);
2701 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK); 2708 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
2702 2709
2703 /* Send it off, this clears delayed acks for us. */ 2710 /* Send it off, this clears delayed acks for us. */
2704 TCP_SKB_CB(buff)->when = tcp_time_stamp; 2711 TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2732,7 +2739,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2732 * end to send an ack. Don't queue or clone SKB, just 2739 * end to send an ack. Don't queue or clone SKB, just
2733 * send it. 2740 * send it.
2734 */ 2741 */
2735 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK); 2742 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
2736 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2743 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2737 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 2744 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2738} 2745}
@@ -2762,13 +2769,13 @@ int tcp_write_wakeup(struct sock *sk)
2762 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || 2769 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
2763 skb->len > mss) { 2770 skb->len > mss) {
2764 seg_size = min(seg_size, mss); 2771 seg_size = min(seg_size, mss);
2765 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 2772 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
2766 if (tcp_fragment(sk, skb, seg_size, mss)) 2773 if (tcp_fragment(sk, skb, seg_size, mss))
2767 return -1; 2774 return -1;
2768 } else if (!tcp_skb_pcount(skb)) 2775 } else if (!tcp_skb_pcount(skb))
2769 tcp_set_skb_tso_segs(sk, skb, mss); 2776 tcp_set_skb_tso_segs(sk, skb, mss);
2770 2777
2771 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 2778 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
2772 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2779 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2773 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2780 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2774 if (!err) 2781 if (!err)
@@ -2821,10 +2828,3 @@ void tcp_send_probe0(struct sock *sk)
2821 TCP_RTO_MAX); 2828 TCP_RTO_MAX);
2822 } 2829 }
2823} 2830}
2824
2825EXPORT_SYMBOL(tcp_select_initial_window);
2826EXPORT_SYMBOL(tcp_connect);
2827EXPORT_SYMBOL(tcp_make_synack);
2828EXPORT_SYMBOL(tcp_simple_retransmit);
2829EXPORT_SYMBOL(tcp_sync_mss);
2830EXPORT_SYMBOL(tcp_mtup_init);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 440a5c6004f6..808bb920c9f5 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -41,7 +41,6 @@ void tcp_init_xmit_timers(struct sock *sk)
41 inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, 41 inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
42 &tcp_keepalive_timer); 42 &tcp_keepalive_timer);
43} 43}
44
45EXPORT_SYMBOL(tcp_init_xmit_timers); 44EXPORT_SYMBOL(tcp_init_xmit_timers);
46 45
47static void tcp_write_err(struct sock *sk) 46static void tcp_write_err(struct sock *sk)
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 3b3813cc80b9..59186ca7808a 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -48,7 +48,6 @@ err:
48 48
49 return ret; 49 return ret;
50} 50}
51
52EXPORT_SYMBOL(xfrm4_tunnel_register); 51EXPORT_SYMBOL(xfrm4_tunnel_register);
53 52
54int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) 53int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
@@ -72,7 +71,6 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
72 71
73 return ret; 72 return ret;
74} 73}
75
76EXPORT_SYMBOL(xfrm4_tunnel_deregister); 74EXPORT_SYMBOL(xfrm4_tunnel_deregister);
77 75
78static int tunnel4_rcv(struct sk_buff *skb) 76static int tunnel4_rcv(struct sk_buff *skb)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9de6a698f91d..32e0bef60d0a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -633,9 +633,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
633 if (!inet->recverr) { 633 if (!inet->recverr) {
634 if (!harderr || sk->sk_state != TCP_ESTABLISHED) 634 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
635 goto out; 635 goto out;
636 } else { 636 } else
637 ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); 637 ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
638 } 638
639 sk->sk_err = err; 639 sk->sk_err = err;
640 sk->sk_error_report(sk); 640 sk->sk_error_report(sk);
641out: 641out:
@@ -914,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
914 !sock_flag(sk, SOCK_BROADCAST)) 914 !sock_flag(sk, SOCK_BROADCAST))
915 goto out; 915 goto out;
916 if (connected) 916 if (connected)
917 sk_dst_set(sk, dst_clone(&rt->u.dst)); 917 sk_dst_set(sk, dst_clone(&rt->dst));
918 } 918 }
919 919
920 if (msg->msg_flags&MSG_CONFIRM) 920 if (msg->msg_flags&MSG_CONFIRM)
@@ -978,7 +978,7 @@ out:
978 return err; 978 return err;
979 979
980do_confirm: 980do_confirm:
981 dst_confirm(&rt->u.dst); 981 dst_confirm(&rt->dst);
982 if (!(msg->msg_flags&MSG_PROBE) || len) 982 if (!(msg->msg_flags&MSG_PROBE) || len)
983 goto back_from_confirm; 983 goto back_from_confirm;
984 err = 0; 984 err = 0;
@@ -1063,10 +1063,11 @@ static unsigned int first_packet_length(struct sock *sk)
1063 spin_unlock_bh(&rcvq->lock); 1063 spin_unlock_bh(&rcvq->lock);
1064 1064
1065 if (!skb_queue_empty(&list_kill)) { 1065 if (!skb_queue_empty(&list_kill)) {
1066 lock_sock_bh(sk); 1066 bool slow = lock_sock_fast(sk);
1067
1067 __skb_queue_purge(&list_kill); 1068 __skb_queue_purge(&list_kill);
1068 sk_mem_reclaim_partial(sk); 1069 sk_mem_reclaim_partial(sk);
1069 unlock_sock_bh(sk); 1070 unlock_sock_fast(sk, slow);
1070 } 1071 }
1071 return res; 1072 return res;
1072} 1073}
@@ -1123,6 +1124,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1123 int peeked; 1124 int peeked;
1124 int err; 1125 int err;
1125 int is_udplite = IS_UDPLITE(sk); 1126 int is_udplite = IS_UDPLITE(sk);
1127 bool slow;
1126 1128
1127 /* 1129 /*
1128 * Check any passed addresses 1130 * Check any passed addresses
@@ -1197,10 +1199,10 @@ out:
1197 return err; 1199 return err;
1198 1200
1199csum_copy_err: 1201csum_copy_err:
1200 lock_sock_bh(sk); 1202 slow = lock_sock_fast(sk);
1201 if (!skb_kill_datagram(sk, skb, flags)) 1203 if (!skb_kill_datagram(sk, skb, flags))
1202 UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1204 UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1203 unlock_sock_bh(sk); 1205 unlock_sock_fast(sk, slow);
1204 1206
1205 if (noblock) 1207 if (noblock)
1206 return -EAGAIN; 1208 return -EAGAIN;
@@ -1625,9 +1627,9 @@ int udp_rcv(struct sk_buff *skb)
1625 1627
1626void udp_destroy_sock(struct sock *sk) 1628void udp_destroy_sock(struct sock *sk)
1627{ 1629{
1628 lock_sock_bh(sk); 1630 bool slow = lock_sock_fast(sk);
1629 udp_flush_pending_frames(sk); 1631 udp_flush_pending_frames(sk);
1630 unlock_sock_bh(sk); 1632 unlock_sock_fast(sk, slow);
1631} 1633}
1632 1634
1633/* 1635/*
@@ -1686,8 +1688,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1686 return -ENOPROTOOPT; 1688 return -ENOPROTOOPT;
1687 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ 1689 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
1688 val = 8; 1690 val = 8;
1689 else if (val > USHORT_MAX) 1691 else if (val > USHRT_MAX)
1690 val = USHORT_MAX; 1692 val = USHRT_MAX;
1691 up->pcslen = val; 1693 up->pcslen = val;
1692 up->pcflag |= UDPLITE_SEND_CC; 1694 up->pcflag |= UDPLITE_SEND_CC;
1693 break; 1695 break;
@@ -1700,8 +1702,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1700 return -ENOPROTOOPT; 1702 return -ENOPROTOOPT;
1701 if (val != 0 && val < 8) /* Avoid silly minimal values. */ 1703 if (val != 0 && val < 8) /* Avoid silly minimal values. */
1702 val = 8; 1704 val = 8;
1703 else if (val > USHORT_MAX) 1705 else if (val > USHRT_MAX)
1704 val = USHORT_MAX; 1706 val = USHRT_MAX;
1705 up->pcrlen = val; 1707 up->pcrlen = val;
1706 up->pcflag |= UDPLITE_RECV_CC; 1708 up->pcflag |= UDPLITE_RECV_CC;
1707 break; 1709 break;
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 6610bf76369f..ab76aa928fa9 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -58,6 +58,7 @@ struct proto udplite_prot = {
58 .compat_getsockopt = compat_udp_getsockopt, 58 .compat_getsockopt = compat_udp_getsockopt,
59#endif 59#endif
60}; 60};
61EXPORT_SYMBOL(udplite_prot);
61 62
62static struct inet_protosw udplite4_protosw = { 63static struct inet_protosw udplite4_protosw = {
63 .type = SOCK_DGRAM, 64 .type = SOCK_DGRAM,
@@ -127,5 +128,3 @@ out_unregister_proto:
127out_register_err: 128out_register_err:
128 printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); 129 printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__);
129} 130}
130
131EXPORT_SYMBOL(udplite_prot);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index ad8fbb871aa0..06814b6216dc 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -163,5 +163,4 @@ int xfrm4_rcv(struct sk_buff *skb)
163{ 163{
164 return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); 164 return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
165} 165}
166
167EXPORT_SYMBOL(xfrm4_rcv); 166EXPORT_SYMBOL(xfrm4_rcv);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1705476670ef..869078d4eeb9 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
37 fl.fl4_src = saddr->a4; 37 fl.fl4_src = saddr->a4;
38 38
39 err = __ip_route_output_key(net, &rt, &fl); 39 err = __ip_route_output_key(net, &rt, &fl);
40 dst = &rt->u.dst; 40 dst = &rt->dst;
41 if (err) 41 if (err)
42 dst = ERR_PTR(err); 42 dst = ERR_PTR(err);
43 return dst; 43 return dst;
@@ -108,6 +108,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
108 u8 *xprth = skb_network_header(skb) + iph->ihl * 4; 108 u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
109 109
110 memset(fl, 0, sizeof(struct flowi)); 110 memset(fl, 0, sizeof(struct flowi));
111 fl->mark = skb->mark;
112
111 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { 113 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
112 switch (iph->protocol) { 114 switch (iph->protocol) {
113 case IPPROTO_UDP: 115 case IPPROTO_UDP:
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1a698df5706..e81155d2f251 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -121,8 +121,6 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
121static int __ipv6_regen_rndid(struct inet6_dev *idev); 121static int __ipv6_regen_rndid(struct inet6_dev *idev);
122static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); 122static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
123static void ipv6_regen_rndid(unsigned long data); 123static void ipv6_regen_rndid(unsigned long data);
124
125static int desync_factor = MAX_DESYNC_FACTOR * HZ;
126#endif 124#endif
127 125
128static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); 126static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
@@ -284,13 +282,16 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
284static int snmp6_alloc_dev(struct inet6_dev *idev) 282static int snmp6_alloc_dev(struct inet6_dev *idev)
285{ 283{
286 if (snmp_mib_init((void __percpu **)idev->stats.ipv6, 284 if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
287 sizeof(struct ipstats_mib)) < 0) 285 sizeof(struct ipstats_mib),
286 __alignof__(struct ipstats_mib)) < 0)
288 goto err_ip; 287 goto err_ip;
289 if (snmp_mib_init((void __percpu **)idev->stats.icmpv6, 288 if (snmp_mib_init((void __percpu **)idev->stats.icmpv6,
290 sizeof(struct icmpv6_mib)) < 0) 289 sizeof(struct icmpv6_mib),
290 __alignof__(struct icmpv6_mib)) < 0)
291 goto err_icmp; 291 goto err_icmp;
292 if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg, 292 if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg,
293 sizeof(struct icmpv6msg_mib)) < 0) 293 sizeof(struct icmpv6msg_mib),
294 __alignof__(struct icmpv6msg_mib)) < 0)
294 goto err_icmpmsg; 295 goto err_icmpmsg;
295 296
296 return 0; 297 return 0;
@@ -557,7 +558,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
557 pr_warning("Freeing alive inet6 address %p\n", ifp); 558 pr_warning("Freeing alive inet6 address %p\n", ifp);
558 return; 559 return;
559 } 560 }
560 dst_release(&ifp->rt->u.dst); 561 dst_release(&ifp->rt->dst);
561 562
562 call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu); 563 call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);
563} 564}
@@ -823,7 +824,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
823 rt->rt6i_flags |= RTF_EXPIRES; 824 rt->rt6i_flags |= RTF_EXPIRES;
824 } 825 }
825 } 826 }
826 dst_release(&rt->u.dst); 827 dst_release(&rt->dst);
827 } 828 }
828 829
829out: 830out:
@@ -890,7 +891,8 @@ retry:
890 idev->cnf.temp_valid_lft); 891 idev->cnf.temp_valid_lft);
891 tmp_prefered_lft = min_t(__u32, 892 tmp_prefered_lft = min_t(__u32,
892 ifp->prefered_lft, 893 ifp->prefered_lft,
893 idev->cnf.temp_prefered_lft - desync_factor / HZ); 894 idev->cnf.temp_prefered_lft -
895 idev->cnf.max_desync_factor);
894 tmp_plen = ifp->prefix_len; 896 tmp_plen = ifp->prefix_len;
895 max_addresses = idev->cnf.max_addresses; 897 max_addresses = idev->cnf.max_addresses;
896 tmp_cstamp = ifp->cstamp; 898 tmp_cstamp = ifp->cstamp;
@@ -1650,7 +1652,8 @@ static void ipv6_regen_rndid(unsigned long data)
1650 1652
1651 expires = jiffies + 1653 expires = jiffies +
1652 idev->cnf.temp_prefered_lft * HZ - 1654 idev->cnf.temp_prefered_lft * HZ -
1653 idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor; 1655 idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time -
1656 idev->cnf.max_desync_factor * HZ;
1654 if (time_before(expires, jiffies)) { 1657 if (time_before(expires, jiffies)) {
1655 printk(KERN_WARNING 1658 printk(KERN_WARNING
1656 "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n", 1659 "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
@@ -1863,7 +1866,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
1863 dev, expires, flags); 1866 dev, expires, flags);
1864 } 1867 }
1865 if (rt) 1868 if (rt)
1866 dst_release(&rt->u.dst); 1869 dst_release(&rt->dst);
1867 } 1870 }
1868 1871
1869 /* Try to figure out our local address for this prefix */ 1872 /* Try to figure out our local address for this prefix */
@@ -3492,8 +3495,12 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
3492 preferred -= tval; 3495 preferred -= tval;
3493 else 3496 else
3494 preferred = 0; 3497 preferred = 0;
3495 if (valid != INFINITY_LIFE_TIME) 3498 if (valid != INFINITY_LIFE_TIME) {
3496 valid -= tval; 3499 if (valid > tval)
3500 valid -= tval;
3501 else
3502 valid = 0;
3503 }
3497 } 3504 }
3498 } else { 3505 } else {
3499 preferred = INFINITY_LIFE_TIME; 3506 preferred = INFINITY_LIFE_TIME;
@@ -3855,12 +3862,28 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
3855 memset(&stats[items], 0, pad); 3862 memset(&stats[items], 0, pad);
3856} 3863}
3857 3864
3865static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
3866 int items, int bytes, size_t syncpoff)
3867{
3868 int i;
3869 int pad = bytes - sizeof(u64) * items;
3870 BUG_ON(pad < 0);
3871
3872 /* Use put_unaligned() because stats may not be aligned for u64. */
3873 put_unaligned(items, &stats[0]);
3874 for (i = 1; i < items; i++)
3875 put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]);
3876
3877 memset(&stats[items], 0, pad);
3878}
3879
3858static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, 3880static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
3859 int bytes) 3881 int bytes)
3860{ 3882{
3861 switch (attrtype) { 3883 switch (attrtype) {
3862 case IFLA_INET6_STATS: 3884 case IFLA_INET6_STATS:
3863 __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); 3885 __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
3886 IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
3864 break; 3887 break;
3865 case IFLA_INET6_ICMP6STATS: 3888 case IFLA_INET6_ICMP6STATS:
3866 __snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); 3889 __snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
@@ -4093,11 +4116,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
4093 if (ifp->idev->cnf.forwarding) 4116 if (ifp->idev->cnf.forwarding)
4094 addrconf_leave_anycast(ifp); 4117 addrconf_leave_anycast(ifp);
4095 addrconf_leave_solict(ifp->idev, &ifp->addr); 4118 addrconf_leave_solict(ifp->idev, &ifp->addr);
4096 dst_hold(&ifp->rt->u.dst); 4119 dst_hold(&ifp->rt->dst);
4097 4120
4098 if (ifp->state == INET6_IFADDR_STATE_DEAD && 4121 if (ifp->state == INET6_IFADDR_STATE_DEAD &&
4099 ip6_del_rt(ifp->rt)) 4122 ip6_del_rt(ifp->rt))
4100 dst_free(&ifp->rt->u.dst); 4123 dst_free(&ifp->rt->dst);
4101 break; 4124 break;
4102 } 4125 }
4103} 4126}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 8c4348cb1950..f0e774cea386 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -53,11 +53,7 @@ static struct ip6addrlbl_table
53static inline 53static inline
54struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) 54struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
55{ 55{
56#ifdef CONFIG_NET_NS 56 return read_pnet(&lbl->lbl_net);
57 return lbl->lbl_net;
58#else
59 return &init_net;
60#endif
61} 57}
62 58
63/* 59/*
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e733942dafe1..56b9bf2516f4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -522,10 +522,10 @@ const struct proto_ops inet6_stream_ops = {
522 .shutdown = inet_shutdown, /* ok */ 522 .shutdown = inet_shutdown, /* ok */
523 .setsockopt = sock_common_setsockopt, /* ok */ 523 .setsockopt = sock_common_setsockopt, /* ok */
524 .getsockopt = sock_common_getsockopt, /* ok */ 524 .getsockopt = sock_common_getsockopt, /* ok */
525 .sendmsg = tcp_sendmsg, /* ok */ 525 .sendmsg = inet_sendmsg, /* ok */
526 .recvmsg = sock_common_recvmsg, /* ok */ 526 .recvmsg = inet_recvmsg, /* ok */
527 .mmap = sock_no_mmap, 527 .mmap = sock_no_mmap,
528 .sendpage = tcp_sendpage, 528 .sendpage = inet_sendpage,
529 .splice_read = tcp_splice_read, 529 .splice_read = tcp_splice_read,
530#ifdef CONFIG_COMPAT 530#ifdef CONFIG_COMPAT
531 .compat_setsockopt = compat_sock_common_setsockopt, 531 .compat_setsockopt = compat_sock_common_setsockopt,
@@ -549,7 +549,7 @@ const struct proto_ops inet6_dgram_ops = {
549 .setsockopt = sock_common_setsockopt, /* ok */ 549 .setsockopt = sock_common_setsockopt, /* ok */
550 .getsockopt = sock_common_getsockopt, /* ok */ 550 .getsockopt = sock_common_getsockopt, /* ok */
551 .sendmsg = inet_sendmsg, /* ok */ 551 .sendmsg = inet_sendmsg, /* ok */
552 .recvmsg = sock_common_recvmsg, /* ok */ 552 .recvmsg = inet_recvmsg, /* ok */
553 .mmap = sock_no_mmap, 553 .mmap = sock_no_mmap,
554 .sendpage = sock_no_sendpage, 554 .sendpage = sock_no_sendpage,
555#ifdef CONFIG_COMPAT 555#ifdef CONFIG_COMPAT
@@ -651,7 +651,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
651 651
652 if (dst == NULL) { 652 if (dst == NULL) {
653 struct inet_sock *inet = inet_sk(sk); 653 struct inet_sock *inet = inet_sk(sk);
654 struct in6_addr *final_p = NULL, final; 654 struct in6_addr *final_p, final;
655 struct flowi fl; 655 struct flowi fl;
656 656
657 memset(&fl, 0, sizeof(fl)); 657 memset(&fl, 0, sizeof(fl));
@@ -665,12 +665,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
665 fl.fl_ip_sport = inet->inet_sport; 665 fl.fl_ip_sport = inet->inet_sport;
666 security_sk_classify_flow(sk, &fl); 666 security_sk_classify_flow(sk, &fl);
667 667
668 if (np->opt && np->opt->srcrt) { 668 final_p = fl6_update_dst(&fl, np->opt, &final);
669 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
670 ipv6_addr_copy(&final, &fl.fl6_dst);
671 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
672 final_p = &final;
673 }
674 669
675 err = ip6_dst_lookup(sk, &dst, &fl); 670 err = ip6_dst_lookup(sk, &dst, &fl);
676 if (err) { 671 if (err) {
@@ -976,19 +971,24 @@ static void ipv6_packet_cleanup(void)
976static int __net_init ipv6_init_mibs(struct net *net) 971static int __net_init ipv6_init_mibs(struct net *net)
977{ 972{
978 if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, 973 if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
979 sizeof (struct udp_mib)) < 0) 974 sizeof(struct udp_mib),
975 __alignof__(struct udp_mib)) < 0)
980 return -ENOMEM; 976 return -ENOMEM;
981 if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, 977 if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
982 sizeof (struct udp_mib)) < 0) 978 sizeof(struct udp_mib),
979 __alignof__(struct udp_mib)) < 0)
983 goto err_udplite_mib; 980 goto err_udplite_mib;
984 if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, 981 if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
985 sizeof(struct ipstats_mib)) < 0) 982 sizeof(struct ipstats_mib),
983 __alignof__(struct ipstats_mib)) < 0)
986 goto err_ip_mib; 984 goto err_ip_mib;
987 if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, 985 if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
988 sizeof(struct icmpv6_mib)) < 0) 986 sizeof(struct icmpv6_mib),
987 __alignof__(struct icmpv6_mib)) < 0)
989 goto err_icmp_mib; 988 goto err_icmp_mib;
990 if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics, 989 if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics,
991 sizeof(struct icmpv6msg_mib)) < 0) 990 sizeof(struct icmpv6msg_mib),
991 __alignof__(struct icmpv6msg_mib)) < 0)
992 goto err_icmpmsg_mib; 992 goto err_icmpmsg_mib;
993 return 0; 993 return 0;
994 994
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index b5b07054508a..0e5e943446f0 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -77,41 +77,40 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
77 pac->acl_next = NULL; 77 pac->acl_next = NULL;
78 ipv6_addr_copy(&pac->acl_addr, addr); 78 ipv6_addr_copy(&pac->acl_addr, addr);
79 79
80 rcu_read_lock();
80 if (ifindex == 0) { 81 if (ifindex == 0) {
81 struct rt6_info *rt; 82 struct rt6_info *rt;
82 83
83 rt = rt6_lookup(net, addr, NULL, 0, 0); 84 rt = rt6_lookup(net, addr, NULL, 0, 0);
84 if (rt) { 85 if (rt) {
85 dev = rt->rt6i_dev; 86 dev = rt->rt6i_dev;
86 dev_hold(dev); 87 dst_release(&rt->dst);
87 dst_release(&rt->u.dst);
88 } else if (ishost) { 88 } else if (ishost) {
89 err = -EADDRNOTAVAIL; 89 err = -EADDRNOTAVAIL;
90 goto out_free_pac; 90 goto error;
91 } else { 91 } else {
92 /* router, no matching interface: just pick one */ 92 /* router, no matching interface: just pick one */
93 93 dev = dev_get_by_flags_rcu(net, IFF_UP,
94 dev = dev_get_by_flags(net, IFF_UP, IFF_UP|IFF_LOOPBACK); 94 IFF_UP | IFF_LOOPBACK);
95 } 95 }
96 } else 96 } else
97 dev = dev_get_by_index(net, ifindex); 97 dev = dev_get_by_index_rcu(net, ifindex);
98 98
99 if (dev == NULL) { 99 if (dev == NULL) {
100 err = -ENODEV; 100 err = -ENODEV;
101 goto out_free_pac; 101 goto error;
102 } 102 }
103 103
104 idev = in6_dev_get(dev); 104 idev = __in6_dev_get(dev);
105 if (!idev) { 105 if (!idev) {
106 if (ifindex) 106 if (ifindex)
107 err = -ENODEV; 107 err = -ENODEV;
108 else 108 else
109 err = -EADDRNOTAVAIL; 109 err = -EADDRNOTAVAIL;
110 goto out_dev_put; 110 goto error;
111 } 111 }
112 /* reset ishost, now that we have a specific device */ 112 /* reset ishost, now that we have a specific device */
113 ishost = !idev->cnf.forwarding; 113 ishost = !idev->cnf.forwarding;
114 in6_dev_put(idev);
115 114
116 pac->acl_ifindex = dev->ifindex; 115 pac->acl_ifindex = dev->ifindex;
117 116
@@ -124,26 +123,22 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
124 if (ishost) 123 if (ishost)
125 err = -EADDRNOTAVAIL; 124 err = -EADDRNOTAVAIL;
126 if (err) 125 if (err)
127 goto out_dev_put; 126 goto error;
128 } 127 }
129 128
130 err = ipv6_dev_ac_inc(dev, addr); 129 err = ipv6_dev_ac_inc(dev, addr);
131 if (err) 130 if (!err) {
132 goto out_dev_put; 131 write_lock_bh(&ipv6_sk_ac_lock);
133 132 pac->acl_next = np->ipv6_ac_list;
134 write_lock_bh(&ipv6_sk_ac_lock); 133 np->ipv6_ac_list = pac;
135 pac->acl_next = np->ipv6_ac_list; 134 write_unlock_bh(&ipv6_sk_ac_lock);
136 np->ipv6_ac_list = pac; 135 pac = NULL;
137 write_unlock_bh(&ipv6_sk_ac_lock); 136 }
138
139 dev_put(dev);
140
141 return 0;
142 137
143out_dev_put: 138error:
144 dev_put(dev); 139 rcu_read_unlock();
145out_free_pac: 140 if (pac)
146 sock_kfree_s(sk, pac, sizeof(*pac)); 141 sock_kfree_s(sk, pac, sizeof(*pac));
147 return err; 142 return err;
148} 143}
149 144
@@ -176,11 +171,12 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
176 171
177 write_unlock_bh(&ipv6_sk_ac_lock); 172 write_unlock_bh(&ipv6_sk_ac_lock);
178 173
179 dev = dev_get_by_index(net, pac->acl_ifindex); 174 rcu_read_lock();
180 if (dev) { 175 dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
176 if (dev)
181 ipv6_dev_ac_dec(dev, &pac->acl_addr); 177 ipv6_dev_ac_dec(dev, &pac->acl_addr);
182 dev_put(dev); 178 rcu_read_unlock();
183 } 179
184 sock_kfree_s(sk, pac, sizeof(*pac)); 180 sock_kfree_s(sk, pac, sizeof(*pac));
185 return 0; 181 return 0;
186} 182}
@@ -199,13 +195,12 @@ void ipv6_sock_ac_close(struct sock *sk)
199 write_unlock_bh(&ipv6_sk_ac_lock); 195 write_unlock_bh(&ipv6_sk_ac_lock);
200 196
201 prev_index = 0; 197 prev_index = 0;
198 rcu_read_lock();
202 while (pac) { 199 while (pac) {
203 struct ipv6_ac_socklist *next = pac->acl_next; 200 struct ipv6_ac_socklist *next = pac->acl_next;
204 201
205 if (pac->acl_ifindex != prev_index) { 202 if (pac->acl_ifindex != prev_index) {
206 if (dev) 203 dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
207 dev_put(dev);
208 dev = dev_get_by_index(net, pac->acl_ifindex);
209 prev_index = pac->acl_ifindex; 204 prev_index = pac->acl_ifindex;
210 } 205 }
211 if (dev) 206 if (dev)
@@ -213,8 +208,7 @@ void ipv6_sock_ac_close(struct sock *sk)
213 sock_kfree_s(sk, pac, sizeof(*pac)); 208 sock_kfree_s(sk, pac, sizeof(*pac));
214 pac = next; 209 pac = next;
215 } 210 }
216 if (dev) 211 rcu_read_unlock();
217 dev_put(dev);
218} 212}
219 213
220#if 0 214#if 0
@@ -250,7 +244,7 @@ static void aca_put(struct ifacaddr6 *ac)
250{ 244{
251 if (atomic_dec_and_test(&ac->aca_refcnt)) { 245 if (atomic_dec_and_test(&ac->aca_refcnt)) {
252 in6_dev_put(ac->aca_idev); 246 in6_dev_put(ac->aca_idev);
253 dst_release(&ac->aca_rt->u.dst); 247 dst_release(&ac->aca_rt->dst);
254 kfree(ac); 248 kfree(ac);
255 } 249 }
256} 250}
@@ -356,40 +350,39 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
356 write_unlock_bh(&idev->lock); 350 write_unlock_bh(&idev->lock);
357 addrconf_leave_solict(idev, &aca->aca_addr); 351 addrconf_leave_solict(idev, &aca->aca_addr);
358 352
359 dst_hold(&aca->aca_rt->u.dst); 353 dst_hold(&aca->aca_rt->dst);
360 ip6_del_rt(aca->aca_rt); 354 ip6_del_rt(aca->aca_rt);
361 355
362 aca_put(aca); 356 aca_put(aca);
363 return 0; 357 return 0;
364} 358}
365 359
360/* called with rcu_read_lock() */
366static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) 361static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
367{ 362{
368 int ret; 363 struct inet6_dev *idev = __in6_dev_get(dev);
369 struct inet6_dev *idev = in6_dev_get(dev); 364
370 if (idev == NULL) 365 if (idev == NULL)
371 return -ENODEV; 366 return -ENODEV;
372 ret = __ipv6_dev_ac_dec(idev, addr); 367 return __ipv6_dev_ac_dec(idev, addr);
373 in6_dev_put(idev);
374 return ret;
375} 368}
376 369
377/* 370/*
378 * check if the interface has this anycast address 371 * check if the interface has this anycast address
372 * called with rcu_read_lock()
379 */ 373 */
380static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) 374static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
381{ 375{
382 struct inet6_dev *idev; 376 struct inet6_dev *idev;
383 struct ifacaddr6 *aca; 377 struct ifacaddr6 *aca;
384 378
385 idev = in6_dev_get(dev); 379 idev = __in6_dev_get(dev);
386 if (idev) { 380 if (idev) {
387 read_lock_bh(&idev->lock); 381 read_lock_bh(&idev->lock);
388 for (aca = idev->ac_list; aca; aca = aca->aca_next) 382 for (aca = idev->ac_list; aca; aca = aca->aca_next)
389 if (ipv6_addr_equal(&aca->aca_addr, addr)) 383 if (ipv6_addr_equal(&aca->aca_addr, addr))
390 break; 384 break;
391 read_unlock_bh(&idev->lock); 385 read_unlock_bh(&idev->lock);
392 in6_dev_put(idev);
393 return aca != NULL; 386 return aca != NULL;
394 } 387 }
395 return 0; 388 return 0;
@@ -403,14 +396,15 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
403{ 396{
404 int found = 0; 397 int found = 0;
405 398
406 if (dev)
407 return ipv6_chk_acast_dev(dev, addr);
408 rcu_read_lock(); 399 rcu_read_lock();
409 for_each_netdev_rcu(net, dev) 400 if (dev)
410 if (ipv6_chk_acast_dev(dev, addr)) { 401 found = ipv6_chk_acast_dev(dev, addr);
411 found = 1; 402 else
412 break; 403 for_each_netdev_rcu(net, dev)
413 } 404 if (ipv6_chk_acast_dev(dev, addr)) {
405 found = 1;
406 break;
407 }
414 rcu_read_unlock(); 408 rcu_read_unlock();
415 return found; 409 return found;
416} 410}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 712684687c9a..7d929a22cbc2 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -38,10 +38,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
38 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 38 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
39 struct inet_sock *inet = inet_sk(sk); 39 struct inet_sock *inet = inet_sk(sk);
40 struct ipv6_pinfo *np = inet6_sk(sk); 40 struct ipv6_pinfo *np = inet6_sk(sk);
41 struct in6_addr *daddr, *final_p = NULL, final; 41 struct in6_addr *daddr, *final_p, final;
42 struct dst_entry *dst; 42 struct dst_entry *dst;
43 struct flowi fl; 43 struct flowi fl;
44 struct ip6_flowlabel *flowlabel = NULL; 44 struct ip6_flowlabel *flowlabel = NULL;
45 struct ipv6_txoptions *opt;
45 int addr_type; 46 int addr_type;
46 int err; 47 int err;
47 48
@@ -155,19 +156,8 @@ ipv4_connected:
155 156
156 security_sk_classify_flow(sk, &fl); 157 security_sk_classify_flow(sk, &fl);
157 158
158 if (flowlabel) { 159 opt = flowlabel ? flowlabel->opt : np->opt;
159 if (flowlabel->opt && flowlabel->opt->srcrt) { 160 final_p = fl6_update_dst(&fl, opt, &final);
160 struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
161 ipv6_addr_copy(&final, &fl.fl6_dst);
162 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
163 final_p = &final;
164 }
165 } else if (np->opt && np->opt->srcrt) {
166 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
167 ipv6_addr_copy(&final, &fl.fl6_dst);
168 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
169 final_p = &final;
170 }
171 161
172 err = ip6_dst_lookup(sk, &dst, &fl); 162 err = ip6_dst_lookup(sk, &dst, &fl);
173 if (err) 163 if (err)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 8a659f92d17a..262f105d23b9 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -312,6 +312,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
312 Routing header. 312 Routing header.
313 ********************************/ 313 ********************************/
314 314
315/* called with rcu_read_lock() */
315static int ipv6_rthdr_rcv(struct sk_buff *skb) 316static int ipv6_rthdr_rcv(struct sk_buff *skb)
316{ 317{
317 struct inet6_skb_parm *opt = IP6CB(skb); 318 struct inet6_skb_parm *opt = IP6CB(skb);
@@ -324,12 +325,9 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
324 struct net *net = dev_net(skb->dev); 325 struct net *net = dev_net(skb->dev);
325 int accept_source_route = net->ipv6.devconf_all->accept_source_route; 326 int accept_source_route = net->ipv6.devconf_all->accept_source_route;
326 327
327 idev = in6_dev_get(skb->dev); 328 idev = __in6_dev_get(skb->dev);
328 if (idev) { 329 if (idev && accept_source_route > idev->cnf.accept_source_route)
329 if (accept_source_route > idev->cnf.accept_source_route) 330 accept_source_route = idev->cnf.accept_source_route;
330 accept_source_route = idev->cnf.accept_source_route;
331 in6_dev_put(idev);
332 }
333 331
334 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || 332 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
335 !pskb_may_pull(skb, (skb_transport_offset(skb) + 333 !pskb_may_pull(skb, (skb_transport_offset(skb) +
@@ -874,3 +872,27 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
874 return opt; 872 return opt;
875} 873}
876 874
875/**
876 * fl6_update_dst - update flowi destination address with info given
877 * by srcrt option, if any.
878 *
879 * @fl: flowi for which fl6_dst is to be updated
880 * @opt: struct ipv6_txoptions in which to look for srcrt opt
881 * @orig: copy of original fl6_dst address if modified
882 *
883 * Returns NULL if no txoptions or no srcrt, otherwise returns orig
884 * and initial value of fl->fl6_dst set in orig
885 */
886struct in6_addr *fl6_update_dst(struct flowi *fl,
887 const struct ipv6_txoptions *opt,
888 struct in6_addr *orig)
889{
890 if (!opt || !opt->srcrt)
891 return NULL;
892
893 ipv6_addr_copy(orig, &fl->fl6_dst);
894 ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr);
895 return orig;
896}
897
898EXPORT_SYMBOL_GPL(fl6_update_dst);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 8e44f8f9c188..b1108ede18e1 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -43,8 +43,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
43 if (arg.result) 43 if (arg.result)
44 return arg.result; 44 return arg.result;
45 45
46 dst_hold(&net->ipv6.ip6_null_entry->u.dst); 46 dst_hold(&net->ipv6.ip6_null_entry->dst);
47 return &net->ipv6.ip6_null_entry->u.dst; 47 return &net->ipv6.ip6_null_entry->dst;
48} 48}
49 49
50static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, 50static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
@@ -86,7 +86,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
86 struct in6_addr saddr; 86 struct in6_addr saddr;
87 87
88 if (ipv6_dev_get_saddr(net, 88 if (ipv6_dev_get_saddr(net,
89 ip6_dst_idev(&rt->u.dst)->dev, 89 ip6_dst_idev(&rt->dst)->dev,
90 &flp->fl6_dst, 90 &flp->fl6_dst,
91 rt6_flags2srcprefs(flags), 91 rt6_flags2srcprefs(flags),
92 &saddr)) 92 &saddr))
@@ -99,12 +99,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
99 goto out; 99 goto out;
100 } 100 }
101again: 101again:
102 dst_release(&rt->u.dst); 102 dst_release(&rt->dst);
103 rt = NULL; 103 rt = NULL;
104 goto out; 104 goto out;
105 105
106discard_pkt: 106discard_pkt:
107 dst_hold(&rt->u.dst); 107 dst_hold(&rt->dst);
108out: 108out:
109 arg->result = rt; 109 arg->result = rt;
110 return rt == NULL ? -EAGAIN : 0; 110 return rt == NULL ? -EAGAIN : 0;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index ce7992982557..03e62f94ff8e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -483,7 +483,7 @@ route_done:
483 np->tclass, NULL, &fl, (struct rt6_info*)dst, 483 np->tclass, NULL, &fl, (struct rt6_info*)dst,
484 MSG_DONTWAIT, np->dontfrag); 484 MSG_DONTWAIT, np->dontfrag);
485 if (err) { 485 if (err) {
486 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); 486 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
487 ip6_flush_pending_frames(sk); 487 ip6_flush_pending_frames(sk);
488 goto out_put; 488 goto out_put;
489 } 489 }
@@ -565,7 +565,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
565 np->dontfrag); 565 np->dontfrag);
566 566
567 if (err) { 567 if (err) {
568 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); 568 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
569 ip6_flush_pending_frames(sk); 569 ip6_flush_pending_frames(sk);
570 goto out_put; 570 goto out_put;
571 } 571 }
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 0c5e3c3b7fd5..8a1628023bd1 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb)
185 struct ipv6_pinfo *np = inet6_sk(sk); 185 struct ipv6_pinfo *np = inet6_sk(sk);
186 struct flowi fl; 186 struct flowi fl;
187 struct dst_entry *dst; 187 struct dst_entry *dst;
188 struct in6_addr *final_p = NULL, final; 188 struct in6_addr *final_p, final;
189 189
190 memset(&fl, 0, sizeof(fl)); 190 memset(&fl, 0, sizeof(fl));
191 fl.proto = sk->sk_protocol; 191 fl.proto = sk->sk_protocol;
@@ -199,12 +199,7 @@ int inet6_csk_xmit(struct sk_buff *skb)
199 fl.fl_ip_dport = inet->inet_dport; 199 fl.fl_ip_dport = inet->inet_dport;
200 security_sk_classify_flow(sk, &fl); 200 security_sk_classify_flow(sk, &fl);
201 201
202 if (np->opt && np->opt->srcrt) { 202 final_p = fl6_update_dst(&fl, np->opt, &final);
203 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
204 ipv6_addr_copy(&final, &fl.fl6_dst);
205 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
206 final_p = &final;
207 }
208 203
209 dst = __inet6_csk_dst_check(sk, np->dst_cookie); 204 dst = __inet6_csk_dst_check(sk, np->dst_cookie);
210 205
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92a122b7795d..b6a585909d35 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -165,7 +165,7 @@ static __inline__ void node_free(struct fib6_node * fn)
165static __inline__ void rt6_release(struct rt6_info *rt) 165static __inline__ void rt6_release(struct rt6_info *rt)
166{ 166{
167 if (atomic_dec_and_test(&rt->rt6i_ref)) 167 if (atomic_dec_and_test(&rt->rt6i_ref))
168 dst_free(&rt->u.dst); 168 dst_free(&rt->dst);
169} 169}
170 170
171static void fib6_link_table(struct net *net, struct fib6_table *tb) 171static void fib6_link_table(struct net *net, struct fib6_table *tb)
@@ -278,7 +278,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
278 int res; 278 int res;
279 struct rt6_info *rt; 279 struct rt6_info *rt;
280 280
281 for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { 281 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
282 res = rt6_dump_route(rt, w->args); 282 res = rt6_dump_route(rt, w->args);
283 if (res < 0) { 283 if (res < 0) {
284 /* Frame is full, suspend walking */ 284 /* Frame is full, suspend walking */
@@ -619,7 +619,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
619 619
620 ins = &fn->leaf; 620 ins = &fn->leaf;
621 621
622 for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) { 622 for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) {
623 /* 623 /*
624 * Search for duplicates 624 * Search for duplicates
625 */ 625 */
@@ -647,7 +647,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
647 if (iter->rt6i_metric > rt->rt6i_metric) 647 if (iter->rt6i_metric > rt->rt6i_metric)
648 break; 648 break;
649 649
650 ins = &iter->u.dst.rt6_next; 650 ins = &iter->dst.rt6_next;
651 } 651 }
652 652
653 /* Reset round-robin state, if necessary */ 653 /* Reset round-robin state, if necessary */
@@ -658,7 +658,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
658 * insert node 658 * insert node
659 */ 659 */
660 660
661 rt->u.dst.rt6_next = iter; 661 rt->dst.rt6_next = iter;
662 *ins = rt; 662 *ins = rt;
663 rt->rt6i_node = fn; 663 rt->rt6i_node = fn;
664 atomic_inc(&rt->rt6i_ref); 664 atomic_inc(&rt->rt6i_ref);
@@ -799,7 +799,7 @@ out:
799 atomic_inc(&pn->leaf->rt6i_ref); 799 atomic_inc(&pn->leaf->rt6i_ref);
800 } 800 }
801#endif 801#endif
802 dst_free(&rt->u.dst); 802 dst_free(&rt->dst);
803 } 803 }
804 return err; 804 return err;
805 805
@@ -810,7 +810,7 @@ out:
810st_failure: 810st_failure:
811 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) 811 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
812 fib6_repair_tree(info->nl_net, fn); 812 fib6_repair_tree(info->nl_net, fn);
813 dst_free(&rt->u.dst); 813 dst_free(&rt->dst);
814 return err; 814 return err;
815#endif 815#endif
816} 816}
@@ -1108,7 +1108,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1108 RT6_TRACE("fib6_del_route\n"); 1108 RT6_TRACE("fib6_del_route\n");
1109 1109
1110 /* Unlink it */ 1110 /* Unlink it */
1111 *rtp = rt->u.dst.rt6_next; 1111 *rtp = rt->dst.rt6_next;
1112 rt->rt6i_node = NULL; 1112 rt->rt6i_node = NULL;
1113 net->ipv6.rt6_stats->fib_rt_entries--; 1113 net->ipv6.rt6_stats->fib_rt_entries--;
1114 net->ipv6.rt6_stats->fib_discarded_routes++; 1114 net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1122,14 +1122,14 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1122 FOR_WALKERS(w) { 1122 FOR_WALKERS(w) {
1123 if (w->state == FWS_C && w->leaf == rt) { 1123 if (w->state == FWS_C && w->leaf == rt) {
1124 RT6_TRACE("walker %p adjusted by delroute\n", w); 1124 RT6_TRACE("walker %p adjusted by delroute\n", w);
1125 w->leaf = rt->u.dst.rt6_next; 1125 w->leaf = rt->dst.rt6_next;
1126 if (w->leaf == NULL) 1126 if (w->leaf == NULL)
1127 w->state = FWS_U; 1127 w->state = FWS_U;
1128 } 1128 }
1129 } 1129 }
1130 read_unlock(&fib6_walker_lock); 1130 read_unlock(&fib6_walker_lock);
1131 1131
1132 rt->u.dst.rt6_next = NULL; 1132 rt->dst.rt6_next = NULL;
1133 1133
1134 /* If it was last route, expunge its radix tree node */ 1134 /* If it was last route, expunge its radix tree node */
1135 if (fn->leaf == NULL) { 1135 if (fn->leaf == NULL) {
@@ -1168,7 +1168,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
1168 struct rt6_info **rtp; 1168 struct rt6_info **rtp;
1169 1169
1170#if RT6_DEBUG >= 2 1170#if RT6_DEBUG >= 2
1171 if (rt->u.dst.obsolete>0) { 1171 if (rt->dst.obsolete>0) {
1172 WARN_ON(fn != NULL); 1172 WARN_ON(fn != NULL);
1173 return -ENOENT; 1173 return -ENOENT;
1174 } 1174 }
@@ -1195,7 +1195,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
1195 * Walk the leaf entries looking for ourself 1195 * Walk the leaf entries looking for ourself
1196 */ 1196 */
1197 1197
1198 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.dst.rt6_next) { 1198 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
1199 if (*rtp == rt) { 1199 if (*rtp == rt) {
1200 fib6_del_route(fn, rtp, info); 1200 fib6_del_route(fn, rtp, info);
1201 return 0; 1201 return 0;
@@ -1334,7 +1334,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
1334 .nl_net = c->net, 1334 .nl_net = c->net,
1335 }; 1335 };
1336 1336
1337 for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { 1337 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
1338 res = c->func(rt, c->arg); 1338 res = c->func(rt, c->arg);
1339 if (res < 0) { 1339 if (res < 0) {
1340 w->leaf = rt; 1340 w->leaf = rt;
@@ -1448,8 +1448,8 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1448 } 1448 }
1449 gc_args.more++; 1449 gc_args.more++;
1450 } else if (rt->rt6i_flags & RTF_CACHE) { 1450 } else if (rt->rt6i_flags & RTF_CACHE) {
1451 if (atomic_read(&rt->u.dst.__refcnt) == 0 && 1451 if (atomic_read(&rt->dst.__refcnt) == 0 &&
1452 time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) { 1452 time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
1453 RT6_TRACE("aging clone %p\n", rt); 1453 RT6_TRACE("aging clone %p\n", rt);
1454 return -1; 1454 return -1;
1455 } else if ((rt->rt6i_flags & RTF_GATEWAY) && 1455 } else if ((rt->rt6i_flags & RTF_GATEWAY) &&
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index cd963f64e27c..d40b330c0ee6 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -507,7 +507,7 @@ int ip6_forward(struct sk_buff *skb)
507 if (mtu < IPV6_MIN_MTU) 507 if (mtu < IPV6_MIN_MTU)
508 mtu = IPV6_MIN_MTU; 508 mtu = IPV6_MIN_MTU;
509 509
510 if (skb->len > mtu) { 510 if (skb->len > mtu && !skb_is_gso(skb)) {
511 /* Again, force OUTPUT device used as source address */ 511 /* Again, force OUTPUT device used as source address */
512 skb->dev = dst->dev; 512 skb->dev = dst->dev;
513 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 513 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -698,7 +698,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
698 ipv6_hdr(skb)->payload_len = htons(first_len - 698 ipv6_hdr(skb)->payload_len = htons(first_len -
699 sizeof(struct ipv6hdr)); 699 sizeof(struct ipv6hdr));
700 700
701 dst_hold(&rt->u.dst); 701 dst_hold(&rt->dst);
702 702
703 for (;;) { 703 for (;;) {
704 /* Prepare header of the next frame, 704 /* Prepare header of the next frame,
@@ -726,7 +726,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
726 726
727 err = output(skb); 727 err = output(skb);
728 if(!err) 728 if(!err)
729 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 729 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
730 IPSTATS_MIB_FRAGCREATES); 730 IPSTATS_MIB_FRAGCREATES);
731 731
732 if (err || !frag) 732 if (err || !frag)
@@ -740,9 +740,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
740 kfree(tmp_hdr); 740 kfree(tmp_hdr);
741 741
742 if (err == 0) { 742 if (err == 0) {
743 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 743 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
744 IPSTATS_MIB_FRAGOKS); 744 IPSTATS_MIB_FRAGOKS);
745 dst_release(&rt->u.dst); 745 dst_release(&rt->dst);
746 return 0; 746 return 0;
747 } 747 }
748 748
@@ -752,9 +752,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
752 frag = skb; 752 frag = skb;
753 } 753 }
754 754
755 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 755 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
756 IPSTATS_MIB_FRAGFAILS); 756 IPSTATS_MIB_FRAGFAILS);
757 dst_release(&rt->u.dst); 757 dst_release(&rt->dst);
758 return err; 758 return err;
759 } 759 }
760 760
@@ -785,7 +785,7 @@ slow_path:
785 * Allocate buffer. 785 * Allocate buffer.
786 */ 786 */
787 787
788 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { 788 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
789 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); 789 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
790 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 790 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
791 IPSTATS_MIB_FRAGFAILS); 791 IPSTATS_MIB_FRAGFAILS);
@@ -798,7 +798,7 @@ slow_path:
798 */ 798 */
799 799
800 ip6_copy_metadata(frag, skb); 800 ip6_copy_metadata(frag, skb);
801 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); 801 skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
802 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 802 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
803 skb_reset_network_header(frag); 803 skb_reset_network_header(frag);
804 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 804 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
@@ -1156,24 +1156,24 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1156 1156
1157 /* need source address above miyazawa*/ 1157 /* need source address above miyazawa*/
1158 } 1158 }
1159 dst_hold(&rt->u.dst); 1159 dst_hold(&rt->dst);
1160 inet->cork.dst = &rt->u.dst; 1160 inet->cork.dst = &rt->dst;
1161 inet->cork.fl = *fl; 1161 inet->cork.fl = *fl;
1162 np->cork.hop_limit = hlimit; 1162 np->cork.hop_limit = hlimit;
1163 np->cork.tclass = tclass; 1163 np->cork.tclass = tclass;
1164 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1164 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1165 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); 1165 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1166 if (np->frag_size < mtu) { 1166 if (np->frag_size < mtu) {
1167 if (np->frag_size) 1167 if (np->frag_size)
1168 mtu = np->frag_size; 1168 mtu = np->frag_size;
1169 } 1169 }
1170 inet->cork.fragsize = mtu; 1170 inet->cork.fragsize = mtu;
1171 if (dst_allfrag(rt->u.dst.path)) 1171 if (dst_allfrag(rt->dst.path))
1172 inet->cork.flags |= IPCORK_ALLFRAG; 1172 inet->cork.flags |= IPCORK_ALLFRAG;
1173 inet->cork.length = 0; 1173 inet->cork.length = 0;
1174 sk->sk_sndmsg_page = NULL; 1174 sk->sk_sndmsg_page = NULL;
1175 sk->sk_sndmsg_off = 0; 1175 sk->sk_sndmsg_off = 0;
1176 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - 1176 exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
1177 rt->rt6i_nfheader_len; 1177 rt->rt6i_nfheader_len;
1178 length += exthdrlen; 1178 length += exthdrlen;
1179 transhdrlen += exthdrlen; 1179 transhdrlen += exthdrlen;
@@ -1186,7 +1186,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1186 mtu = inet->cork.fragsize; 1186 mtu = inet->cork.fragsize;
1187 } 1187 }
1188 1188
1189 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 1189 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1190 1190
1191 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1191 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1192 (opt ? opt->opt_nflen : 0); 1192 (opt ? opt->opt_nflen : 0);
@@ -1224,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1224 } 1224 }
1225 1225
1226 if (proto == IPPROTO_UDP && 1226 if (proto == IPPROTO_UDP &&
1227 (rt->u.dst.dev->features & NETIF_F_UFO)) { 1227 (rt->dst.dev->features & NETIF_F_UFO)) {
1228 1228
1229 err = ip6_ufo_append_data(sk, getfrag, from, length, 1229 err = ip6_ufo_append_data(sk, getfrag, from, length,
1230 hh_len, fragheaderlen, 1230 hh_len, fragheaderlen,
@@ -1270,7 +1270,7 @@ alloc_new_skb:
1270 1270
1271 fraglen = datalen + fragheaderlen; 1271 fraglen = datalen + fragheaderlen;
1272 if ((flags & MSG_MORE) && 1272 if ((flags & MSG_MORE) &&
1273 !(rt->u.dst.dev->features&NETIF_F_SG)) 1273 !(rt->dst.dev->features&NETIF_F_SG))
1274 alloclen = mtu; 1274 alloclen = mtu;
1275 else 1275 else
1276 alloclen = datalen + fragheaderlen; 1276 alloclen = datalen + fragheaderlen;
@@ -1281,7 +1281,7 @@ alloc_new_skb:
1281 * because we have no idea if we're the last one. 1281 * because we have no idea if we're the last one.
1282 */ 1282 */
1283 if (datalen == length + fraggap) 1283 if (datalen == length + fraggap)
1284 alloclen += rt->u.dst.trailer_len; 1284 alloclen += rt->dst.trailer_len;
1285 1285
1286 /* 1286 /*
1287 * We just reserve space for fragment header. 1287 * We just reserve space for fragment header.
@@ -1358,7 +1358,7 @@ alloc_new_skb:
1358 if (copy > length) 1358 if (copy > length)
1359 copy = length; 1359 copy = length;
1360 1360
1361 if (!(rt->u.dst.dev->features&NETIF_F_SG)) { 1361 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1362 unsigned int off; 1362 unsigned int off;
1363 1363
1364 off = skb->len; 1364 off = skb->len;
@@ -1503,7 +1503,7 @@ int ip6_push_pending_frames(struct sock *sk)
1503 skb->priority = sk->sk_priority; 1503 skb->priority = sk->sk_priority;
1504 skb->mark = sk->sk_mark; 1504 skb->mark = sk->sk_mark;
1505 1505
1506 skb_dst_set(skb, dst_clone(&rt->u.dst)); 1506 skb_dst_set(skb, dst_clone(&rt->dst));
1507 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1507 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1508 if (proto == IPPROTO_ICMPV6) { 1508 if (proto == IPPROTO_ICMPV6) {
1509 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1509 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 8f39893d8081..0fd027f3f47e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -552,7 +552,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
552 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) 552 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
553 goto out; 553 goto out;
554 554
555 skb2->dev = rt->u.dst.dev; 555 skb2->dev = rt->dst.dev;
556 556
557 /* route "incoming" packet */ 557 /* route "incoming" packet */
558 if (rt->rt_flags & RTCF_LOCAL) { 558 if (rt->rt_flags & RTCF_LOCAL) {
@@ -562,7 +562,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
562 fl.fl4_src = eiph->saddr; 562 fl.fl4_src = eiph->saddr;
563 fl.fl4_tos = eiph->tos; 563 fl.fl4_tos = eiph->tos;
564 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || 564 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
565 rt->u.dst.dev->type != ARPHRD_TUNNEL) { 565 rt->dst.dev->type != ARPHRD_TUNNEL) {
566 ip_rt_put(rt); 566 ip_rt_put(rt);
567 goto out; 567 goto out;
568 } 568 }
@@ -626,7 +626,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
626 icmpv6_send(skb2, rel_type, rel_code, rel_info); 626 icmpv6_send(skb2, rel_type, rel_code, rel_info);
627 627
628 if (rt) 628 if (rt)
629 dst_release(&rt->u.dst); 629 dst_release(&rt->dst);
630 630
631 kfree_skb(skb2); 631 kfree_skb(skb2);
632 } 632 }
@@ -1135,7 +1135,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1135 if (dev->mtu < IPV6_MIN_MTU) 1135 if (dev->mtu < IPV6_MIN_MTU)
1136 dev->mtu = IPV6_MIN_MTU; 1136 dev->mtu = IPV6_MIN_MTU;
1137 } 1137 }
1138 dst_release(&rt->u.dst); 1138 dst_release(&rt->dst);
1139 } 1139 }
1140} 1140}
1141 1141
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index bd9e7d3e9c8e..66078dad7fe8 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -120,7 +120,7 @@ static void mroute_clean_tables(struct mr6_table *mrt);
120static void ipmr_expire_process(unsigned long arg); 120static void ipmr_expire_process(unsigned long arg);
121 121
122#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES 122#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
123#define ip6mr_for_each_table(mrt, met) \ 123#define ip6mr_for_each_table(mrt, net) \
124 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) 124 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
125 125
126static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) 126static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
@@ -254,8 +254,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
254{ 254{
255 struct mr6_table *mrt, *next; 255 struct mr6_table *mrt, *next;
256 256
257 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) 257 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
258 list_del(&mrt->list);
258 ip6mr_free_table(mrt); 259 ip6mr_free_table(mrt);
260 }
259 fib_rules_unregister(net->ipv6.mr6_rules_ops); 261 fib_rules_unregister(net->ipv6.mr6_rules_ops);
260} 262}
261#else 263#else
@@ -2017,7 +2019,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2017 struct rtattr *mp_head; 2019 struct rtattr *mp_head;
2018 2020
2019 /* If cache is unresolved, don't try to parse IIF and OIF */ 2021 /* If cache is unresolved, don't try to parse IIF and OIF */
2020 if (c->mf6c_parent > MAXMIFS) 2022 if (c->mf6c_parent >= MAXMIFS)
2021 return -ENOENT; 2023 return -ENOENT;
2022 2024
2023 if (MIF_EXISTS(mrt, c->mf6c_parent)) 2025 if (MIF_EXISTS(mrt, c->mf6c_parent))
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index bd43f0152c21..a7f66bc8f0b0 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -55,8 +55,6 @@
55 55
56#include <asm/uaccess.h> 56#include <asm/uaccess.h>
57 57
58DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
59
60struct ip6_ra_chain *ip6_ra_chain; 58struct ip6_ra_chain *ip6_ra_chain;
61DEFINE_RWLOCK(ip6_ra_lock); 59DEFINE_RWLOCK(ip6_ra_lock);
62 60
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 59f1881968c7..d1444b95ad7e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -152,18 +152,19 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
152 mc_lst->next = NULL; 152 mc_lst->next = NULL;
153 ipv6_addr_copy(&mc_lst->addr, addr); 153 ipv6_addr_copy(&mc_lst->addr, addr);
154 154
155 rcu_read_lock();
155 if (ifindex == 0) { 156 if (ifindex == 0) {
156 struct rt6_info *rt; 157 struct rt6_info *rt;
157 rt = rt6_lookup(net, addr, NULL, 0, 0); 158 rt = rt6_lookup(net, addr, NULL, 0, 0);
158 if (rt) { 159 if (rt) {
159 dev = rt->rt6i_dev; 160 dev = rt->rt6i_dev;
160 dev_hold(dev); 161 dst_release(&rt->dst);
161 dst_release(&rt->u.dst);
162 } 162 }
163 } else 163 } else
164 dev = dev_get_by_index(net, ifindex); 164 dev = dev_get_by_index_rcu(net, ifindex);
165 165
166 if (dev == NULL) { 166 if (dev == NULL) {
167 rcu_read_unlock();
167 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); 168 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
168 return -ENODEV; 169 return -ENODEV;
169 } 170 }
@@ -180,8 +181,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
180 err = ipv6_dev_mc_inc(dev, addr); 181 err = ipv6_dev_mc_inc(dev, addr);
181 182
182 if (err) { 183 if (err) {
184 rcu_read_unlock();
183 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); 185 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
184 dev_put(dev);
185 return err; 186 return err;
186 } 187 }
187 188
@@ -190,7 +191,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
190 np->ipv6_mc_list = mc_lst; 191 np->ipv6_mc_list = mc_lst;
191 write_unlock_bh(&ipv6_sk_mc_lock); 192 write_unlock_bh(&ipv6_sk_mc_lock);
192 193
193 dev_put(dev); 194 rcu_read_unlock();
194 195
195 return 0; 196 return 0;
196} 197}
@@ -213,18 +214,17 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
213 *lnk = mc_lst->next; 214 *lnk = mc_lst->next;
214 write_unlock_bh(&ipv6_sk_mc_lock); 215 write_unlock_bh(&ipv6_sk_mc_lock);
215 216
216 dev = dev_get_by_index(net, mc_lst->ifindex); 217 rcu_read_lock();
218 dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
217 if (dev != NULL) { 219 if (dev != NULL) {
218 struct inet6_dev *idev = in6_dev_get(dev); 220 struct inet6_dev *idev = __in6_dev_get(dev);
219 221
220 (void) ip6_mc_leave_src(sk, mc_lst, idev); 222 (void) ip6_mc_leave_src(sk, mc_lst, idev);
221 if (idev) { 223 if (idev)
222 __ipv6_dev_mc_dec(idev, &mc_lst->addr); 224 __ipv6_dev_mc_dec(idev, &mc_lst->addr);
223 in6_dev_put(idev);
224 }
225 dev_put(dev);
226 } else 225 } else
227 (void) ip6_mc_leave_src(sk, mc_lst, NULL); 226 (void) ip6_mc_leave_src(sk, mc_lst, NULL);
227 rcu_read_unlock();
228 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); 228 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
229 return 0; 229 return 0;
230 } 230 }
@@ -234,43 +234,36 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
234 return -EADDRNOTAVAIL; 234 return -EADDRNOTAVAIL;
235} 235}
236 236
237static struct inet6_dev *ip6_mc_find_dev(struct net *net, 237/* called with rcu_read_lock() */
238 struct in6_addr *group, 238static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
239 int ifindex) 239 struct in6_addr *group,
240 int ifindex)
240{ 241{
241 struct net_device *dev = NULL; 242 struct net_device *dev = NULL;
242 struct inet6_dev *idev = NULL; 243 struct inet6_dev *idev = NULL;
243 244
244 if (ifindex == 0) { 245 if (ifindex == 0) {
245 struct rt6_info *rt; 246 struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
246 247
247 rt = rt6_lookup(net, group, NULL, 0, 0);
248 if (rt) { 248 if (rt) {
249 dev = rt->rt6i_dev; 249 dev = rt->rt6i_dev;
250 dev_hold(dev); 250 dev_hold(dev);
251 dst_release(&rt->u.dst); 251 dst_release(&rt->dst);
252 } 252 }
253 } else 253 } else
254 dev = dev_get_by_index(net, ifindex); 254 dev = dev_get_by_index_rcu(net, ifindex);
255 255
256 if (!dev) 256 if (!dev)
257 goto nodev; 257 return NULL;
258 idev = in6_dev_get(dev); 258 idev = __in6_dev_get(dev);
259 if (!idev) 259 if (!idev)
260 goto release; 260 return NULL;;
261 read_lock_bh(&idev->lock); 261 read_lock_bh(&idev->lock);
262 if (idev->dead) 262 if (idev->dead) {
263 goto unlock_release; 263 read_unlock_bh(&idev->lock);
264 264 return NULL;
265 }
265 return idev; 266 return idev;
266
267unlock_release:
268 read_unlock_bh(&idev->lock);
269 in6_dev_put(idev);
270release:
271 dev_put(dev);
272nodev:
273 return NULL;
274} 267}
275 268
276void ipv6_sock_mc_close(struct sock *sk) 269void ipv6_sock_mc_close(struct sock *sk)
@@ -286,19 +279,17 @@ void ipv6_sock_mc_close(struct sock *sk)
286 np->ipv6_mc_list = mc_lst->next; 279 np->ipv6_mc_list = mc_lst->next;
287 write_unlock_bh(&ipv6_sk_mc_lock); 280 write_unlock_bh(&ipv6_sk_mc_lock);
288 281
289 dev = dev_get_by_index(net, mc_lst->ifindex); 282 rcu_read_lock();
283 dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
290 if (dev) { 284 if (dev) {
291 struct inet6_dev *idev = in6_dev_get(dev); 285 struct inet6_dev *idev = __in6_dev_get(dev);
292 286
293 (void) ip6_mc_leave_src(sk, mc_lst, idev); 287 (void) ip6_mc_leave_src(sk, mc_lst, idev);
294 if (idev) { 288 if (idev)
295 __ipv6_dev_mc_dec(idev, &mc_lst->addr); 289 __ipv6_dev_mc_dec(idev, &mc_lst->addr);
296 in6_dev_put(idev);
297 }
298 dev_put(dev);
299 } else 290 } else
300 (void) ip6_mc_leave_src(sk, mc_lst, NULL); 291 (void) ip6_mc_leave_src(sk, mc_lst, NULL);
301 292 rcu_read_unlock();
302 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); 293 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
303 294
304 write_lock_bh(&ipv6_sk_mc_lock); 295 write_lock_bh(&ipv6_sk_mc_lock);
@@ -327,14 +318,17 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
327 if (!ipv6_addr_is_multicast(group)) 318 if (!ipv6_addr_is_multicast(group))
328 return -EINVAL; 319 return -EINVAL;
329 320
330 idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface); 321 rcu_read_lock();
331 if (!idev) 322 idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface);
323 if (!idev) {
324 rcu_read_unlock();
332 return -ENODEV; 325 return -ENODEV;
326 }
333 dev = idev->dev; 327 dev = idev->dev;
334 328
335 err = -EADDRNOTAVAIL; 329 err = -EADDRNOTAVAIL;
336 330
337 read_lock_bh(&ipv6_sk_mc_lock); 331 read_lock(&ipv6_sk_mc_lock);
338 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { 332 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
339 if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) 333 if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
340 continue; 334 continue;
@@ -358,7 +352,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
358 pmc->sfmode = omode; 352 pmc->sfmode = omode;
359 } 353 }
360 354
361 write_lock_bh(&pmc->sflock); 355 write_lock(&pmc->sflock);
362 pmclocked = 1; 356 pmclocked = 1;
363 357
364 psl = pmc->sflist; 358 psl = pmc->sflist;
@@ -433,11 +427,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
433 ip6_mc_add_src(idev, group, omode, 1, source, 1); 427 ip6_mc_add_src(idev, group, omode, 1, source, 1);
434done: 428done:
435 if (pmclocked) 429 if (pmclocked)
436 write_unlock_bh(&pmc->sflock); 430 write_unlock(&pmc->sflock);
437 read_unlock_bh(&ipv6_sk_mc_lock); 431 read_unlock(&ipv6_sk_mc_lock);
438 read_unlock_bh(&idev->lock); 432 read_unlock_bh(&idev->lock);
439 in6_dev_put(idev); 433 rcu_read_unlock();
440 dev_put(dev);
441 if (leavegroup) 434 if (leavegroup)
442 return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); 435 return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
443 return err; 436 return err;
@@ -463,14 +456,17 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
463 gsf->gf_fmode != MCAST_EXCLUDE) 456 gsf->gf_fmode != MCAST_EXCLUDE)
464 return -EINVAL; 457 return -EINVAL;
465 458
466 idev = ip6_mc_find_dev(net, group, gsf->gf_interface); 459 rcu_read_lock();
460 idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
467 461
468 if (!idev) 462 if (!idev) {
463 rcu_read_unlock();
469 return -ENODEV; 464 return -ENODEV;
465 }
470 dev = idev->dev; 466 dev = idev->dev;
471 467
472 err = 0; 468 err = 0;
473 read_lock_bh(&ipv6_sk_mc_lock); 469 read_lock(&ipv6_sk_mc_lock);
474 470
475 if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { 471 if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
476 leavegroup = 1; 472 leavegroup = 1;
@@ -512,7 +508,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
512 (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); 508 (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
513 } 509 }
514 510
515 write_lock_bh(&pmc->sflock); 511 write_lock(&pmc->sflock);
516 psl = pmc->sflist; 512 psl = pmc->sflist;
517 if (psl) { 513 if (psl) {
518 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 514 (void) ip6_mc_del_src(idev, group, pmc->sfmode,
@@ -522,13 +518,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
522 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); 518 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
523 pmc->sflist = newpsl; 519 pmc->sflist = newpsl;
524 pmc->sfmode = gsf->gf_fmode; 520 pmc->sfmode = gsf->gf_fmode;
525 write_unlock_bh(&pmc->sflock); 521 write_unlock(&pmc->sflock);
526 err = 0; 522 err = 0;
527done: 523done:
528 read_unlock_bh(&ipv6_sk_mc_lock); 524 read_unlock(&ipv6_sk_mc_lock);
529 read_unlock_bh(&idev->lock); 525 read_unlock_bh(&idev->lock);
530 in6_dev_put(idev); 526 rcu_read_unlock();
531 dev_put(dev);
532 if (leavegroup) 527 if (leavegroup)
533 err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); 528 err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group);
534 return err; 529 return err;
@@ -551,11 +546,13 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
551 if (!ipv6_addr_is_multicast(group)) 546 if (!ipv6_addr_is_multicast(group))
552 return -EINVAL; 547 return -EINVAL;
553 548
554 idev = ip6_mc_find_dev(net, group, gsf->gf_interface); 549 rcu_read_lock();
550 idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
555 551
556 if (!idev) 552 if (!idev) {
553 rcu_read_unlock();
557 return -ENODEV; 554 return -ENODEV;
558 555 }
559 dev = idev->dev; 556 dev = idev->dev;
560 557
561 err = -EADDRNOTAVAIL; 558 err = -EADDRNOTAVAIL;
@@ -577,8 +574,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
577 psl = pmc->sflist; 574 psl = pmc->sflist;
578 count = psl ? psl->sl_count : 0; 575 count = psl ? psl->sl_count : 0;
579 read_unlock_bh(&idev->lock); 576 read_unlock_bh(&idev->lock);
580 in6_dev_put(idev); 577 rcu_read_unlock();
581 dev_put(dev);
582 578
583 copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; 579 copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
584 gsf->gf_numsrc = count; 580 gsf->gf_numsrc = count;
@@ -604,8 +600,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
604 return 0; 600 return 0;
605done: 601done:
606 read_unlock_bh(&idev->lock); 602 read_unlock_bh(&idev->lock);
607 in6_dev_put(idev); 603 rcu_read_unlock();
608 dev_put(dev);
609 return err; 604 return err;
610} 605}
611 606
@@ -822,6 +817,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
822 struct ifmcaddr6 *mc; 817 struct ifmcaddr6 *mc;
823 struct inet6_dev *idev; 818 struct inet6_dev *idev;
824 819
820 /* we need to take a reference on idev */
825 idev = in6_dev_get(dev); 821 idev = in6_dev_get(dev);
826 822
827 if (idev == NULL) 823 if (idev == NULL)
@@ -860,7 +856,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
860 setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); 856 setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
861 857
862 ipv6_addr_copy(&mc->mca_addr, addr); 858 ipv6_addr_copy(&mc->mca_addr, addr);
863 mc->idev = idev; 859 mc->idev = idev; /* (reference taken) */
864 mc->mca_users = 1; 860 mc->mca_users = 1;
865 /* mca_stamp should be updated upon changes */ 861 /* mca_stamp should be updated upon changes */
866 mc->mca_cstamp = mc->mca_tstamp = jiffies; 862 mc->mca_cstamp = mc->mca_tstamp = jiffies;
@@ -915,16 +911,18 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
915 911
916int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) 912int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
917{ 913{
918 struct inet6_dev *idev = in6_dev_get(dev); 914 struct inet6_dev *idev;
919 int err; 915 int err;
920 916
921 if (!idev) 917 rcu_read_lock();
922 return -ENODEV;
923
924 err = __ipv6_dev_mc_dec(idev, addr);
925 918
926 in6_dev_put(idev); 919 idev = __in6_dev_get(dev);
920 if (!idev)
921 err = -ENODEV;
922 else
923 err = __ipv6_dev_mc_dec(idev, addr);
927 924
925 rcu_read_unlock();
928 return err; 926 return err;
929} 927}
930 928
@@ -965,7 +963,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
965 struct ifmcaddr6 *mc; 963 struct ifmcaddr6 *mc;
966 int rv = 0; 964 int rv = 0;
967 965
968 idev = in6_dev_get(dev); 966 rcu_read_lock();
967 idev = __in6_dev_get(dev);
969 if (idev) { 968 if (idev) {
970 read_lock_bh(&idev->lock); 969 read_lock_bh(&idev->lock);
971 for (mc = idev->mc_list; mc; mc=mc->next) { 970 for (mc = idev->mc_list; mc; mc=mc->next) {
@@ -992,8 +991,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
992 rv = 1; /* don't filter unspecified source */ 991 rv = 1; /* don't filter unspecified source */
993 } 992 }
994 read_unlock_bh(&idev->lock); 993 read_unlock_bh(&idev->lock);
995 in6_dev_put(idev);
996 } 994 }
995 rcu_read_unlock();
997 return rv; 996 return rv;
998} 997}
999 998
@@ -1104,6 +1103,7 @@ static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
1104 return 1; 1103 return 1;
1105} 1104}
1106 1105
1106/* called with rcu_read_lock() */
1107int igmp6_event_query(struct sk_buff *skb) 1107int igmp6_event_query(struct sk_buff *skb)
1108{ 1108{
1109 struct mld2_query *mlh2 = NULL; 1109 struct mld2_query *mlh2 = NULL;
@@ -1127,7 +1127,7 @@ int igmp6_event_query(struct sk_buff *skb)
1127 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) 1127 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
1128 return -EINVAL; 1128 return -EINVAL;
1129 1129
1130 idev = in6_dev_get(skb->dev); 1130 idev = __in6_dev_get(skb->dev);
1131 1131
1132 if (idev == NULL) 1132 if (idev == NULL)
1133 return 0; 1133 return 0;
@@ -1137,10 +1137,8 @@ int igmp6_event_query(struct sk_buff *skb)
1137 group_type = ipv6_addr_type(group); 1137 group_type = ipv6_addr_type(group);
1138 1138
1139 if (group_type != IPV6_ADDR_ANY && 1139 if (group_type != IPV6_ADDR_ANY &&
1140 !(group_type&IPV6_ADDR_MULTICAST)) { 1140 !(group_type&IPV6_ADDR_MULTICAST))
1141 in6_dev_put(idev);
1142 return -EINVAL; 1141 return -EINVAL;
1143 }
1144 1142
1145 if (len == 24) { 1143 if (len == 24) {
1146 int switchback; 1144 int switchback;
@@ -1161,10 +1159,9 @@ int igmp6_event_query(struct sk_buff *skb)
1161 } else if (len >= 28) { 1159 } else if (len >= 28) {
1162 int srcs_offset = sizeof(struct mld2_query) - 1160 int srcs_offset = sizeof(struct mld2_query) -
1163 sizeof(struct icmp6hdr); 1161 sizeof(struct icmp6hdr);
1164 if (!pskb_may_pull(skb, srcs_offset)) { 1162 if (!pskb_may_pull(skb, srcs_offset))
1165 in6_dev_put(idev);
1166 return -EINVAL; 1163 return -EINVAL;
1167 } 1164
1168 mlh2 = (struct mld2_query *)skb_transport_header(skb); 1165 mlh2 = (struct mld2_query *)skb_transport_header(skb);
1169 max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000; 1166 max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000;
1170 if (!max_delay) 1167 if (!max_delay)
@@ -1173,28 +1170,23 @@ int igmp6_event_query(struct sk_buff *skb)
1173 if (mlh2->mld2q_qrv) 1170 if (mlh2->mld2q_qrv)
1174 idev->mc_qrv = mlh2->mld2q_qrv; 1171 idev->mc_qrv = mlh2->mld2q_qrv;
1175 if (group_type == IPV6_ADDR_ANY) { /* general query */ 1172 if (group_type == IPV6_ADDR_ANY) { /* general query */
1176 if (mlh2->mld2q_nsrcs) { 1173 if (mlh2->mld2q_nsrcs)
1177 in6_dev_put(idev);
1178 return -EINVAL; /* no sources allowed */ 1174 return -EINVAL; /* no sources allowed */
1179 } 1175
1180 mld_gq_start_timer(idev); 1176 mld_gq_start_timer(idev);
1181 in6_dev_put(idev);
1182 return 0; 1177 return 0;
1183 } 1178 }
1184 /* mark sources to include, if group & source-specific */ 1179 /* mark sources to include, if group & source-specific */
1185 if (mlh2->mld2q_nsrcs != 0) { 1180 if (mlh2->mld2q_nsrcs != 0) {
1186 if (!pskb_may_pull(skb, srcs_offset + 1181 if (!pskb_may_pull(skb, srcs_offset +
1187 ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) { 1182 ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr)))
1188 in6_dev_put(idev);
1189 return -EINVAL; 1183 return -EINVAL;
1190 } 1184
1191 mlh2 = (struct mld2_query *)skb_transport_header(skb); 1185 mlh2 = (struct mld2_query *)skb_transport_header(skb);
1192 mark = 1; 1186 mark = 1;
1193 } 1187 }
1194 } else { 1188 } else
1195 in6_dev_put(idev);
1196 return -EINVAL; 1189 return -EINVAL;
1197 }
1198 1190
1199 read_lock_bh(&idev->lock); 1191 read_lock_bh(&idev->lock);
1200 if (group_type == IPV6_ADDR_ANY) { 1192 if (group_type == IPV6_ADDR_ANY) {
@@ -1227,12 +1219,11 @@ int igmp6_event_query(struct sk_buff *skb)
1227 } 1219 }
1228 } 1220 }
1229 read_unlock_bh(&idev->lock); 1221 read_unlock_bh(&idev->lock);
1230 in6_dev_put(idev);
1231 1222
1232 return 0; 1223 return 0;
1233} 1224}
1234 1225
1235 1226/* called with rcu_read_lock() */
1236int igmp6_event_report(struct sk_buff *skb) 1227int igmp6_event_report(struct sk_buff *skb)
1237{ 1228{
1238 struct ifmcaddr6 *ma; 1229 struct ifmcaddr6 *ma;
@@ -1260,7 +1251,7 @@ int igmp6_event_report(struct sk_buff *skb)
1260 !(addr_type&IPV6_ADDR_LINKLOCAL)) 1251 !(addr_type&IPV6_ADDR_LINKLOCAL))
1261 return -EINVAL; 1252 return -EINVAL;
1262 1253
1263 idev = in6_dev_get(skb->dev); 1254 idev = __in6_dev_get(skb->dev);
1264 if (idev == NULL) 1255 if (idev == NULL)
1265 return -ENODEV; 1256 return -ENODEV;
1266 1257
@@ -1280,7 +1271,6 @@ int igmp6_event_report(struct sk_buff *skb)
1280 } 1271 }
1281 } 1272 }
1282 read_unlock_bh(&idev->lock); 1273 read_unlock_bh(&idev->lock);
1283 in6_dev_put(idev);
1284 return 0; 1274 return 0;
1285} 1275}
1286 1276
@@ -1356,7 +1346,10 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
1356 IPV6_TLV_PADN, 0 }; 1346 IPV6_TLV_PADN, 0 };
1357 1347
1358 /* we assume size > sizeof(ra) here */ 1348 /* we assume size > sizeof(ra) here */
1359 skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err); 1349 size += LL_ALLOCATED_SPACE(dev);
1350 /* limit our allocations to order-0 page */
1351 size = min_t(int, size, SKB_MAX_ORDER(0, 0));
1352 skb = sock_alloc_send_skb(sk, size, 1, &err);
1360 1353
1361 if (!skb) 1354 if (!skb)
1362 return NULL; 1355 return NULL;
@@ -1393,12 +1386,14 @@ static void mld_sendpack(struct sk_buff *skb)
1393 struct mld2_report *pmr = 1386 struct mld2_report *pmr =
1394 (struct mld2_report *)skb_transport_header(skb); 1387 (struct mld2_report *)skb_transport_header(skb);
1395 int payload_len, mldlen; 1388 int payload_len, mldlen;
1396 struct inet6_dev *idev = in6_dev_get(skb->dev); 1389 struct inet6_dev *idev;
1397 struct net *net = dev_net(skb->dev); 1390 struct net *net = dev_net(skb->dev);
1398 int err; 1391 int err;
1399 struct flowi fl; 1392 struct flowi fl;
1400 struct dst_entry *dst; 1393 struct dst_entry *dst;
1401 1394
1395 rcu_read_lock();
1396 idev = __in6_dev_get(skb->dev);
1402 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 1397 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
1403 1398
1404 payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); 1399 payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
@@ -1438,8 +1433,7 @@ out:
1438 } else 1433 } else
1439 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); 1434 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
1440 1435
1441 if (likely(idev != NULL)) 1436 rcu_read_unlock();
1442 in6_dev_put(idev);
1443 return; 1437 return;
1444 1438
1445err_out: 1439err_out:
@@ -1776,7 +1770,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1776 IPPROTO_ICMPV6, 1770 IPPROTO_ICMPV6,
1777 csum_partial(hdr, len, 0)); 1771 csum_partial(hdr, len, 0));
1778 1772
1779 idev = in6_dev_get(skb->dev); 1773 rcu_read_lock();
1774 idev = __in6_dev_get(skb->dev);
1780 1775
1781 dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); 1776 dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
1782 if (!dst) { 1777 if (!dst) {
@@ -1803,8 +1798,7 @@ out:
1803 } else 1798 } else
1804 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 1799 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
1805 1800
1806 if (likely(idev != NULL)) 1801 rcu_read_unlock();
1807 in6_dev_put(idev);
1808 return; 1802 return;
1809 1803
1810err_out: 1804err_out:
@@ -1995,8 +1989,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
1995 &psf->sf_addr)) 1989 &psf->sf_addr))
1996 break; 1990 break;
1997 if (!dpsf) { 1991 if (!dpsf) {
1998 dpsf = (struct ip6_sf_list *) 1992 dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
1999 kmalloc(sizeof(*dpsf), GFP_ATOMIC);
2000 if (!dpsf) 1993 if (!dpsf)
2001 continue; 1994 continue;
2002 *dpsf = *psf; 1995 *dpsf = *psf;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 2794b6002836..d6e9599d0705 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -347,11 +347,12 @@ static const struct xfrm_type mip6_destopt_type =
347 347
348static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) 348static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
349{ 349{
350 struct ipv6hdr *iph = ipv6_hdr(skb);
350 struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; 351 struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
351 int err = rt2->rt_hdr.nexthdr; 352 int err = rt2->rt_hdr.nexthdr;
352 353
353 spin_lock(&x->lock); 354 spin_lock(&x->lock);
354 if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && 355 if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
355 !ipv6_addr_any((struct in6_addr *)x->coaddr)) 356 !ipv6_addr_any((struct in6_addr *)x->coaddr))
356 err = -ENOENT; 357 err = -ENOENT;
357 spin_unlock(&x->lock); 358 spin_unlock(&x->lock);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0abdc242ddb7..58841c4ae947 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -586,6 +586,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
586 src_addr = solicited_addr; 586 src_addr = solicited_addr;
587 if (ifp->flags & IFA_F_OPTIMISTIC) 587 if (ifp->flags & IFA_F_OPTIMISTIC)
588 override = 0; 588 override = 0;
589 inc_opt |= ifp->idev->cnf.force_tllao;
589 in6_ifa_put(ifp); 590 in6_ifa_put(ifp);
590 } else { 591 } else {
591 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, 592 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
@@ -599,7 +600,6 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
599 icmp6h.icmp6_solicited = solicited; 600 icmp6h.icmp6_solicited = solicited;
600 icmp6h.icmp6_override = override; 601 icmp6h.icmp6_override = override;
601 602
602 inc_opt |= ifp->idev->cnf.force_tllao;
603 __ndisc_send(dev, neigh, daddr, src_addr, 603 __ndisc_send(dev, neigh, daddr, src_addr,
604 &icmp6h, solicited_addr, 604 &icmp6h, solicited_addr,
605 inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); 605 inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
@@ -1229,7 +1229,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1229 ND_PRINTK0(KERN_ERR 1229 ND_PRINTK0(KERN_ERR
1230 "ICMPv6 RA: %s() got default router without neighbour.\n", 1230 "ICMPv6 RA: %s() got default router without neighbour.\n",
1231 __func__); 1231 __func__);
1232 dst_release(&rt->u.dst); 1232 dst_release(&rt->dst);
1233 in6_dev_put(in6_dev); 1233 in6_dev_put(in6_dev);
1234 return; 1234 return;
1235 } 1235 }
@@ -1244,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1244 if (ra_msg->icmph.icmp6_hop_limit) { 1244 if (ra_msg->icmph.icmp6_hop_limit) {
1245 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; 1245 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1246 if (rt) 1246 if (rt)
1247 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; 1247 rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
1248 } 1248 }
1249 1249
1250skip_defrtr: 1250skip_defrtr:
@@ -1363,7 +1363,7 @@ skip_linkparms:
1363 in6_dev->cnf.mtu6 = mtu; 1363 in6_dev->cnf.mtu6 = mtu;
1364 1364
1365 if (rt) 1365 if (rt)
1366 rt->u.dst.metrics[RTAX_MTU-1] = mtu; 1366 rt->dst.metrics[RTAX_MTU-1] = mtu;
1367 1367
1368 rt6_mtu_change(skb->dev, mtu); 1368 rt6_mtu_change(skb->dev, mtu);
1369 } 1369 }
@@ -1384,7 +1384,7 @@ skip_linkparms:
1384 } 1384 }
1385out: 1385out:
1386 if (rt) 1386 if (rt)
1387 dst_release(&rt->u.dst); 1387 dst_release(&rt->dst);
1388 else if (neigh) 1388 else if (neigh)
1389 neigh_release(neigh); 1389 neigh_release(neigh);
1390 in6_dev_put(in6_dev); 1390 in6_dev_put(in6_dev);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index a74951c039b6..7155b2451d7c 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -151,9 +151,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
151 protocol, 151 protocol,
152 csum_sub(0, hsum))); 152 csum_sub(0, hsum)));
153 skb->ip_summed = CHECKSUM_NONE; 153 skb->ip_summed = CHECKSUM_NONE;
154 csum = __skb_checksum_complete_head(skb, dataoff + len); 154 return __skb_checksum_complete_head(skb, dataoff + len);
155 if (!csum)
156 skb->ip_summed = CHECKSUM_UNNECESSARY;
157 } 155 }
158 return csum; 156 return csum;
159}; 157};
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 8c201743d96d..413ab0754e1f 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -43,7 +43,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
43 43
44static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; 44static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
45static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; 45static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
46static DEFINE_RWLOCK(queue_lock); 46static DEFINE_SPINLOCK(queue_lock);
47static int peer_pid __read_mostly; 47static int peer_pid __read_mostly;
48static unsigned int copy_range __read_mostly; 48static unsigned int copy_range __read_mostly;
49static unsigned int queue_total; 49static unsigned int queue_total;
@@ -73,10 +73,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
73 break; 73 break;
74 74
75 case IPQ_COPY_PACKET: 75 case IPQ_COPY_PACKET:
76 copy_mode = mode; 76 if (range > 0xFFFF)
77 range = 0xFFFF;
77 copy_range = range; 78 copy_range = range;
78 if (copy_range > 0xFFFF) 79 copy_mode = mode;
79 copy_range = 0xFFFF;
80 break; 80 break;
81 81
82 default: 82 default:
@@ -102,7 +102,7 @@ ipq_find_dequeue_entry(unsigned long id)
102{ 102{
103 struct nf_queue_entry *entry = NULL, *i; 103 struct nf_queue_entry *entry = NULL, *i;
104 104
105 write_lock_bh(&queue_lock); 105 spin_lock_bh(&queue_lock);
106 106
107 list_for_each_entry(i, &queue_list, list) { 107 list_for_each_entry(i, &queue_list, list) {
108 if ((unsigned long)i == id) { 108 if ((unsigned long)i == id) {
@@ -116,7 +116,7 @@ ipq_find_dequeue_entry(unsigned long id)
116 queue_total--; 116 queue_total--;
117 } 117 }
118 118
119 write_unlock_bh(&queue_lock); 119 spin_unlock_bh(&queue_lock);
120 return entry; 120 return entry;
121} 121}
122 122
@@ -137,9 +137,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
137static void 137static void
138ipq_flush(ipq_cmpfn cmpfn, unsigned long data) 138ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
139{ 139{
140 write_lock_bh(&queue_lock); 140 spin_lock_bh(&queue_lock);
141 __ipq_flush(cmpfn, data); 141 __ipq_flush(cmpfn, data);
142 write_unlock_bh(&queue_lock); 142 spin_unlock_bh(&queue_lock);
143} 143}
144 144
145static struct sk_buff * 145static struct sk_buff *
@@ -153,9 +153,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
153 struct nlmsghdr *nlh; 153 struct nlmsghdr *nlh;
154 struct timeval tv; 154 struct timeval tv;
155 155
156 read_lock_bh(&queue_lock); 156 switch (ACCESS_ONCE(copy_mode)) {
157
158 switch (copy_mode) {
159 case IPQ_COPY_META: 157 case IPQ_COPY_META:
160 case IPQ_COPY_NONE: 158 case IPQ_COPY_NONE:
161 size = NLMSG_SPACE(sizeof(*pmsg)); 159 size = NLMSG_SPACE(sizeof(*pmsg));
@@ -163,26 +161,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
163 161
164 case IPQ_COPY_PACKET: 162 case IPQ_COPY_PACKET:
165 if (entry->skb->ip_summed == CHECKSUM_PARTIAL && 163 if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
166 (*errp = skb_checksum_help(entry->skb))) { 164 (*errp = skb_checksum_help(entry->skb)))
167 read_unlock_bh(&queue_lock);
168 return NULL; 165 return NULL;
169 } 166
170 if (copy_range == 0 || copy_range > entry->skb->len) 167 data_len = ACCESS_ONCE(copy_range);
168 if (data_len == 0 || data_len > entry->skb->len)
171 data_len = entry->skb->len; 169 data_len = entry->skb->len;
172 else
173 data_len = copy_range;
174 170
175 size = NLMSG_SPACE(sizeof(*pmsg) + data_len); 171 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
176 break; 172 break;
177 173
178 default: 174 default:
179 *errp = -EINVAL; 175 *errp = -EINVAL;
180 read_unlock_bh(&queue_lock);
181 return NULL; 176 return NULL;
182 } 177 }
183 178
184 read_unlock_bh(&queue_lock);
185
186 skb = alloc_skb(size, GFP_ATOMIC); 179 skb = alloc_skb(size, GFP_ATOMIC);
187 if (!skb) 180 if (!skb)
188 goto nlmsg_failure; 181 goto nlmsg_failure;
@@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
242 if (nskb == NULL) 235 if (nskb == NULL)
243 return status; 236 return status;
244 237
245 write_lock_bh(&queue_lock); 238 spin_lock_bh(&queue_lock);
246 239
247 if (!peer_pid) 240 if (!peer_pid)
248 goto err_out_free_nskb; 241 goto err_out_free_nskb;
@@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
266 259
267 __ipq_enqueue_entry(entry); 260 __ipq_enqueue_entry(entry);
268 261
269 write_unlock_bh(&queue_lock); 262 spin_unlock_bh(&queue_lock);
270 return status; 263 return status;
271 264
272err_out_free_nskb: 265err_out_free_nskb:
273 kfree_skb(nskb); 266 kfree_skb(nskb);
274 267
275err_out_unlock: 268err_out_unlock:
276 write_unlock_bh(&queue_lock); 269 spin_unlock_bh(&queue_lock);
277 return status; 270 return status;
278} 271}
279 272
@@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range)
342{ 335{
343 int status; 336 int status;
344 337
345 write_lock_bh(&queue_lock); 338 spin_lock_bh(&queue_lock);
346 status = __ipq_set_mode(mode, range); 339 status = __ipq_set_mode(mode, range);
347 write_unlock_bh(&queue_lock); 340 spin_unlock_bh(&queue_lock);
348 return status; 341 return status;
349} 342}
350 343
@@ -441,11 +434,11 @@ __ipq_rcv_skb(struct sk_buff *skb)
441 if (security_netlink_recv(skb, CAP_NET_ADMIN)) 434 if (security_netlink_recv(skb, CAP_NET_ADMIN))
442 RCV_SKB_FAIL(-EPERM); 435 RCV_SKB_FAIL(-EPERM);
443 436
444 write_lock_bh(&queue_lock); 437 spin_lock_bh(&queue_lock);
445 438
446 if (peer_pid) { 439 if (peer_pid) {
447 if (peer_pid != pid) { 440 if (peer_pid != pid) {
448 write_unlock_bh(&queue_lock); 441 spin_unlock_bh(&queue_lock);
449 RCV_SKB_FAIL(-EBUSY); 442 RCV_SKB_FAIL(-EBUSY);
450 } 443 }
451 } else { 444 } else {
@@ -453,7 +446,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
453 peer_pid = pid; 446 peer_pid = pid;
454 } 447 }
455 448
456 write_unlock_bh(&queue_lock); 449 spin_unlock_bh(&queue_lock);
457 450
458 status = ipq_receive_peer(NLMSG_DATA(nlh), type, 451 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
459 nlmsglen - NLMSG_LENGTH(0)); 452 nlmsglen - NLMSG_LENGTH(0));
@@ -498,10 +491,10 @@ ipq_rcv_nl_event(struct notifier_block *this,
498 struct netlink_notify *n = ptr; 491 struct netlink_notify *n = ptr;
499 492
500 if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) { 493 if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
501 write_lock_bh(&queue_lock); 494 spin_lock_bh(&queue_lock);
502 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) 495 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
503 __ipq_reset(); 496 __ipq_reset();
504 write_unlock_bh(&queue_lock); 497 spin_unlock_bh(&queue_lock);
505 } 498 }
506 return NOTIFY_DONE; 499 return NOTIFY_DONE;
507} 500}
@@ -528,7 +521,7 @@ static ctl_table ipq_table[] = {
528#ifdef CONFIG_PROC_FS 521#ifdef CONFIG_PROC_FS
529static int ip6_queue_show(struct seq_file *m, void *v) 522static int ip6_queue_show(struct seq_file *m, void *v)
530{ 523{
531 read_lock_bh(&queue_lock); 524 spin_lock_bh(&queue_lock);
532 525
533 seq_printf(m, 526 seq_printf(m,
534 "Peer PID : %d\n" 527 "Peer PID : %d\n"
@@ -546,7 +539,7 @@ static int ip6_queue_show(struct seq_file *m, void *v)
546 queue_dropped, 539 queue_dropped,
547 queue_user_dropped); 540 queue_user_dropped);
548 541
549 read_unlock_bh(&queue_lock); 542 spin_unlock_bh(&queue_lock);
550 return 0; 543 return 0;
551} 544}
552 545
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6f517bd83692..dc41d6d3c6c6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -363,7 +363,7 @@ ip6t_do_table(struct sk_buff *skb,
363 cpu = smp_processor_id(); 363 cpu = smp_processor_id();
364 table_base = private->entries[cpu]; 364 table_base = private->entries[cpu];
365 jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; 365 jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
366 stackptr = &private->stackptr[cpu]; 366 stackptr = per_cpu_ptr(private->stackptr, cpu);
367 origptr = *stackptr; 367 origptr = *stackptr;
368 368
369 e = get_entry(table_base, private->hook_entry[hook]); 369 e = get_entry(table_base, private->hook_entry[hook]);
@@ -943,7 +943,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
943 (other than comefrom, which userspace doesn't care 943 (other than comefrom, which userspace doesn't care
944 about). */ 944 about). */
945 countersize = sizeof(struct xt_counters) * private->number; 945 countersize = sizeof(struct xt_counters) * private->number;
946 counters = vmalloc_node(countersize, numa_node_id()); 946 counters = vmalloc(countersize);
947 947
948 if (counters == NULL) 948 if (counters == NULL)
949 return ERR_PTR(-ENOMEM); 949 return ERR_PTR(-ENOMEM);
@@ -1213,8 +1213,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1213 struct ip6t_entry *iter; 1213 struct ip6t_entry *iter;
1214 1214
1215 ret = 0; 1215 ret = 0;
1216 counters = vmalloc_node(num_counters * sizeof(struct xt_counters), 1216 counters = vmalloc(num_counters * sizeof(struct xt_counters));
1217 numa_node_id());
1218 if (!counters) { 1217 if (!counters) {
1219 ret = -ENOMEM; 1218 ret = -ENOMEM;
1220 goto out; 1219 goto out;
@@ -1368,7 +1367,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1368 if (len != size + num_counters * sizeof(struct xt_counters)) 1367 if (len != size + num_counters * sizeof(struct xt_counters))
1369 return -EINVAL; 1368 return -EINVAL;
1370 1369
1371 paddc = vmalloc_node(len - size, numa_node_id()); 1370 paddc = vmalloc(len - size);
1372 if (!paddc) 1371 if (!paddc)
1373 return -ENOMEM; 1372 return -ENOMEM;
1374 1373
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index af4ee11f2066..0a07ae7b933f 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -373,6 +373,56 @@ static void dump_packet(const struct nf_loginfo *info,
373 printk("MARK=0x%x ", skb->mark); 373 printk("MARK=0x%x ", skb->mark);
374} 374}
375 375
376static void dump_mac_header(const struct nf_loginfo *info,
377 const struct sk_buff *skb)
378{
379 struct net_device *dev = skb->dev;
380 unsigned int logflags = 0;
381
382 if (info->type == NF_LOG_TYPE_LOG)
383 logflags = info->u.log.logflags;
384
385 if (!(logflags & IP6T_LOG_MACDECODE))
386 goto fallback;
387
388 switch (dev->type) {
389 case ARPHRD_ETHER:
390 printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
391 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
392 ntohs(eth_hdr(skb)->h_proto));
393 return;
394 default:
395 break;
396 }
397
398fallback:
399 printk("MAC=");
400 if (dev->hard_header_len &&
401 skb->mac_header != skb->network_header) {
402 const unsigned char *p = skb_mac_header(skb);
403 unsigned int len = dev->hard_header_len;
404 unsigned int i;
405
406 if (dev->type == ARPHRD_SIT &&
407 (p -= ETH_HLEN) < skb->head)
408 p = NULL;
409
410 if (p != NULL) {
411 printk("%02x", *p++);
412 for (i = 1; i < len; i++)
413 printk(":%02x", p[i]);
414 }
415 printk(" ");
416
417 if (dev->type == ARPHRD_SIT) {
418 const struct iphdr *iph =
419 (struct iphdr *)skb_mac_header(skb);
420 printk("TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr);
421 }
422 } else
423 printk(" ");
424}
425
376static struct nf_loginfo default_loginfo = { 426static struct nf_loginfo default_loginfo = {
377 .type = NF_LOG_TYPE_LOG, 427 .type = NF_LOG_TYPE_LOG,
378 .u = { 428 .u = {
@@ -400,35 +450,10 @@ ip6t_log_packet(u_int8_t pf,
400 prefix, 450 prefix,
401 in ? in->name : "", 451 in ? in->name : "",
402 out ? out->name : ""); 452 out ? out->name : "");
403 if (in && !out) {
404 unsigned int len;
405 /* MAC logging for input chain only. */
406 printk("MAC=");
407 if (skb->dev && (len = skb->dev->hard_header_len) &&
408 skb->mac_header != skb->network_header) {
409 const unsigned char *p = skb_mac_header(skb);
410 int i;
411
412 if (skb->dev->type == ARPHRD_SIT &&
413 (p -= ETH_HLEN) < skb->head)
414 p = NULL;
415
416 if (p != NULL) {
417 for (i = 0; i < len; i++)
418 printk("%02x%s", p[i],
419 i == len - 1 ? "" : ":");
420 }
421 printk(" ");
422 453
423 if (skb->dev->type == ARPHRD_SIT) { 454 /* MAC logging for input path only. */
424 const struct iphdr *iph = 455 if (in && !out)
425 (struct iphdr *)skb_mac_header(skb); 456 dump_mac_header(loginfo, skb);
426 printk("TUNNEL=%pI4->%pI4 ",
427 &iph->saddr, &iph->daddr);
428 }
429 } else
430 printk(" ");
431 }
432 457
433 dump_packet(loginfo, skb, skb_network_offset(skb), 1); 458 dump_packet(loginfo, skb, skb_network_offset(skb), 1);
434 printk("\n"); 459 printk("\n");
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 47d227713758..2933396e0281 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -97,9 +97,11 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
97 fl.fl_ip_dport = otcph.source; 97 fl.fl_ip_dport = otcph.source;
98 security_skb_classify_flow(oldskb, &fl); 98 security_skb_classify_flow(oldskb, &fl);
99 dst = ip6_route_output(net, NULL, &fl); 99 dst = ip6_route_output(net, NULL, &fl);
100 if (dst == NULL) 100 if (dst == NULL || dst->error) {
101 dst_release(dst);
101 return; 102 return;
102 if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0)) 103 }
104 if (xfrm_lookup(net, &dst, &fl, NULL, 0))
103 return; 105 return;
104 106
105 hh_len = (dst->dev->hard_header_len + 15)&~15; 107 hh_len = (dst->dev->hard_header_len + 15)&~15;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 9be81776415e..1df3c8b6bf47 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -208,7 +208,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
208 type = icmp6h->icmp6_type - 130; 208 type = icmp6h->icmp6_type - 130;
209 if (type >= 0 && type < sizeof(noct_valid_new) && 209 if (type >= 0 && type < sizeof(noct_valid_new) &&
210 noct_valid_new[type]) { 210 noct_valid_new[type]) {
211 skb->nfct = &nf_conntrack_untracked.ct_general; 211 skb->nfct = &nf_ct_untracked_get()->ct_general;
212 skb->nfctinfo = IP_CT_NEW; 212 skb->nfctinfo = IP_CT_NEW;
213 nf_conntrack_get(skb->nfct); 213 nf_conntrack_get(skb->nfct);
214 return NF_ACCEPT; 214 return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6fb890187de0..9254008602d4 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -114,10 +114,8 @@ static void nf_skb_free(struct sk_buff *skb)
114} 114}
115 115
116/* Memory Tracking Functions. */ 116/* Memory Tracking Functions. */
117static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) 117static void frag_kfree_skb(struct sk_buff *skb)
118{ 118{
119 if (work)
120 *work -= skb->truesize;
121 atomic_sub(skb->truesize, &nf_init_frags.mem); 119 atomic_sub(skb->truesize, &nf_init_frags.mem);
122 nf_skb_free(skb); 120 nf_skb_free(skb);
123 kfree_skb(skb); 121 kfree_skb(skb);
@@ -335,7 +333,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
335 fq->q.fragments = next; 333 fq->q.fragments = next;
336 334
337 fq->q.meat -= free_it->len; 335 fq->q.meat -= free_it->len;
338 frag_kfree_skb(free_it, NULL); 336 frag_kfree_skb(free_it);
339 } 337 }
340 } 338 }
341 339
@@ -442,7 +440,6 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
442 skb_shinfo(head)->frag_list = head->next; 440 skb_shinfo(head)->frag_list = head->next;
443 skb_reset_transport_header(head); 441 skb_reset_transport_header(head);
444 skb_push(head, head->data - skb_network_header(head)); 442 skb_push(head, head->data - skb_network_header(head));
445 atomic_sub(head->truesize, &nf_init_frags.mem);
446 443
447 for (fp=head->next; fp; fp = fp->next) { 444 for (fp=head->next; fp; fp = fp->next) {
448 head->data_len += fp->len; 445 head->data_len += fp->len;
@@ -452,8 +449,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
452 else if (head->ip_summed == CHECKSUM_COMPLETE) 449 else if (head->ip_summed == CHECKSUM_COMPLETE)
453 head->csum = csum_add(head->csum, fp->csum); 450 head->csum = csum_add(head->csum, fp->csum);
454 head->truesize += fp->truesize; 451 head->truesize += fp->truesize;
455 atomic_sub(fp->truesize, &nf_init_frags.mem);
456 } 452 }
453 atomic_sub(head->truesize, &nf_init_frags.mem);
457 454
458 head->next = NULL; 455 head->next = NULL;
459 head->dev = dev; 456 head->dev = dev;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 566798d69f37..d082eaeefa25 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -174,17 +174,28 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib,
174 const struct snmp_mib *itemlist) 174 const struct snmp_mib *itemlist)
175{ 175{
176 int i; 176 int i;
177 for (i=0; itemlist[i].name; i++) 177
178 for (i = 0; itemlist[i].name; i++)
178 seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, 179 seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
179 snmp_fold_field(mib, itemlist[i].entry)); 180 snmp_fold_field(mib, itemlist[i].entry));
180} 181}
181 182
183static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
184 const struct snmp_mib *itemlist, size_t syncpoff)
185{
186 int i;
187
188 for (i = 0; itemlist[i].name; i++)
189 seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name,
190 snmp_fold_field64(mib, itemlist[i].entry, syncpoff));
191}
192
182static int snmp6_seq_show(struct seq_file *seq, void *v) 193static int snmp6_seq_show(struct seq_file *seq, void *v)
183{ 194{
184 struct net *net = (struct net *)seq->private; 195 struct net *net = (struct net *)seq->private;
185 196
186 snmp6_seq_show_item(seq, (void __percpu **)net->mib.ipv6_statistics, 197 snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
187 snmp6_ipstats_list); 198 snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
188 snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, 199 snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
189 snmp6_icmp6_list); 200 snmp6_icmp6_list);
190 snmp6_seq_show_icmpv6msg(seq, 201 snmp6_seq_show_icmpv6msg(seq,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4a4dcbe4f8b2..e677937a07fc 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -602,31 +602,33 @@ out:
602} 602}
603 603
604static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, 604static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
605 struct flowi *fl, struct rt6_info *rt, 605 struct flowi *fl, struct dst_entry **dstp,
606 unsigned int flags) 606 unsigned int flags)
607{ 607{
608 struct ipv6_pinfo *np = inet6_sk(sk); 608 struct ipv6_pinfo *np = inet6_sk(sk);
609 struct ipv6hdr *iph; 609 struct ipv6hdr *iph;
610 struct sk_buff *skb; 610 struct sk_buff *skb;
611 int err; 611 int err;
612 struct rt6_info *rt = (struct rt6_info *)*dstp;
612 613
613 if (length > rt->u.dst.dev->mtu) { 614 if (length > rt->dst.dev->mtu) {
614 ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu); 615 ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu);
615 return -EMSGSIZE; 616 return -EMSGSIZE;
616 } 617 }
617 if (flags&MSG_PROBE) 618 if (flags&MSG_PROBE)
618 goto out; 619 goto out;
619 620
620 skb = sock_alloc_send_skb(sk, 621 skb = sock_alloc_send_skb(sk,
621 length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, 622 length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
622 flags & MSG_DONTWAIT, &err); 623 flags & MSG_DONTWAIT, &err);
623 if (skb == NULL) 624 if (skb == NULL)
624 goto error; 625 goto error;
625 skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); 626 skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
626 627
627 skb->priority = sk->sk_priority; 628 skb->priority = sk->sk_priority;
628 skb->mark = sk->sk_mark; 629 skb->mark = sk->sk_mark;
629 skb_dst_set(skb, dst_clone(&rt->u.dst)); 630 skb_dst_set(skb, &rt->dst);
631 *dstp = NULL;
630 632
631 skb_put(skb, length); 633 skb_put(skb, length);
632 skb_reset_network_header(skb); 634 skb_reset_network_header(skb);
@@ -641,7 +643,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
641 643
642 IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 644 IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
643 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, 645 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
644 rt->u.dst.dev, dst_output); 646 rt->dst.dev, dst_output);
645 if (err > 0) 647 if (err > 0)
646 err = net_xmit_errno(err); 648 err = net_xmit_errno(err);
647 if (err) 649 if (err)
@@ -725,7 +727,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
725{ 727{
726 struct ipv6_txoptions opt_space; 728 struct ipv6_txoptions opt_space;
727 struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; 729 struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
728 struct in6_addr *daddr, *final_p = NULL, final; 730 struct in6_addr *daddr, *final_p, final;
729 struct inet_sock *inet = inet_sk(sk); 731 struct inet_sock *inet = inet_sk(sk);
730 struct ipv6_pinfo *np = inet6_sk(sk); 732 struct ipv6_pinfo *np = inet6_sk(sk);
731 struct raw6_sock *rp = raw6_sk(sk); 733 struct raw6_sock *rp = raw6_sk(sk);
@@ -847,13 +849,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
847 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) 849 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
848 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 850 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
849 851
850 /* merge ip6_build_xmit from ip6_output */ 852 final_p = fl6_update_dst(&fl, opt, &final);
851 if (opt && opt->srcrt) {
852 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
853 ipv6_addr_copy(&final, &fl.fl6_dst);
854 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
855 final_p = &final;
856 }
857 853
858 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) 854 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
859 fl.oif = np->mcast_oif; 855 fl.oif = np->mcast_oif;
@@ -892,9 +888,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
892 goto do_confirm; 888 goto do_confirm;
893 889
894back_from_confirm: 890back_from_confirm:
895 if (inet->hdrincl) { 891 if (inet->hdrincl)
896 err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); 892 err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags);
897 } else { 893 else {
898 lock_sock(sk); 894 lock_sock(sk);
899 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, 895 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
900 len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, 896 len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6d4292ff5854..545c4141b755 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -150,11 +150,8 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a)
150EXPORT_SYMBOL(ip6_frag_match); 150EXPORT_SYMBOL(ip6_frag_match);
151 151
152/* Memory Tracking Functions. */ 152/* Memory Tracking Functions. */
153static inline void frag_kfree_skb(struct netns_frags *nf, 153static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
154 struct sk_buff *skb, int *work)
155{ 154{
156 if (work)
157 *work -= skb->truesize;
158 atomic_sub(skb->truesize, &nf->mem); 155 atomic_sub(skb->truesize, &nf->mem);
159 kfree_skb(skb); 156 kfree_skb(skb);
160} 157}
@@ -336,6 +333,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
336 * in the chain of fragments so far. We must know where to put 333 * in the chain of fragments so far. We must know where to put
337 * this fragment, right? 334 * this fragment, right?
338 */ 335 */
336 prev = fq->q.fragments_tail;
337 if (!prev || FRAG6_CB(prev)->offset < offset) {
338 next = NULL;
339 goto found;
340 }
339 prev = NULL; 341 prev = NULL;
340 for(next = fq->q.fragments; next != NULL; next = next->next) { 342 for(next = fq->q.fragments; next != NULL; next = next->next) {
341 if (FRAG6_CB(next)->offset >= offset) 343 if (FRAG6_CB(next)->offset >= offset)
@@ -343,6 +345,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
343 prev = next; 345 prev = next;
344 } 346 }
345 347
348found:
346 /* We found where to put this one. Check for overlap with 349 /* We found where to put this one. Check for overlap with
347 * preceding fragment, and, if needed, align things so that 350 * preceding fragment, and, if needed, align things so that
348 * any overlaps are eliminated. 351 * any overlaps are eliminated.
@@ -392,7 +395,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
392 fq->q.fragments = next; 395 fq->q.fragments = next;
393 396
394 fq->q.meat -= free_it->len; 397 fq->q.meat -= free_it->len;
395 frag_kfree_skb(fq->q.net, free_it, NULL); 398 frag_kfree_skb(fq->q.net, free_it);
396 } 399 }
397 } 400 }
398 401
@@ -400,6 +403,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
400 403
401 /* Insert this fragment in the chain of fragments. */ 404 /* Insert this fragment in the chain of fragments. */
402 skb->next = next; 405 skb->next = next;
406 if (!next)
407 fq->q.fragments_tail = skb;
403 if (prev) 408 if (prev)
404 prev->next = skb; 409 prev->next = skb;
405 else 410 else
@@ -466,6 +471,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
466 goto out_oom; 471 goto out_oom;
467 472
468 fp->next = head->next; 473 fp->next = head->next;
474 if (!fp->next)
475 fq->q.fragments_tail = fp;
469 prev->next = fp; 476 prev->next = fp;
470 477
471 skb_morph(head, fq->q.fragments); 478 skb_morph(head, fq->q.fragments);
@@ -524,7 +531,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
524 skb_shinfo(head)->frag_list = head->next; 531 skb_shinfo(head)->frag_list = head->next;
525 skb_reset_transport_header(head); 532 skb_reset_transport_header(head);
526 skb_push(head, head->data - skb_network_header(head)); 533 skb_push(head, head->data - skb_network_header(head));
527 atomic_sub(head->truesize, &fq->q.net->mem);
528 534
529 for (fp=head->next; fp; fp = fp->next) { 535 for (fp=head->next; fp; fp = fp->next) {
530 head->data_len += fp->len; 536 head->data_len += fp->len;
@@ -534,8 +540,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
534 else if (head->ip_summed == CHECKSUM_COMPLETE) 540 else if (head->ip_summed == CHECKSUM_COMPLETE)
535 head->csum = csum_add(head->csum, fp->csum); 541 head->csum = csum_add(head->csum, fp->csum);
536 head->truesize += fp->truesize; 542 head->truesize += fp->truesize;
537 atomic_sub(fp->truesize, &fq->q.net->mem);
538 } 543 }
544 atomic_sub(head->truesize, &fq->q.net->mem);
539 545
540 head->next = NULL; 546 head->next = NULL;
541 head->dev = dev; 547 head->dev = dev;
@@ -553,6 +559,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
553 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); 559 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
554 rcu_read_unlock(); 560 rcu_read_unlock();
555 fq->q.fragments = NULL; 561 fq->q.fragments = NULL;
562 fq->q.fragments_tail = NULL;
556 return 1; 563 return 1;
557 564
558out_oversize: 565out_oversize:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 294cbe8b0725..8f2d0400cf8a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -126,16 +126,14 @@ static struct dst_ops ip6_dst_blackhole_ops = {
126}; 126};
127 127
128static struct rt6_info ip6_null_entry_template = { 128static struct rt6_info ip6_null_entry_template = {
129 .u = { 129 .dst = {
130 .dst = { 130 .__refcnt = ATOMIC_INIT(1),
131 .__refcnt = ATOMIC_INIT(1), 131 .__use = 1,
132 .__use = 1, 132 .obsolete = -1,
133 .obsolete = -1, 133 .error = -ENETUNREACH,
134 .error = -ENETUNREACH, 134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 135 .input = ip6_pkt_discard,
136 .input = ip6_pkt_discard, 136 .output = ip6_pkt_discard_out,
137 .output = ip6_pkt_discard_out,
138 }
139 }, 137 },
140 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
141 .rt6i_protocol = RTPROT_KERNEL, 139 .rt6i_protocol = RTPROT_KERNEL,
@@ -149,16 +147,14 @@ static int ip6_pkt_prohibit(struct sk_buff *skb);
149static int ip6_pkt_prohibit_out(struct sk_buff *skb); 147static int ip6_pkt_prohibit_out(struct sk_buff *skb);
150 148
151static struct rt6_info ip6_prohibit_entry_template = { 149static struct rt6_info ip6_prohibit_entry_template = {
152 .u = { 150 .dst = {
153 .dst = { 151 .__refcnt = ATOMIC_INIT(1),
154 .__refcnt = ATOMIC_INIT(1), 152 .__use = 1,
155 .__use = 1, 153 .obsolete = -1,
156 .obsolete = -1, 154 .error = -EACCES,
157 .error = -EACCES, 155 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
158 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 156 .input = ip6_pkt_prohibit,
159 .input = ip6_pkt_prohibit, 157 .output = ip6_pkt_prohibit_out,
160 .output = ip6_pkt_prohibit_out,
161 }
162 }, 158 },
163 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 159 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
164 .rt6i_protocol = RTPROT_KERNEL, 160 .rt6i_protocol = RTPROT_KERNEL,
@@ -167,16 +163,14 @@ static struct rt6_info ip6_prohibit_entry_template = {
167}; 163};
168 164
169static struct rt6_info ip6_blk_hole_entry_template = { 165static struct rt6_info ip6_blk_hole_entry_template = {
170 .u = { 166 .dst = {
171 .dst = { 167 .__refcnt = ATOMIC_INIT(1),
172 .__refcnt = ATOMIC_INIT(1), 168 .__use = 1,
173 .__use = 1, 169 .obsolete = -1,
174 .obsolete = -1, 170 .error = -EINVAL,
175 .error = -EINVAL, 171 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
176 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 172 .input = dst_discard,
177 .input = dst_discard, 173 .output = dst_discard,
178 .output = dst_discard,
179 }
180 }, 174 },
181 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 175 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
182 .rt6i_protocol = RTPROT_KERNEL, 176 .rt6i_protocol = RTPROT_KERNEL,
@@ -249,7 +243,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
249 if (!oif && ipv6_addr_any(saddr)) 243 if (!oif && ipv6_addr_any(saddr))
250 goto out; 244 goto out;
251 245
252 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { 246 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
253 struct net_device *dev = sprt->rt6i_dev; 247 struct net_device *dev = sprt->rt6i_dev;
254 248
255 if (oif) { 249 if (oif) {
@@ -407,10 +401,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
407 401
408 match = NULL; 402 match = NULL;
409 for (rt = rr_head; rt && rt->rt6i_metric == metric; 403 for (rt = rr_head; rt && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next) 404 rt = rt->dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match); 405 match = find_match(rt, oif, strict, &mpri, match);
412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; 406 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
413 rt = rt->u.dst.rt6_next) 407 rt = rt->dst.rt6_next)
414 match = find_match(rt, oif, strict, &mpri, match); 408 match = find_match(rt, oif, strict, &mpri, match);
415 409
416 return match; 410 return match;
@@ -432,7 +426,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
432 426
433 if (!match && 427 if (!match &&
434 (strict & RT6_LOOKUP_F_REACHABLE)) { 428 (strict & RT6_LOOKUP_F_REACHABLE)) {
435 struct rt6_info *next = rt0->u.dst.rt6_next; 429 struct rt6_info *next = rt0->dst.rt6_next;
436 430
437 /* no entries matched; do round-robin */ 431 /* no entries matched; do round-robin */
438 if (!next || next->rt6i_metric != rt0->rt6i_metric) 432 if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -517,7 +511,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
517 rt->rt6i_expires = jiffies + HZ * lifetime; 511 rt->rt6i_expires = jiffies + HZ * lifetime;
518 rt->rt6i_flags |= RTF_EXPIRES; 512 rt->rt6i_flags |= RTF_EXPIRES;
519 } 513 }
520 dst_release(&rt->u.dst); 514 dst_release(&rt->dst);
521 } 515 }
522 return 0; 516 return 0;
523} 517}
@@ -555,7 +549,7 @@ restart:
555 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); 549 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
556 BACKTRACK(net, &fl->fl6_src); 550 BACKTRACK(net, &fl->fl6_src);
557out: 551out:
558 dst_use(&rt->u.dst, jiffies); 552 dst_use(&rt->dst, jiffies);
559 read_unlock_bh(&table->tb6_lock); 553 read_unlock_bh(&table->tb6_lock);
560 return rt; 554 return rt;
561 555
@@ -643,7 +637,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 637 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
644 rt->rt6i_dst.plen = 128; 638 rt->rt6i_dst.plen = 128;
645 rt->rt6i_flags |= RTF_CACHE; 639 rt->rt6i_flags |= RTF_CACHE;
646 rt->u.dst.flags |= DST_HOST; 640 rt->dst.flags |= DST_HOST;
647 641
648#ifdef CONFIG_IPV6_SUBTREES 642#ifdef CONFIG_IPV6_SUBTREES
649 if (rt->rt6i_src.plen && saddr) { 643 if (rt->rt6i_src.plen && saddr) {
@@ -677,7 +671,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
677 if (net_ratelimit()) 671 if (net_ratelimit())
678 printk(KERN_WARNING 672 printk(KERN_WARNING
679 "Neighbour table overflow.\n"); 673 "Neighbour table overflow.\n");
680 dst_free(&rt->u.dst); 674 dst_free(&rt->dst);
681 return NULL; 675 return NULL;
682 } 676 }
683 rt->rt6i_nexthop = neigh; 677 rt->rt6i_nexthop = neigh;
@@ -694,7 +688,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 688 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
695 rt->rt6i_dst.plen = 128; 689 rt->rt6i_dst.plen = 128;
696 rt->rt6i_flags |= RTF_CACHE; 690 rt->rt6i_flags |= RTF_CACHE;
697 rt->u.dst.flags |= DST_HOST; 691 rt->dst.flags |= DST_HOST;
698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); 692 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
699 } 693 }
700 return rt; 694 return rt;
@@ -726,7 +720,7 @@ restart:
726 rt->rt6i_flags & RTF_CACHE) 720 rt->rt6i_flags & RTF_CACHE)
727 goto out; 721 goto out;
728 722
729 dst_hold(&rt->u.dst); 723 dst_hold(&rt->dst);
730 read_unlock_bh(&table->tb6_lock); 724 read_unlock_bh(&table->tb6_lock);
731 725
732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 726 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
@@ -739,10 +733,10 @@ restart:
739#endif 733#endif
740 } 734 }
741 735
742 dst_release(&rt->u.dst); 736 dst_release(&rt->dst);
743 rt = nrt ? : net->ipv6.ip6_null_entry; 737 rt = nrt ? : net->ipv6.ip6_null_entry;
744 738
745 dst_hold(&rt->u.dst); 739 dst_hold(&rt->dst);
746 if (nrt) { 740 if (nrt) {
747 err = ip6_ins_rt(nrt); 741 err = ip6_ins_rt(nrt);
748 if (!err) 742 if (!err)
@@ -756,7 +750,7 @@ restart:
756 * Race condition! In the gap, when table->tb6_lock was 750 * Race condition! In the gap, when table->tb6_lock was
757 * released someone could insert this route. Relookup. 751 * released someone could insert this route. Relookup.
758 */ 752 */
759 dst_release(&rt->u.dst); 753 dst_release(&rt->dst);
760 goto relookup; 754 goto relookup;
761 755
762out: 756out:
@@ -764,11 +758,11 @@ out:
764 reachable = 0; 758 reachable = 0;
765 goto restart_2; 759 goto restart_2;
766 } 760 }
767 dst_hold(&rt->u.dst); 761 dst_hold(&rt->dst);
768 read_unlock_bh(&table->tb6_lock); 762 read_unlock_bh(&table->tb6_lock);
769out2: 763out2:
770 rt->u.dst.lastuse = jiffies; 764 rt->dst.lastuse = jiffies;
771 rt->u.dst.__use++; 765 rt->dst.__use++;
772 766
773 return rt; 767 return rt;
774} 768}
@@ -814,7 +808,7 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
814{ 808{
815 int flags = 0; 809 int flags = 0;
816 810
817 if (fl->oif || rt6_need_strict(&fl->fl6_dst)) 811 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
818 flags |= RT6_LOOKUP_F_IFACE; 812 flags |= RT6_LOOKUP_F_IFACE;
819 813
820 if (!ipv6_addr_any(&fl->fl6_src)) 814 if (!ipv6_addr_any(&fl->fl6_src))
@@ -835,15 +829,15 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
835 struct dst_entry *new = NULL; 829 struct dst_entry *new = NULL;
836 830
837 if (rt) { 831 if (rt) {
838 new = &rt->u.dst; 832 new = &rt->dst;
839 833
840 atomic_set(&new->__refcnt, 1); 834 atomic_set(&new->__refcnt, 1);
841 new->__use = 1; 835 new->__use = 1;
842 new->input = dst_discard; 836 new->input = dst_discard;
843 new->output = dst_discard; 837 new->output = dst_discard;
844 838
845 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 839 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
846 new->dev = ort->u.dst.dev; 840 new->dev = ort->dst.dev;
847 if (new->dev) 841 if (new->dev)
848 dev_hold(new->dev); 842 dev_hold(new->dev);
849 rt->rt6i_idev = ort->rt6i_idev; 843 rt->rt6i_idev = ort->rt6i_idev;
@@ -912,7 +906,7 @@ static void ip6_link_failure(struct sk_buff *skb)
912 rt = (struct rt6_info *) skb_dst(skb); 906 rt = (struct rt6_info *) skb_dst(skb);
913 if (rt) { 907 if (rt) {
914 if (rt->rt6i_flags&RTF_CACHE) { 908 if (rt->rt6i_flags&RTF_CACHE) {
915 dst_set_expires(&rt->u.dst, 0); 909 dst_set_expires(&rt->dst, 0);
916 rt->rt6i_flags |= RTF_EXPIRES; 910 rt->rt6i_flags |= RTF_EXPIRES;
917 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 911 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
918 rt->rt6i_node->fn_sernum = -1; 912 rt->rt6i_node->fn_sernum = -1;
@@ -986,14 +980,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
986 rt->rt6i_dev = dev; 980 rt->rt6i_dev = dev;
987 rt->rt6i_idev = idev; 981 rt->rt6i_idev = idev;
988 rt->rt6i_nexthop = neigh; 982 rt->rt6i_nexthop = neigh;
989 atomic_set(&rt->u.dst.__refcnt, 1); 983 atomic_set(&rt->dst.__refcnt, 1);
990 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; 984 rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
991 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 985 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
992 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 986 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
993 rt->u.dst.output = ip6_output; 987 rt->dst.output = ip6_output;
994 988
995#if 0 /* there's no chance to use these for ndisc */ 989#if 0 /* there's no chance to use these for ndisc */
996 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 990 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
997 ? DST_HOST 991 ? DST_HOST
998 : 0; 992 : 0;
999 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 993 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
@@ -1001,14 +995,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1001#endif 995#endif
1002 996
1003 spin_lock_bh(&icmp6_dst_lock); 997 spin_lock_bh(&icmp6_dst_lock);
1004 rt->u.dst.next = icmp6_dst_gc_list; 998 rt->dst.next = icmp6_dst_gc_list;
1005 icmp6_dst_gc_list = &rt->u.dst; 999 icmp6_dst_gc_list = &rt->dst;
1006 spin_unlock_bh(&icmp6_dst_lock); 1000 spin_unlock_bh(&icmp6_dst_lock);
1007 1001
1008 fib6_force_start_gc(net); 1002 fib6_force_start_gc(net);
1009 1003
1010out: 1004out:
1011 return &rt->u.dst; 1005 return &rt->dst;
1012} 1006}
1013 1007
1014int icmp6_dst_gc(void) 1008int icmp6_dst_gc(void)
@@ -1090,11 +1084,11 @@ static int ipv6_get_mtu(struct net_device *dev)
1090 int mtu = IPV6_MIN_MTU; 1084 int mtu = IPV6_MIN_MTU;
1091 struct inet6_dev *idev; 1085 struct inet6_dev *idev;
1092 1086
1093 idev = in6_dev_get(dev); 1087 rcu_read_lock();
1094 if (idev) { 1088 idev = __in6_dev_get(dev);
1089 if (idev)
1095 mtu = idev->cnf.mtu6; 1090 mtu = idev->cnf.mtu6;
1096 in6_dev_put(idev); 1091 rcu_read_unlock();
1097 }
1098 return mtu; 1092 return mtu;
1099} 1093}
1100 1094
@@ -1103,12 +1097,15 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
1103 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); 1097 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1104 if (hoplimit < 0) { 1098 if (hoplimit < 0) {
1105 struct net_device *dev = dst->dev; 1099 struct net_device *dev = dst->dev;
1106 struct inet6_dev *idev = in6_dev_get(dev); 1100 struct inet6_dev *idev;
1107 if (idev) { 1101
1102 rcu_read_lock();
1103 idev = __in6_dev_get(dev);
1104 if (idev)
1108 hoplimit = idev->cnf.hop_limit; 1105 hoplimit = idev->cnf.hop_limit;
1109 in6_dev_put(idev); 1106 else
1110 } else
1111 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; 1107 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1108 rcu_read_unlock();
1112 } 1109 }
1113 return hoplimit; 1110 return hoplimit;
1114} 1111}
@@ -1159,7 +1156,7 @@ int ip6_route_add(struct fib6_config *cfg)
1159 goto out; 1156 goto out;
1160 } 1157 }
1161 1158
1162 rt->u.dst.obsolete = -1; 1159 rt->dst.obsolete = -1;
1163 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? 1160 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1164 jiffies + clock_t_to_jiffies(cfg->fc_expires) : 1161 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1165 0; 1162 0;
@@ -1171,16 +1168,16 @@ int ip6_route_add(struct fib6_config *cfg)
1171 addr_type = ipv6_addr_type(&cfg->fc_dst); 1168 addr_type = ipv6_addr_type(&cfg->fc_dst);
1172 1169
1173 if (addr_type & IPV6_ADDR_MULTICAST) 1170 if (addr_type & IPV6_ADDR_MULTICAST)
1174 rt->u.dst.input = ip6_mc_input; 1171 rt->dst.input = ip6_mc_input;
1175 else 1172 else
1176 rt->u.dst.input = ip6_forward; 1173 rt->dst.input = ip6_forward;
1177 1174
1178 rt->u.dst.output = ip6_output; 1175 rt->dst.output = ip6_output;
1179 1176
1180 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1177 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1181 rt->rt6i_dst.plen = cfg->fc_dst_len; 1178 rt->rt6i_dst.plen = cfg->fc_dst_len;
1182 if (rt->rt6i_dst.plen == 128) 1179 if (rt->rt6i_dst.plen == 128)
1183 rt->u.dst.flags = DST_HOST; 1180 rt->dst.flags = DST_HOST;
1184 1181
1185#ifdef CONFIG_IPV6_SUBTREES 1182#ifdef CONFIG_IPV6_SUBTREES
1186 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1183 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -1208,9 +1205,9 @@ int ip6_route_add(struct fib6_config *cfg)
1208 goto out; 1205 goto out;
1209 } 1206 }
1210 } 1207 }
1211 rt->u.dst.output = ip6_pkt_discard_out; 1208 rt->dst.output = ip6_pkt_discard_out;
1212 rt->u.dst.input = ip6_pkt_discard; 1209 rt->dst.input = ip6_pkt_discard;
1213 rt->u.dst.error = -ENETUNREACH; 1210 rt->dst.error = -ENETUNREACH;
1214 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1211 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1215 goto install_route; 1212 goto install_route;
1216 } 1213 }
@@ -1244,7 +1241,7 @@ int ip6_route_add(struct fib6_config *cfg)
1244 goto out; 1241 goto out;
1245 if (dev) { 1242 if (dev) {
1246 if (dev != grt->rt6i_dev) { 1243 if (dev != grt->rt6i_dev) {
1247 dst_release(&grt->u.dst); 1244 dst_release(&grt->dst);
1248 goto out; 1245 goto out;
1249 } 1246 }
1250 } else { 1247 } else {
@@ -1255,7 +1252,7 @@ int ip6_route_add(struct fib6_config *cfg)
1255 } 1252 }
1256 if (!(grt->rt6i_flags&RTF_GATEWAY)) 1253 if (!(grt->rt6i_flags&RTF_GATEWAY))
1257 err = 0; 1254 err = 0;
1258 dst_release(&grt->u.dst); 1255 dst_release(&grt->dst);
1259 1256
1260 if (err) 1257 if (err)
1261 goto out; 1258 goto out;
@@ -1294,18 +1291,18 @@ install_route:
1294 goto out; 1291 goto out;
1295 } 1292 }
1296 1293
1297 rt->u.dst.metrics[type - 1] = nla_get_u32(nla); 1294 rt->dst.metrics[type - 1] = nla_get_u32(nla);
1298 } 1295 }
1299 } 1296 }
1300 } 1297 }
1301 1298
1302 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) 1299 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1303 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1300 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1304 if (!dst_mtu(&rt->u.dst)) 1301 if (!dst_mtu(&rt->dst))
1305 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 1302 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1306 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) 1303 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1307 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1304 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1308 rt->u.dst.dev = dev; 1305 rt->dst.dev = dev;
1309 rt->rt6i_idev = idev; 1306 rt->rt6i_idev = idev;
1310 rt->rt6i_table = table; 1307 rt->rt6i_table = table;
1311 1308
@@ -1319,7 +1316,7 @@ out:
1319 if (idev) 1316 if (idev)
1320 in6_dev_put(idev); 1317 in6_dev_put(idev);
1321 if (rt) 1318 if (rt)
1322 dst_free(&rt->u.dst); 1319 dst_free(&rt->dst);
1323 return err; 1320 return err;
1324} 1321}
1325 1322
@@ -1336,7 +1333,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1336 write_lock_bh(&table->tb6_lock); 1333 write_lock_bh(&table->tb6_lock);
1337 1334
1338 err = fib6_del(rt, info); 1335 err = fib6_del(rt, info);
1339 dst_release(&rt->u.dst); 1336 dst_release(&rt->dst);
1340 1337
1341 write_unlock_bh(&table->tb6_lock); 1338 write_unlock_bh(&table->tb6_lock);
1342 1339
@@ -1369,7 +1366,7 @@ static int ip6_route_del(struct fib6_config *cfg)
1369 &cfg->fc_src, cfg->fc_src_len); 1366 &cfg->fc_src, cfg->fc_src_len);
1370 1367
1371 if (fn) { 1368 if (fn) {
1372 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1369 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1373 if (cfg->fc_ifindex && 1370 if (cfg->fc_ifindex &&
1374 (rt->rt6i_dev == NULL || 1371 (rt->rt6i_dev == NULL ||
1375 rt->rt6i_dev->ifindex != cfg->fc_ifindex)) 1372 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
@@ -1379,7 +1376,7 @@ static int ip6_route_del(struct fib6_config *cfg)
1379 continue; 1376 continue;
1380 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1377 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1381 continue; 1378 continue;
1382 dst_hold(&rt->u.dst); 1379 dst_hold(&rt->dst);
1383 read_unlock_bh(&table->tb6_lock); 1380 read_unlock_bh(&table->tb6_lock);
1384 1381
1385 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 1382 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
@@ -1421,7 +1418,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
1421 read_lock_bh(&table->tb6_lock); 1418 read_lock_bh(&table->tb6_lock);
1422 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 1419 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1423restart: 1420restart:
1424 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1421 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1425 /* 1422 /*
1426 * Current route is on-link; redirect is always invalid. 1423 * Current route is on-link; redirect is always invalid.
1427 * 1424 *
@@ -1445,7 +1442,7 @@ restart:
1445 rt = net->ipv6.ip6_null_entry; 1442 rt = net->ipv6.ip6_null_entry;
1446 BACKTRACK(net, &fl->fl6_src); 1443 BACKTRACK(net, &fl->fl6_src);
1447out: 1444out:
1448 dst_hold(&rt->u.dst); 1445 dst_hold(&rt->dst);
1449 1446
1450 read_unlock_bh(&table->tb6_lock); 1447 read_unlock_bh(&table->tb6_lock);
1451 1448
@@ -1513,10 +1510,10 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1513 * Look, redirects are sent only in response to data packets, 1510 * Look, redirects are sent only in response to data packets,
1514 * so that this nexthop apparently is reachable. --ANK 1511 * so that this nexthop apparently is reachable. --ANK
1515 */ 1512 */
1516 dst_confirm(&rt->u.dst); 1513 dst_confirm(&rt->dst);
1517 1514
1518 /* Duplicate redirect: silently ignore. */ 1515 /* Duplicate redirect: silently ignore. */
1519 if (neigh == rt->u.dst.neighbour) 1516 if (neigh == rt->dst.neighbour)
1520 goto out; 1517 goto out;
1521 1518
1522 nrt = ip6_rt_copy(rt); 1519 nrt = ip6_rt_copy(rt);
@@ -1529,20 +1526,20 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1529 1526
1530 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); 1527 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1531 nrt->rt6i_dst.plen = 128; 1528 nrt->rt6i_dst.plen = 128;
1532 nrt->u.dst.flags |= DST_HOST; 1529 nrt->dst.flags |= DST_HOST;
1533 1530
1534 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); 1531 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1535 nrt->rt6i_nexthop = neigh_clone(neigh); 1532 nrt->rt6i_nexthop = neigh_clone(neigh);
1536 /* Reset pmtu, it may be better */ 1533 /* Reset pmtu, it may be better */
1537 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); 1534 nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1538 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), 1535 nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1539 dst_mtu(&nrt->u.dst)); 1536 dst_mtu(&nrt->dst));
1540 1537
1541 if (ip6_ins_rt(nrt)) 1538 if (ip6_ins_rt(nrt))
1542 goto out; 1539 goto out;
1543 1540
1544 netevent.old = &rt->u.dst; 1541 netevent.old = &rt->dst;
1545 netevent.new = &nrt->u.dst; 1542 netevent.new = &nrt->dst;
1546 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1543 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1547 1544
1548 if (rt->rt6i_flags&RTF_CACHE) { 1545 if (rt->rt6i_flags&RTF_CACHE) {
@@ -1551,7 +1548,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1551 } 1548 }
1552 1549
1553out: 1550out:
1554 dst_release(&rt->u.dst); 1551 dst_release(&rt->dst);
1555} 1552}
1556 1553
1557/* 1554/*
@@ -1570,7 +1567,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1570 if (rt == NULL) 1567 if (rt == NULL)
1571 return; 1568 return;
1572 1569
1573 if (pmtu >= dst_mtu(&rt->u.dst)) 1570 if (pmtu >= dst_mtu(&rt->dst))
1574 goto out; 1571 goto out;
1575 1572
1576 if (pmtu < IPV6_MIN_MTU) { 1573 if (pmtu < IPV6_MIN_MTU) {
@@ -1588,7 +1585,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1588 They are sent only in response to data packets, 1585 They are sent only in response to data packets,
1589 so that this nexthop apparently is reachable. --ANK 1586 so that this nexthop apparently is reachable. --ANK
1590 */ 1587 */
1591 dst_confirm(&rt->u.dst); 1588 dst_confirm(&rt->dst);
1592 1589
1593 /* Host route. If it is static, it would be better 1590 /* Host route. If it is static, it would be better
1594 not to override it, but add new one, so that 1591 not to override it, but add new one, so that
@@ -1596,10 +1593,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1596 would return automatically. 1593 would return automatically.
1597 */ 1594 */
1598 if (rt->rt6i_flags & RTF_CACHE) { 1595 if (rt->rt6i_flags & RTF_CACHE) {
1599 rt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1596 rt->dst.metrics[RTAX_MTU-1] = pmtu;
1600 if (allfrag) 1597 if (allfrag)
1601 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1598 rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1602 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1599 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1603 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1600 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1604 goto out; 1601 goto out;
1605 } 1602 }
@@ -1615,9 +1612,9 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1615 nrt = rt6_alloc_clone(rt, daddr); 1612 nrt = rt6_alloc_clone(rt, daddr);
1616 1613
1617 if (nrt) { 1614 if (nrt) {
1618 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1615 nrt->dst.metrics[RTAX_MTU-1] = pmtu;
1619 if (allfrag) 1616 if (allfrag)
1620 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1617 nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1621 1618
1622 /* According to RFC 1981, detecting PMTU increase shouldn't be 1619 /* According to RFC 1981, detecting PMTU increase shouldn't be
1623 * happened within 5 mins, the recommended timer is 10 mins. 1620 * happened within 5 mins, the recommended timer is 10 mins.
@@ -1625,13 +1622,13 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1625 * which is 10 mins. After 10 mins the decreased pmtu is expired 1622 * which is 10 mins. After 10 mins the decreased pmtu is expired
1626 * and detecting PMTU increase will be automatically happened. 1623 * and detecting PMTU increase will be automatically happened.
1627 */ 1624 */
1628 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1625 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1629 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1626 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1630 1627
1631 ip6_ins_rt(nrt); 1628 ip6_ins_rt(nrt);
1632 } 1629 }
1633out: 1630out:
1634 dst_release(&rt->u.dst); 1631 dst_release(&rt->dst);
1635} 1632}
1636 1633
1637/* 1634/*
@@ -1644,18 +1641,18 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1644 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1641 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1645 1642
1646 if (rt) { 1643 if (rt) {
1647 rt->u.dst.input = ort->u.dst.input; 1644 rt->dst.input = ort->dst.input;
1648 rt->u.dst.output = ort->u.dst.output; 1645 rt->dst.output = ort->dst.output;
1649 1646
1650 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 1647 memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
1651 rt->u.dst.error = ort->u.dst.error; 1648 rt->dst.error = ort->dst.error;
1652 rt->u.dst.dev = ort->u.dst.dev; 1649 rt->dst.dev = ort->dst.dev;
1653 if (rt->u.dst.dev) 1650 if (rt->dst.dev)
1654 dev_hold(rt->u.dst.dev); 1651 dev_hold(rt->dst.dev);
1655 rt->rt6i_idev = ort->rt6i_idev; 1652 rt->rt6i_idev = ort->rt6i_idev;
1656 if (rt->rt6i_idev) 1653 if (rt->rt6i_idev)
1657 in6_dev_hold(rt->rt6i_idev); 1654 in6_dev_hold(rt->rt6i_idev);
1658 rt->u.dst.lastuse = jiffies; 1655 rt->dst.lastuse = jiffies;
1659 rt->rt6i_expires = 0; 1656 rt->rt6i_expires = 0;
1660 1657
1661 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 1658 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
@@ -1689,14 +1686,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
1689 if (!fn) 1686 if (!fn)
1690 goto out; 1687 goto out;
1691 1688
1692 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1689 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1693 if (rt->rt6i_dev->ifindex != ifindex) 1690 if (rt->rt6i_dev->ifindex != ifindex)
1694 continue; 1691 continue;
1695 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1692 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1696 continue; 1693 continue;
1697 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1694 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1698 continue; 1695 continue;
1699 dst_hold(&rt->u.dst); 1696 dst_hold(&rt->dst);
1700 break; 1697 break;
1701 } 1698 }
1702out: 1699out:
@@ -1744,14 +1741,14 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
1744 return NULL; 1741 return NULL;
1745 1742
1746 write_lock_bh(&table->tb6_lock); 1743 write_lock_bh(&table->tb6_lock);
1747 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) { 1744 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1748 if (dev == rt->rt6i_dev && 1745 if (dev == rt->rt6i_dev &&
1749 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1746 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1750 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1747 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1751 break; 1748 break;
1752 } 1749 }
1753 if (rt) 1750 if (rt)
1754 dst_hold(&rt->u.dst); 1751 dst_hold(&rt->dst);
1755 write_unlock_bh(&table->tb6_lock); 1752 write_unlock_bh(&table->tb6_lock);
1756 return rt; 1753 return rt;
1757} 1754}
@@ -1790,9 +1787,9 @@ void rt6_purge_dflt_routers(struct net *net)
1790 1787
1791restart: 1788restart:
1792 read_lock_bh(&table->tb6_lock); 1789 read_lock_bh(&table->tb6_lock);
1793 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) { 1790 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1794 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1791 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1795 dst_hold(&rt->u.dst); 1792 dst_hold(&rt->dst);
1796 read_unlock_bh(&table->tb6_lock); 1793 read_unlock_bh(&table->tb6_lock);
1797 ip6_del_rt(rt); 1794 ip6_del_rt(rt);
1798 goto restart; 1795 goto restart;
@@ -1930,15 +1927,15 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1930 dev_hold(net->loopback_dev); 1927 dev_hold(net->loopback_dev);
1931 in6_dev_hold(idev); 1928 in6_dev_hold(idev);
1932 1929
1933 rt->u.dst.flags = DST_HOST; 1930 rt->dst.flags = DST_HOST;
1934 rt->u.dst.input = ip6_input; 1931 rt->dst.input = ip6_input;
1935 rt->u.dst.output = ip6_output; 1932 rt->dst.output = ip6_output;
1936 rt->rt6i_dev = net->loopback_dev; 1933 rt->rt6i_dev = net->loopback_dev;
1937 rt->rt6i_idev = idev; 1934 rt->rt6i_idev = idev;
1938 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 1935 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1939 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1936 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1940 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1937 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1941 rt->u.dst.obsolete = -1; 1938 rt->dst.obsolete = -1;
1942 1939
1943 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 1940 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1944 if (anycast) 1941 if (anycast)
@@ -1947,7 +1944,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1947 rt->rt6i_flags |= RTF_LOCAL; 1944 rt->rt6i_flags |= RTF_LOCAL;
1948 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 1945 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1949 if (IS_ERR(neigh)) { 1946 if (IS_ERR(neigh)) {
1950 dst_free(&rt->u.dst); 1947 dst_free(&rt->dst);
1951 1948
1952 /* We are casting this because that is the return 1949 /* We are casting this because that is the return
1953 * value type. But an errno encoded pointer is the 1950 * value type. But an errno encoded pointer is the
@@ -1962,7 +1959,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1962 rt->rt6i_dst.plen = 128; 1959 rt->rt6i_dst.plen = 128;
1963 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 1960 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1964 1961
1965 atomic_set(&rt->u.dst.__refcnt, 1); 1962 atomic_set(&rt->dst.__refcnt, 1);
1966 1963
1967 return rt; 1964 return rt;
1968} 1965}
@@ -2033,12 +2030,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2033 PMTU discouvery. 2030 PMTU discouvery.
2034 */ 2031 */
2035 if (rt->rt6i_dev == arg->dev && 2032 if (rt->rt6i_dev == arg->dev &&
2036 !dst_metric_locked(&rt->u.dst, RTAX_MTU) && 2033 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2037 (dst_mtu(&rt->u.dst) >= arg->mtu || 2034 (dst_mtu(&rt->dst) >= arg->mtu ||
2038 (dst_mtu(&rt->u.dst) < arg->mtu && 2035 (dst_mtu(&rt->dst) < arg->mtu &&
2039 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { 2036 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2040 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; 2037 rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
2041 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); 2038 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
2042 } 2039 }
2043 return 0; 2040 return 0;
2044} 2041}
@@ -2252,20 +2249,20 @@ static int rt6_fill_node(struct net *net,
2252#endif 2249#endif
2253 NLA_PUT_U32(skb, RTA_IIF, iif); 2250 NLA_PUT_U32(skb, RTA_IIF, iif);
2254 } else if (dst) { 2251 } else if (dst) {
2255 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); 2252 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
2256 struct in6_addr saddr_buf; 2253 struct in6_addr saddr_buf;
2257 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, 2254 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2258 dst, 0, &saddr_buf) == 0) 2255 dst, 0, &saddr_buf) == 0)
2259 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 2256 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2260 } 2257 }
2261 2258
2262 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2259 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
2263 goto nla_put_failure; 2260 goto nla_put_failure;
2264 2261
2265 if (rt->u.dst.neighbour) 2262 if (rt->dst.neighbour)
2266 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); 2263 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2267 2264
2268 if (rt->u.dst.dev) 2265 if (rt->dst.dev)
2269 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); 2266 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2270 2267
2271 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); 2268 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
@@ -2277,8 +2274,8 @@ static int rt6_fill_node(struct net *net,
2277 else 2274 else
2278 expires = INT_MAX; 2275 expires = INT_MAX;
2279 2276
2280 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, 2277 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2281 expires, rt->u.dst.error) < 0) 2278 expires, rt->dst.error) < 0)
2282 goto nla_put_failure; 2279 goto nla_put_failure;
2283 2280
2284 return nlmsg_end(skb, nlh); 2281 return nlmsg_end(skb, nlh);
@@ -2364,7 +2361,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2364 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2361 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2365 2362
2366 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); 2363 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2367 skb_dst_set(skb, &rt->u.dst); 2364 skb_dst_set(skb, &rt->dst);
2368 2365
2369 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, 2366 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2370 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2367 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
@@ -2416,12 +2413,12 @@ static int ip6_route_dev_notify(struct notifier_block *this,
2416 struct net *net = dev_net(dev); 2413 struct net *net = dev_net(dev);
2417 2414
2418 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 2415 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2419 net->ipv6.ip6_null_entry->u.dst.dev = dev; 2416 net->ipv6.ip6_null_entry->dst.dev = dev;
2420 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 2417 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2421#ifdef CONFIG_IPV6_MULTIPLE_TABLES 2418#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2422 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; 2419 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2423 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 2420 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2424 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; 2421 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2425 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 2422 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2426#endif 2423#endif
2427 } 2424 }
@@ -2464,8 +2461,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2464 seq_puts(m, "00000000000000000000000000000000"); 2461 seq_puts(m, "00000000000000000000000000000000");
2465 } 2462 }
2466 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2463 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2467 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), 2464 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2468 rt->u.dst.__use, rt->rt6i_flags, 2465 rt->dst.__use, rt->rt6i_flags,
2469 rt->rt6i_dev ? rt->rt6i_dev->name : ""); 2466 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2470 return 0; 2467 return 0;
2471} 2468}
@@ -2646,9 +2643,9 @@ static int __net_init ip6_route_net_init(struct net *net)
2646 GFP_KERNEL); 2643 GFP_KERNEL);
2647 if (!net->ipv6.ip6_null_entry) 2644 if (!net->ipv6.ip6_null_entry)
2648 goto out_ip6_dst_ops; 2645 goto out_ip6_dst_ops;
2649 net->ipv6.ip6_null_entry->u.dst.path = 2646 net->ipv6.ip6_null_entry->dst.path =
2650 (struct dst_entry *)net->ipv6.ip6_null_entry; 2647 (struct dst_entry *)net->ipv6.ip6_null_entry;
2651 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; 2648 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2652 2649
2653#ifdef CONFIG_IPV6_MULTIPLE_TABLES 2650#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2654 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2651 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2656,18 +2653,18 @@ static int __net_init ip6_route_net_init(struct net *net)
2656 GFP_KERNEL); 2653 GFP_KERNEL);
2657 if (!net->ipv6.ip6_prohibit_entry) 2654 if (!net->ipv6.ip6_prohibit_entry)
2658 goto out_ip6_null_entry; 2655 goto out_ip6_null_entry;
2659 net->ipv6.ip6_prohibit_entry->u.dst.path = 2656 net->ipv6.ip6_prohibit_entry->dst.path =
2660 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2657 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2661 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; 2658 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2662 2659
2663 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2660 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2664 sizeof(*net->ipv6.ip6_blk_hole_entry), 2661 sizeof(*net->ipv6.ip6_blk_hole_entry),
2665 GFP_KERNEL); 2662 GFP_KERNEL);
2666 if (!net->ipv6.ip6_blk_hole_entry) 2663 if (!net->ipv6.ip6_blk_hole_entry)
2667 goto out_ip6_prohibit_entry; 2664 goto out_ip6_prohibit_entry;
2668 net->ipv6.ip6_blk_hole_entry->u.dst.path = 2665 net->ipv6.ip6_blk_hole_entry->dst.path =
2669 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2666 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2670 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; 2667 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2671#endif 2668#endif
2672 2669
2673 net->ipv6.sysctl.flush_delay = 0; 2670 net->ipv6.sysctl.flush_delay = 0;
@@ -2742,12 +2739,12 @@ int __init ip6_route_init(void)
2742 /* Registering of the loopback is done before this portion of code, 2739 /* Registering of the loopback is done before this portion of code,
2743 * the loopback reference in rt6_info will not be taken, do it 2740 * the loopback reference in rt6_info will not be taken, do it
2744 * manually for init_net */ 2741 * manually for init_net */
2745 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; 2742 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2746 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2743 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2747 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2744 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2748 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; 2745 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2749 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2746 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2750 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; 2747 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2751 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2748 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2752 #endif 2749 #endif
2753 ret = fib6_init(); 2750 ret = fib6_init();
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e51e650ea80b..4699cd3c3118 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -249,8 +249,6 @@ failed:
249 return NULL; 249 return NULL;
250} 250}
251 251
252static DEFINE_SPINLOCK(ipip6_prl_lock);
253
254#define for_each_prl_rcu(start) \ 252#define for_each_prl_rcu(start) \
255 for (prl = rcu_dereference(start); \ 253 for (prl = rcu_dereference(start); \
256 prl; \ 254 prl; \
@@ -340,7 +338,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
340 if (a->addr == htonl(INADDR_ANY)) 338 if (a->addr == htonl(INADDR_ANY))
341 return -EINVAL; 339 return -EINVAL;
342 340
343 spin_lock(&ipip6_prl_lock); 341 ASSERT_RTNL();
344 342
345 for (p = t->prl; p; p = p->next) { 343 for (p = t->prl; p; p = p->next) {
346 if (p->addr == a->addr) { 344 if (p->addr == a->addr) {
@@ -370,7 +368,6 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
370 t->prl_count++; 368 t->prl_count++;
371 rcu_assign_pointer(t->prl, p); 369 rcu_assign_pointer(t->prl, p);
372out: 370out:
373 spin_unlock(&ipip6_prl_lock);
374 return err; 371 return err;
375} 372}
376 373
@@ -397,7 +394,7 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
397 struct ip_tunnel_prl_entry *x, **p; 394 struct ip_tunnel_prl_entry *x, **p;
398 int err = 0; 395 int err = 0;
399 396
400 spin_lock(&ipip6_prl_lock); 397 ASSERT_RTNL();
401 398
402 if (a && a->addr != htonl(INADDR_ANY)) { 399 if (a && a->addr != htonl(INADDR_ANY)) {
403 for (p = &t->prl; *p; p = &(*p)->next) { 400 for (p = &t->prl; *p; p = &(*p)->next) {
@@ -419,7 +416,6 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
419 } 416 }
420 } 417 }
421out: 418out:
422 spin_unlock(&ipip6_prl_lock);
423 return err; 419 return err;
424} 420}
425 421
@@ -716,7 +712,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
716 stats->tx_carrier_errors++; 712 stats->tx_carrier_errors++;
717 goto tx_error_icmp; 713 goto tx_error_icmp;
718 } 714 }
719 tdev = rt->u.dst.dev; 715 tdev = rt->dst.dev;
720 716
721 if (tdev == dev) { 717 if (tdev == dev) {
722 ip_rt_put(rt); 718 ip_rt_put(rt);
@@ -725,7 +721,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
725 } 721 }
726 722
727 if (df) { 723 if (df) {
728 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); 724 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
729 725
730 if (mtu < 68) { 726 if (mtu < 68) {
731 stats->collisions++; 727 stats->collisions++;
@@ -784,7 +780,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
784 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 780 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
785 IPCB(skb)->flags = 0; 781 IPCB(skb)->flags = 0;
786 skb_dst_drop(skb); 782 skb_dst_drop(skb);
787 skb_dst_set(skb, &rt->u.dst); 783 skb_dst_set(skb, &rt->dst);
788 784
789 /* 785 /*
790 * Push down and install the IPIP header. 786 * Push down and install the IPIP header.
@@ -833,7 +829,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
833 .proto = IPPROTO_IPV6 }; 829 .proto = IPPROTO_IPV6 };
834 struct rtable *rt; 830 struct rtable *rt;
835 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 831 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
836 tdev = rt->u.dst.dev; 832 tdev = rt->dst.dev;
837 ip_rt_put(rt); 833 ip_rt_put(rt);
838 } 834 }
839 dev->flags |= IFF_POINTOPOINT; 835 dev->flags |= IFF_POINTOPOINT;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 34d1f0690d7e..09fd34f0dbf2 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -27,28 +27,17 @@ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
27#define COOKIEBITS 24 /* Upper bits store count */ 27#define COOKIEBITS 24 /* Upper bits store count */
28#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) 28#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
29 29
30/* 30/* Table must be sorted. */
31 * This table has to be sorted and terminated with (__u16)-1.
32 * XXX generate a better table.
33 * Unresolved Issues: HIPPI with a 64k MSS is not well supported.
34 *
35 * Taken directly from ipv4 implementation.
36 * Should this list be modified for ipv6 use or is it close enough?
37 * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart
38 */
39static __u16 const msstab[] = { 31static __u16 const msstab[] = {
40 64 - 1, 32 64,
41 256 - 1, 33 512,
42 512 - 1, 34 536,
43 536 - 1, 35 1280 - 60,
44 1024 - 1, 36 1480 - 60,
45 1440 - 1, 37 1500 - 60,
46 1460 - 1, 38 4460 - 60,
47 4312 - 1, 39 9000 - 60,
48 (__u16)-1
49}; 40};
50/* The number doesn't include the -1 terminator */
51#define NUM_MSS (ARRAY_SIZE(msstab) - 1)
52 41
53/* 42/*
54 * This (misnamed) value is the age of syncookie which is permitted. 43 * This (misnamed) value is the age of syncookie which is permitted.
@@ -134,9 +123,11 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
134 123
135 tcp_synq_overflow(sk); 124 tcp_synq_overflow(sk);
136 125
137 for (mssind = 0; mss > msstab[mssind + 1]; mssind++) 126 for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
138 ; 127 if (mss >= msstab[mssind])
139 *mssp = msstab[mssind] + 1; 128 break;
129
130 *mssp = msstab[mssind];
140 131
141 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); 132 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
142 133
@@ -154,7 +145,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
154 th->source, th->dest, seq, 145 th->source, th->dest, seq,
155 jiffies / (HZ * 60), COUNTER_TRIES); 146 jiffies / (HZ * 60), COUNTER_TRIES);
156 147
157 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; 148 return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
158} 149}
159 150
160struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) 151struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
@@ -173,8 +164,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
173 int mss; 164 int mss;
174 struct dst_entry *dst; 165 struct dst_entry *dst;
175 __u8 rcv_wscale; 166 __u8 rcv_wscale;
167 bool ecn_ok;
176 168
177 if (!sysctl_tcp_syncookies || !th->ack) 169 if (!sysctl_tcp_syncookies || !th->ack || th->rst)
178 goto out; 170 goto out;
179 171
180 if (tcp_synq_no_recent_overflow(sk) || 172 if (tcp_synq_no_recent_overflow(sk) ||
@@ -189,8 +181,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
189 memset(&tcp_opt, 0, sizeof(tcp_opt)); 181 memset(&tcp_opt, 0, sizeof(tcp_opt));
190 tcp_parse_options(skb, &tcp_opt, &hash_location, 0); 182 tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
191 183
192 if (tcp_opt.saw_tstamp) 184 if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
193 cookie_check_timestamp(&tcp_opt); 185 goto out;
194 186
195 ret = NULL; 187 ret = NULL;
196 req = inet6_reqsk_alloc(&tcp6_request_sock_ops); 188 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
@@ -224,9 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
224 216
225 req->expires = 0UL; 217 req->expires = 0UL;
226 req->retrans = 0; 218 req->retrans = 0;
227 ireq->ecn_ok = 0; 219 ireq->ecn_ok = ecn_ok;
228 ireq->snd_wscale = tcp_opt.snd_wscale; 220 ireq->snd_wscale = tcp_opt.snd_wscale;
229 ireq->rcv_wscale = tcp_opt.rcv_wscale;
230 ireq->sack_ok = tcp_opt.sack_ok; 221 ireq->sack_ok = tcp_opt.sack_ok;
231 ireq->wscale_ok = tcp_opt.wscale_ok; 222 ireq->wscale_ok = tcp_opt.wscale_ok;
232 ireq->tstamp_ok = tcp_opt.saw_tstamp; 223 ireq->tstamp_ok = tcp_opt.saw_tstamp;
@@ -240,17 +231,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
240 * me if there is a preferred way. 231 * me if there is a preferred way.
241 */ 232 */
242 { 233 {
243 struct in6_addr *final_p = NULL, final; 234 struct in6_addr *final_p, final;
244 struct flowi fl; 235 struct flowi fl;
245 memset(&fl, 0, sizeof(fl)); 236 memset(&fl, 0, sizeof(fl));
246 fl.proto = IPPROTO_TCP; 237 fl.proto = IPPROTO_TCP;
247 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); 238 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
248 if (np->opt && np->opt->srcrt) { 239 final_p = fl6_update_dst(&fl, np->opt, &final);
249 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
250 ipv6_addr_copy(&final, &fl.fl6_dst);
251 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
252 final_p = &final;
253 }
254 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); 240 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
255 fl.oif = sk->sk_bound_dev_if; 241 fl.oif = sk->sk_bound_dev_if;
256 fl.mark = sk->sk_mark; 242 fl.mark = sk->sk_mark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2b7c3a100e2c..fe6d40418c0b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -129,7 +129,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
129 struct inet_connection_sock *icsk = inet_csk(sk); 129 struct inet_connection_sock *icsk = inet_csk(sk);
130 struct ipv6_pinfo *np = inet6_sk(sk); 130 struct ipv6_pinfo *np = inet6_sk(sk);
131 struct tcp_sock *tp = tcp_sk(sk); 131 struct tcp_sock *tp = tcp_sk(sk);
132 struct in6_addr *saddr = NULL, *final_p = NULL, final; 132 struct in6_addr *saddr = NULL, *final_p, final;
133 struct flowi fl; 133 struct flowi fl;
134 struct dst_entry *dst; 134 struct dst_entry *dst;
135 int addr_type; 135 int addr_type;
@@ -250,12 +250,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
250 fl.fl_ip_dport = usin->sin6_port; 250 fl.fl_ip_dport = usin->sin6_port;
251 fl.fl_ip_sport = inet->inet_sport; 251 fl.fl_ip_sport = inet->inet_sport;
252 252
253 if (np->opt && np->opt->srcrt) { 253 final_p = fl6_update_dst(&fl, np->opt, &final);
254 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
255 ipv6_addr_copy(&final, &fl.fl6_dst);
256 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
257 final_p = &final;
258 }
259 254
260 security_sk_classify_flow(sk, &fl); 255 security_sk_classify_flow(sk, &fl);
261 256
@@ -477,7 +472,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
477 struct ipv6_pinfo *np = inet6_sk(sk); 472 struct ipv6_pinfo *np = inet6_sk(sk);
478 struct sk_buff * skb; 473 struct sk_buff * skb;
479 struct ipv6_txoptions *opt = NULL; 474 struct ipv6_txoptions *opt = NULL;
480 struct in6_addr * final_p = NULL, final; 475 struct in6_addr * final_p, final;
481 struct flowi fl; 476 struct flowi fl;
482 struct dst_entry *dst; 477 struct dst_entry *dst;
483 int err = -1; 478 int err = -1;
@@ -494,12 +489,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
494 security_req_classify_flow(req, &fl); 489 security_req_classify_flow(req, &fl);
495 490
496 opt = np->opt; 491 opt = np->opt;
497 if (opt && opt->srcrt) { 492 final_p = fl6_update_dst(&fl, opt, &final);
498 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
499 ipv6_addr_copy(&final, &fl.fl6_dst);
500 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
501 final_p = &final;
502 }
503 493
504 err = ip6_dst_lookup(sk, &dst, &fl); 494 err = ip6_dst_lookup(sk, &dst, &fl);
505 if (err) 495 if (err)
@@ -1167,7 +1157,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1167 } 1157 }
1168 1158
1169#ifdef CONFIG_SYN_COOKIES 1159#ifdef CONFIG_SYN_COOKIES
1170 if (!th->rst && !th->syn && th->ack) 1160 if (!th->syn)
1171 sk = cookie_v6_check(sk, skb); 1161 sk = cookie_v6_check(sk, skb);
1172#endif 1162#endif
1173 return sk; 1163 return sk;
@@ -1279,13 +1269,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1279 treq = inet6_rsk(req); 1269 treq = inet6_rsk(req);
1280 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); 1270 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
1281 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); 1271 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
1282 if (!want_cookie) 1272 if (!want_cookie || tmp_opt.tstamp_ok)
1283 TCP_ECN_create_request(req, tcp_hdr(skb)); 1273 TCP_ECN_create_request(req, tcp_hdr(skb));
1284 1274
1285 if (want_cookie) { 1275 if (!isn) {
1286 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1287 req->cookie_ts = tmp_opt.tstamp_ok;
1288 } else if (!isn) {
1289 if (ipv6_opt_accepted(sk, skb) || 1276 if (ipv6_opt_accepted(sk, skb) ||
1290 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || 1277 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1291 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { 1278 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1298,8 +1285,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1298 if (!sk->sk_bound_dev_if && 1285 if (!sk->sk_bound_dev_if &&
1299 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) 1286 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1300 treq->iif = inet6_iif(skb); 1287 treq->iif = inet6_iif(skb);
1301 1288 if (!want_cookie) {
1302 isn = tcp_v6_init_sequence(skb); 1289 isn = tcp_v6_init_sequence(skb);
1290 } else {
1291 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1292 req->cookie_ts = tmp_opt.tstamp_ok;
1293 }
1303 } 1294 }
1304 tcp_rsk(req)->snt_isn = isn; 1295 tcp_rsk(req)->snt_isn = isn;
1305 1296
@@ -1392,18 +1383,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1392 goto out_overflow; 1383 goto out_overflow;
1393 1384
1394 if (dst == NULL) { 1385 if (dst == NULL) {
1395 struct in6_addr *final_p = NULL, final; 1386 struct in6_addr *final_p, final;
1396 struct flowi fl; 1387 struct flowi fl;
1397 1388
1398 memset(&fl, 0, sizeof(fl)); 1389 memset(&fl, 0, sizeof(fl));
1399 fl.proto = IPPROTO_TCP; 1390 fl.proto = IPPROTO_TCP;
1400 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); 1391 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1401 if (opt && opt->srcrt) { 1392 final_p = fl6_update_dst(&fl, opt, &final);
1402 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1403 ipv6_addr_copy(&final, &fl.fl6_dst);
1404 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1405 final_p = &final;
1406 }
1407 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); 1393 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1408 fl.oif = sk->sk_bound_dev_if; 1394 fl.oif = sk->sk_bound_dev_if;
1409 fl.mark = sk->sk_mark; 1395 fl.mark = sk->sk_mark;
@@ -2156,6 +2142,8 @@ struct proto tcpv6_prot = {
2156 .setsockopt = tcp_setsockopt, 2142 .setsockopt = tcp_setsockopt,
2157 .getsockopt = tcp_getsockopt, 2143 .getsockopt = tcp_getsockopt,
2158 .recvmsg = tcp_recvmsg, 2144 .recvmsg = tcp_recvmsg,
2145 .sendmsg = tcp_sendmsg,
2146 .sendpage = tcp_sendpage,
2159 .backlog_rcv = tcp_v6_do_rcv, 2147 .backlog_rcv = tcp_v6_do_rcv,
2160 .hash = tcp_v6_hash, 2148 .hash = tcp_v6_hash,
2161 .unhash = inet_unhash, 2149 .unhash = inet_unhash,
@@ -2174,6 +2162,7 @@ struct proto tcpv6_prot = {
2174 .twsk_prot = &tcp6_timewait_sock_ops, 2162 .twsk_prot = &tcp6_timewait_sock_ops,
2175 .rsk_prot = &tcp6_request_sock_ops, 2163 .rsk_prot = &tcp6_request_sock_ops,
2176 .h.hashinfo = &tcp_hashinfo, 2164 .h.hashinfo = &tcp_hashinfo,
2165 .no_autobind = true,
2177#ifdef CONFIG_COMPAT 2166#ifdef CONFIG_COMPAT
2178 .compat_setsockopt = compat_tcp_setsockopt, 2167 .compat_setsockopt = compat_tcp_setsockopt,
2179 .compat_getsockopt = compat_tcp_getsockopt, 2168 .compat_getsockopt = compat_tcp_getsockopt,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3d7a2c0b836a..1dd1affdead2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -328,6 +328,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
328 int err; 328 int err;
329 int is_udplite = IS_UDPLITE(sk); 329 int is_udplite = IS_UDPLITE(sk);
330 int is_udp4; 330 int is_udp4;
331 bool slow;
331 332
332 if (addr_len) 333 if (addr_len)
333 *addr_len=sizeof(struct sockaddr_in6); 334 *addr_len=sizeof(struct sockaddr_in6);
@@ -424,7 +425,7 @@ out:
424 return err; 425 return err;
425 426
426csum_copy_err: 427csum_copy_err:
427 lock_sock_bh(sk); 428 slow = lock_sock_fast(sk);
428 if (!skb_kill_datagram(sk, skb, flags)) { 429 if (!skb_kill_datagram(sk, skb, flags)) {
429 if (is_udp4) 430 if (is_udp4)
430 UDP_INC_STATS_USER(sock_net(sk), 431 UDP_INC_STATS_USER(sock_net(sk),
@@ -433,7 +434,7 @@ csum_copy_err:
433 UDP6_INC_STATS_USER(sock_net(sk), 434 UDP6_INC_STATS_USER(sock_net(sk),
434 UDP_MIB_INERRORS, is_udplite); 435 UDP_MIB_INERRORS, is_udplite);
435 } 436 }
436 unlock_sock_bh(sk); 437 unlock_sock_fast(sk, slow);
437 438
438 if (flags & MSG_DONTWAIT) 439 if (flags & MSG_DONTWAIT)
439 return -EAGAIN; 440 return -EAGAIN;
@@ -926,7 +927,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
926 struct inet_sock *inet = inet_sk(sk); 927 struct inet_sock *inet = inet_sk(sk);
927 struct ipv6_pinfo *np = inet6_sk(sk); 928 struct ipv6_pinfo *np = inet6_sk(sk);
928 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; 929 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
929 struct in6_addr *daddr, *final_p = NULL, final; 930 struct in6_addr *daddr, *final_p, final;
930 struct ipv6_txoptions *opt = NULL; 931 struct ipv6_txoptions *opt = NULL;
931 struct ip6_flowlabel *flowlabel = NULL; 932 struct ip6_flowlabel *flowlabel = NULL;
932 struct flowi fl; 933 struct flowi fl;
@@ -1096,14 +1097,9 @@ do_udp_sendmsg:
1096 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 1097 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1097 fl.fl_ip_sport = inet->inet_sport; 1098 fl.fl_ip_sport = inet->inet_sport;
1098 1099
1099 /* merge ip6_build_xmit from ip6_output */ 1100 final_p = fl6_update_dst(&fl, opt, &final);
1100 if (opt && opt->srcrt) { 1101 if (final_p)
1101 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1102 ipv6_addr_copy(&final, &fl.fl6_dst);
1103 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1104 final_p = &final;
1105 connected = 0; 1102 connected = 0;
1106 }
1107 1103
1108 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { 1104 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
1109 fl.oif = np->mcast_oif; 1105 fl.oif = np->mcast_oif;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 4a0e77e14468..6baeabbbca82 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -124,6 +124,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
124 u8 nexthdr = nh[IP6CB(skb)->nhoff]; 124 u8 nexthdr = nh[IP6CB(skb)->nhoff];
125 125
126 memset(fl, 0, sizeof(struct flowi)); 126 memset(fl, 0, sizeof(struct flowi));
127 fl->mark = skb->mark;
128
127 ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); 129 ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr);
128 ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); 130 ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr);
129 131
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 6a1a202710c5..800bc53b7f63 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -527,7 +527,7 @@ static int
527dev_irnet_close(struct inode * inode, 527dev_irnet_close(struct inode * inode,
528 struct file * file) 528 struct file * file)
529{ 529{
530 irnet_socket * ap = (struct irnet_socket *) file->private_data; 530 irnet_socket * ap = file->private_data;
531 531
532 DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n", 532 DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
533 file, ap); 533 file, ap);
@@ -564,7 +564,7 @@ dev_irnet_write(struct file * file,
564 size_t count, 564 size_t count,
565 loff_t * ppos) 565 loff_t * ppos)
566{ 566{
567 irnet_socket * ap = (struct irnet_socket *) file->private_data; 567 irnet_socket * ap = file->private_data;
568 568
569 DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n", 569 DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n",
570 file, ap, count); 570 file, ap, count);
@@ -588,7 +588,7 @@ dev_irnet_read(struct file * file,
588 size_t count, 588 size_t count,
589 loff_t * ppos) 589 loff_t * ppos)
590{ 590{
591 irnet_socket * ap = (struct irnet_socket *) file->private_data; 591 irnet_socket * ap = file->private_data;
592 592
593 DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n", 593 DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n",
594 file, ap, count); 594 file, ap, count);
@@ -609,7 +609,7 @@ static unsigned int
609dev_irnet_poll(struct file * file, 609dev_irnet_poll(struct file * file,
610 poll_table * wait) 610 poll_table * wait)
611{ 611{
612 irnet_socket * ap = (struct irnet_socket *) file->private_data; 612 irnet_socket * ap = file->private_data;
613 unsigned int mask; 613 unsigned int mask;
614 614
615 DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n", 615 DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
@@ -638,7 +638,7 @@ dev_irnet_ioctl(
638 unsigned int cmd, 638 unsigned int cmd,
639 unsigned long arg) 639 unsigned long arg)
640{ 640{
641 irnet_socket * ap = (struct irnet_socket *) file->private_data; 641 irnet_socket * ap = file->private_data;
642 int err; 642 int err;
643 int val; 643 int val;
644 void __user *argp = (void __user *)arg; 644 void __user *argp = (void __user *)arg;
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 47db1d8a0d92..285761e77d90 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -1853,23 +1853,23 @@ static int irttp_seq_show(struct seq_file *seq, void *v)
1853 self->remote_credit); 1853 self->remote_credit);
1854 seq_printf(seq, "send credit: %d\n", 1854 seq_printf(seq, "send credit: %d\n",
1855 self->send_credit); 1855 self->send_credit);
1856 seq_printf(seq, " tx packets: %ld, ", 1856 seq_printf(seq, " tx packets: %lu, ",
1857 self->stats.tx_packets); 1857 self->stats.tx_packets);
1858 seq_printf(seq, "rx packets: %ld, ", 1858 seq_printf(seq, "rx packets: %lu, ",
1859 self->stats.rx_packets); 1859 self->stats.rx_packets);
1860 seq_printf(seq, "tx_queue len: %d ", 1860 seq_printf(seq, "tx_queue len: %u ",
1861 skb_queue_len(&self->tx_queue)); 1861 skb_queue_len(&self->tx_queue));
1862 seq_printf(seq, "rx_queue len: %d\n", 1862 seq_printf(seq, "rx_queue len: %u\n",
1863 skb_queue_len(&self->rx_queue)); 1863 skb_queue_len(&self->rx_queue));
1864 seq_printf(seq, " tx_sdu_busy: %s, ", 1864 seq_printf(seq, " tx_sdu_busy: %s, ",
1865 self->tx_sdu_busy? "TRUE":"FALSE"); 1865 self->tx_sdu_busy? "TRUE":"FALSE");
1866 seq_printf(seq, "rx_sdu_busy: %s\n", 1866 seq_printf(seq, "rx_sdu_busy: %s\n",
1867 self->rx_sdu_busy? "TRUE":"FALSE"); 1867 self->rx_sdu_busy? "TRUE":"FALSE");
1868 seq_printf(seq, " max_seg_size: %d, ", 1868 seq_printf(seq, " max_seg_size: %u, ",
1869 self->max_seg_size); 1869 self->max_seg_size);
1870 seq_printf(seq, "tx_max_sdu_size: %d, ", 1870 seq_printf(seq, "tx_max_sdu_size: %u, ",
1871 self->tx_max_sdu_size); 1871 self->tx_max_sdu_size);
1872 seq_printf(seq, "rx_max_sdu_size: %d\n", 1872 seq_printf(seq, "rx_max_sdu_size: %u\n",
1873 self->rx_max_sdu_size); 1873 self->rx_max_sdu_size);
1874 1874
1875 seq_printf(seq, " Used by (%s)\n\n", 1875 seq_printf(seq, " Used by (%s)\n\n",
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c8b4599a752e..9637e45744fa 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1619,7 +1619,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
1619save_message: 1619save_message:
1620 save_msg = kzalloc(sizeof(struct sock_msg_q), GFP_ATOMIC | GFP_DMA); 1620 save_msg = kzalloc(sizeof(struct sock_msg_q), GFP_ATOMIC | GFP_DMA);
1621 if (!save_msg) 1621 if (!save_msg)
1622 return; 1622 goto out_unlock;
1623 save_msg->path = path; 1623 save_msg->path = path;
1624 save_msg->msg = *msg; 1624 save_msg->msg = *msg;
1625 1625
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index fd8b28361a64..499c045d6910 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -632,13 +632,14 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
632 iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), 632 iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
633 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); 633 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
634 if (!iucv_irq_data[cpu]) 634 if (!iucv_irq_data[cpu])
635 return NOTIFY_BAD; 635 return notifier_from_errno(-ENOMEM);
636
636 iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), 637 iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
637 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); 638 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
638 if (!iucv_param[cpu]) { 639 if (!iucv_param[cpu]) {
639 kfree(iucv_irq_data[cpu]); 640 kfree(iucv_irq_data[cpu]);
640 iucv_irq_data[cpu] = NULL; 641 iucv_irq_data[cpu] = NULL;
641 return NOTIFY_BAD; 642 return notifier_from_errno(-ENOMEM);
642 } 643 }
643 iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), 644 iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param),
644 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); 645 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
@@ -647,7 +648,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
647 iucv_param[cpu] = NULL; 648 iucv_param[cpu] = NULL;
648 kfree(iucv_irq_data[cpu]); 649 kfree(iucv_irq_data[cpu]);
649 iucv_irq_data[cpu] = NULL; 650 iucv_irq_data[cpu] = NULL;
650 return NOTIFY_BAD; 651 return notifier_from_errno(-ENOMEM);
651 } 652 }
652 break; 653 break;
653 case CPU_UP_CANCELED: 654 case CPU_UP_CANCELED:
@@ -677,7 +678,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
677 cpu_clear(cpu, cpumask); 678 cpu_clear(cpu, cpumask);
678 if (cpus_empty(cpumask)) 679 if (cpus_empty(cpumask))
679 /* Can't offline last IUCV enabled cpu. */ 680 /* Can't offline last IUCV enabled cpu. */
680 return NOTIFY_BAD; 681 return notifier_from_errno(-EINVAL);
681 smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1); 682 smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1);
682 if (cpus_empty(iucv_irq_cpumask)) 683 if (cpus_empty(iucv_irq_cpumask))
683 smp_call_function_single(first_cpu(iucv_buffer_cpumask), 684 smp_call_function_single(first_cpu(iucv_buffer_cpumask),
@@ -1462,7 +1463,7 @@ struct iucv_path_pending {
1462 u32 res3; 1463 u32 res3;
1463 u8 ippollfg; 1464 u8 ippollfg;
1464 u8 res4[3]; 1465 u8 res4[3];
1465} __attribute__ ((packed)); 1466} __packed;
1466 1467
1467static void iucv_path_pending(struct iucv_irq_data *data) 1468static void iucv_path_pending(struct iucv_irq_data *data)
1468{ 1469{
@@ -1523,7 +1524,7 @@ struct iucv_path_complete {
1523 u32 res3; 1524 u32 res3;
1524 u8 ippollfg; 1525 u8 ippollfg;
1525 u8 res4[3]; 1526 u8 res4[3];
1526} __attribute__ ((packed)); 1527} __packed;
1527 1528
1528static void iucv_path_complete(struct iucv_irq_data *data) 1529static void iucv_path_complete(struct iucv_irq_data *data)
1529{ 1530{
@@ -1553,7 +1554,7 @@ struct iucv_path_severed {
1553 u32 res4; 1554 u32 res4;
1554 u8 ippollfg; 1555 u8 ippollfg;
1555 u8 res5[3]; 1556 u8 res5[3];
1556} __attribute__ ((packed)); 1557} __packed;
1557 1558
1558static void iucv_path_severed(struct iucv_irq_data *data) 1559static void iucv_path_severed(struct iucv_irq_data *data)
1559{ 1560{
@@ -1589,7 +1590,7 @@ struct iucv_path_quiesced {
1589 u32 res4; 1590 u32 res4;
1590 u8 ippollfg; 1591 u8 ippollfg;
1591 u8 res5[3]; 1592 u8 res5[3];
1592} __attribute__ ((packed)); 1593} __packed;
1593 1594
1594static void iucv_path_quiesced(struct iucv_irq_data *data) 1595static void iucv_path_quiesced(struct iucv_irq_data *data)
1595{ 1596{
@@ -1617,7 +1618,7 @@ struct iucv_path_resumed {
1617 u32 res4; 1618 u32 res4;
1618 u8 ippollfg; 1619 u8 ippollfg;
1619 u8 res5[3]; 1620 u8 res5[3];
1620} __attribute__ ((packed)); 1621} __packed;
1621 1622
1622static void iucv_path_resumed(struct iucv_irq_data *data) 1623static void iucv_path_resumed(struct iucv_irq_data *data)
1623{ 1624{
@@ -1648,7 +1649,7 @@ struct iucv_message_complete {
1648 u32 ipbfln2f; 1649 u32 ipbfln2f;
1649 u8 ippollfg; 1650 u8 ippollfg;
1650 u8 res2[3]; 1651 u8 res2[3];
1651} __attribute__ ((packed)); 1652} __packed;
1652 1653
1653static void iucv_message_complete(struct iucv_irq_data *data) 1654static void iucv_message_complete(struct iucv_irq_data *data)
1654{ 1655{
@@ -1693,7 +1694,7 @@ struct iucv_message_pending {
1693 u32 ipbfln2f; 1694 u32 ipbfln2f;
1694 u8 ippollfg; 1695 u8 ippollfg;
1695 u8 res2[3]; 1696 u8 res2[3];
1696} __attribute__ ((packed)); 1697} __packed;
1697 1698
1698static void iucv_message_pending(struct iucv_irq_data *data) 1699static void iucv_message_pending(struct iucv_irq_data *data)
1699{ 1700{
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0852512d392c..226a0ae3bcfd 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -348,7 +348,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
348 sk->sk_state = TCP_ESTABLISHED; 348 sk->sk_state = TCP_ESTABLISHED;
349 inet->inet_id = jiffies; 349 inet->inet_id = jiffies;
350 350
351 sk_dst_set(sk, &rt->u.dst); 351 sk_dst_set(sk, &rt->dst);
352 352
353 write_lock_bh(&l2tp_ip_lock); 353 write_lock_bh(&l2tp_ip_lock);
354 hlist_del_init(&sk->sk_bind_node); 354 hlist_del_init(&sk->sk_bind_node);
@@ -496,9 +496,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
496 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) 496 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
497 goto no_route; 497 goto no_route;
498 } 498 }
499 sk_setup_caps(sk, &rt->u.dst); 499 sk_setup_caps(sk, &rt->dst);
500 } 500 }
501 skb_dst_set(skb, dst_clone(&rt->u.dst)); 501 skb_dst_set(skb, dst_clone(&rt->dst));
502 502
503 /* Queue the packet to IP for output */ 503 /* Queue the packet to IP for output */
504 rc = ip_queue_xmit(skb); 504 rc = ip_queue_xmit(skb);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index db82da90df76..b769567949be 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -595,7 +595,7 @@ struct iapp_layer2_update {
595 u8 ssap; /* 0 */ 595 u8 ssap; /* 0 */
596 u8 control; 596 u8 control;
597 u8 xid_info[3]; 597 u8 xid_info[3];
598} __attribute__ ((packed)); 598} __packed;
599 599
600static void ieee80211_send_layer2_update(struct sta_info *sta) 600static void ieee80211_send_layer2_update(struct sta_info *sta)
601{ 601{
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c6b5c2d3ffde..ef470064b154 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1090,7 +1090,7 @@ struct ieee80211_tx_status_rtap_hdr {
1090 u8 padding_for_rate; 1090 u8 padding_for_rate;
1091 __le16 tx_flags; 1091 __le16 tx_flags;
1092 u8 data_retries; 1092 u8 data_retries;
1093} __attribute__ ((packed)); 1093} __packed;
1094 1094
1095 1095
1096/* HT */ 1096/* HT */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d70e1a9c4354..fa0f37e4afe4 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2194,7 +2194,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2194 u8 rate_or_pad; 2194 u8 rate_or_pad;
2195 __le16 chan_freq; 2195 __le16 chan_freq;
2196 __le16 chan_flags; 2196 __le16 chan_flags;
2197 } __attribute__ ((packed)) *rthdr; 2197 } __packed *rthdr;
2198 struct sk_buff *skb = rx->skb, *skb2; 2198 struct sk_buff *skb = rx->skb, *skb2;
2199 struct net_device *prev_dev = NULL; 2199 struct net_device *prev_dev = NULL;
2200 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2200 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f54d8ba7d788..67656cbf2b15 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -259,7 +259,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
259 skb_queue_head_init(&sta->tx_filtered); 259 skb_queue_head_init(&sta->tx_filtered);
260 260
261 for (i = 0; i < NUM_RX_DATA_QUEUES; i++) 261 for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
262 sta->last_seq_ctrl[i] = cpu_to_le16(USHORT_MAX); 262 sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
263 263
264#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 264#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
265 printk(KERN_DEBUG "%s: Allocated STA %pM\n", 265 printk(KERN_DEBUG "%s: Allocated STA %pM\n",
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8593a77cfea9..aa2f106347e4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -40,27 +40,6 @@ config NF_CONNTRACK
40 40
41if NF_CONNTRACK 41if NF_CONNTRACK
42 42
43config NF_CT_ACCT
44 bool "Connection tracking flow accounting"
45 depends on NETFILTER_ADVANCED
46 help
47 If this option is enabled, the connection tracking code will
48 keep per-flow packet and byte counters.
49
50 Those counters can be used for flow-based accounting or the
51 `connbytes' match.
52
53 Please note that currently this option only sets a default state.
54 You may change it at boot time with nf_conntrack.acct=0/1 kernel
55 parameter or by loading the nf_conntrack module with acct=0/1.
56
57 You may also disable/enable it on a running system with:
58 sysctl net.netfilter.nf_conntrack_acct=0/1
59
60 This option will be removed in 2.6.29.
61
62 If unsure, say `N'.
63
64config NF_CONNTRACK_MARK 43config NF_CONNTRACK_MARK
65 bool 'Connection mark tracking support' 44 bool 'Connection mark tracking support'
66 depends on NETFILTER_ADVANCED 45 depends on NETFILTER_ADVANCED
@@ -424,6 +403,18 @@ config NETFILTER_XT_TARGET_HL
424 since you can easily create immortal packets that loop 403 since you can easily create immortal packets that loop
425 forever on the network. 404 forever on the network.
426 405
406config NETFILTER_XT_TARGET_IDLETIMER
407 tristate "IDLETIMER target support"
408 depends on NETFILTER_ADVANCED
409 help
410
411 This option adds the `IDLETIMER' target. Each matching packet
412 resets the timer associated with label specified when the rule is
413 added. When the timer expires, it triggers a sysfs notification.
414 The remaining time for expiration can be read via sysfs.
415
416 To compile it as a module, choose M here. If unsure, say N.
417
427config NETFILTER_XT_TARGET_LED 418config NETFILTER_XT_TARGET_LED
428 tristate '"LED" target support' 419 tristate '"LED" target support'
429 depends on LEDS_CLASS && LEDS_TRIGGERS 420 depends on LEDS_CLASS && LEDS_TRIGGERS
@@ -503,7 +494,7 @@ config NETFILTER_XT_TARGET_RATEEST
503 To compile it as a module, choose M here. If unsure, say N. 494 To compile it as a module, choose M here. If unsure, say N.
504 495
505config NETFILTER_XT_TARGET_TEE 496config NETFILTER_XT_TARGET_TEE
506 tristate '"TEE" - packet cloning to alternate destiantion' 497 tristate '"TEE" - packet cloning to alternate destination'
507 depends on NETFILTER_ADVANCED 498 depends on NETFILTER_ADVANCED
508 depends on (IPV6 || IPV6=n) 499 depends on (IPV6 || IPV6=n)
509 depends on !NF_CONNTRACK || NF_CONNTRACK 500 depends on !NF_CONNTRACK || NF_CONNTRACK
@@ -618,7 +609,6 @@ config NETFILTER_XT_MATCH_CONNBYTES
618 tristate '"connbytes" per-connection counter match support' 609 tristate '"connbytes" per-connection counter match support'
619 depends on NF_CONNTRACK 610 depends on NF_CONNTRACK
620 depends on NETFILTER_ADVANCED 611 depends on NETFILTER_ADVANCED
621 select NF_CT_ACCT
622 help 612 help
623 This option adds a `connbytes' match, which allows you to match the 613 This option adds a `connbytes' match, which allows you to match the
624 number of bytes and/or packets for each direction within a connection. 614 number of bytes and/or packets for each direction within a connection.
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 14e3a8fd8180..e28420aac5ef 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
61obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o 61obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
62obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o 62obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
63obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o 63obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
64obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
64 65
65# matches 66# matches
66obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o 67obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index d8f7e8ef67b4..654544e72264 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -158,10 +158,14 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
158 unsigned hash; 158 unsigned hash;
159 int ret; 159 int ret;
160 160
161 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
162 return 0;
163
161 /* Hash by protocol, client address and port */ 164 /* Hash by protocol, client address and port */
162 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); 165 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
163 166
164 ct_write_lock(hash); 167 ct_write_lock(hash);
168 spin_lock(&cp->lock);
165 169
166 if (!(cp->flags & IP_VS_CONN_F_HASHED)) { 170 if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
167 list_add(&cp->c_list, &ip_vs_conn_tab[hash]); 171 list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
@@ -174,6 +178,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
174 ret = 0; 178 ret = 0;
175 } 179 }
176 180
181 spin_unlock(&cp->lock);
177 ct_write_unlock(hash); 182 ct_write_unlock(hash);
178 183
179 return ret; 184 return ret;
@@ -193,6 +198,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
193 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); 198 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
194 199
195 ct_write_lock(hash); 200 ct_write_lock(hash);
201 spin_lock(&cp->lock);
196 202
197 if (cp->flags & IP_VS_CONN_F_HASHED) { 203 if (cp->flags & IP_VS_CONN_F_HASHED) {
198 list_del(&cp->c_list); 204 list_del(&cp->c_list);
@@ -202,6 +208,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
202 } else 208 } else
203 ret = 0; 209 ret = 0;
204 210
211 spin_unlock(&cp->lock);
205 ct_write_unlock(hash); 212 ct_write_unlock(hash);
206 213
207 return ret; 214 return ret;
@@ -355,8 +362,9 @@ struct ip_vs_conn *ip_vs_conn_out_get
355 */ 362 */
356void ip_vs_conn_put(struct ip_vs_conn *cp) 363void ip_vs_conn_put(struct ip_vs_conn *cp)
357{ 364{
358 /* reset it expire in its timeout */ 365 unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ?
359 mod_timer(&cp->timer, jiffies+cp->timeout); 366 0 : cp->timeout;
367 mod_timer(&cp->timer, jiffies+t);
360 368
361 __ip_vs_conn_put(cp); 369 __ip_vs_conn_put(cp);
362} 370}
@@ -649,7 +657,7 @@ static void ip_vs_conn_expire(unsigned long data)
649 /* 657 /*
650 * unhash it if it is hashed in the conn table 658 * unhash it if it is hashed in the conn table
651 */ 659 */
652 if (!ip_vs_conn_unhash(cp)) 660 if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
653 goto expire_later; 661 goto expire_later;
654 662
655 /* 663 /*
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1cd6e3fd058b..50907d8472a3 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -194,6 +194,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
194 struct ip_vs_dest *dest; 194 struct ip_vs_dest *dest;
195 struct ip_vs_conn *ct; 195 struct ip_vs_conn *ct;
196 __be16 dport; /* destination port to forward */ 196 __be16 dport; /* destination port to forward */
197 __be16 flags;
197 union nf_inet_addr snet; /* source network of the client, 198 union nf_inet_addr snet; /* source network of the client,
198 after masking */ 199 after masking */
199 200
@@ -340,6 +341,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
340 dport = ports[1]; 341 dport = ports[1];
341 } 342 }
342 343
344 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
345 && iph.protocol == IPPROTO_UDP)?
346 IP_VS_CONN_F_ONE_PACKET : 0;
347
343 /* 348 /*
344 * Create a new connection according to the template 349 * Create a new connection according to the template
345 */ 350 */
@@ -347,7 +352,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
347 &iph.saddr, ports[0], 352 &iph.saddr, ports[0],
348 &iph.daddr, ports[1], 353 &iph.daddr, ports[1],
349 &dest->addr, dport, 354 &dest->addr, dport,
350 0, 355 flags,
351 dest); 356 dest);
352 if (cp == NULL) { 357 if (cp == NULL) {
353 ip_vs_conn_put(ct); 358 ip_vs_conn_put(ct);
@@ -377,7 +382,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
377 struct ip_vs_conn *cp = NULL; 382 struct ip_vs_conn *cp = NULL;
378 struct ip_vs_iphdr iph; 383 struct ip_vs_iphdr iph;
379 struct ip_vs_dest *dest; 384 struct ip_vs_dest *dest;
380 __be16 _ports[2], *pptr; 385 __be16 _ports[2], *pptr, flags;
381 386
382 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 387 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
383 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); 388 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -407,6 +412,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
407 return NULL; 412 return NULL;
408 } 413 }
409 414
415 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
416 && iph.protocol == IPPROTO_UDP)?
417 IP_VS_CONN_F_ONE_PACKET : 0;
418
410 /* 419 /*
411 * Create a connection entry. 420 * Create a connection entry.
412 */ 421 */
@@ -414,7 +423,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
414 &iph.saddr, pptr[0], 423 &iph.saddr, pptr[0],
415 &iph.daddr, pptr[1], 424 &iph.daddr, pptr[1],
416 &dest->addr, dest->port ? dest->port : pptr[1], 425 &dest->addr, dest->port ? dest->port : pptr[1],
417 0, 426 flags,
418 dest); 427 dest);
419 if (cp == NULL) 428 if (cp == NULL)
420 return NULL; 429 return NULL;
@@ -464,6 +473,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
464 if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { 473 if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
465 int ret, cs; 474 int ret, cs;
466 struct ip_vs_conn *cp; 475 struct ip_vs_conn *cp;
476 __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
477 iph.protocol == IPPROTO_UDP)?
478 IP_VS_CONN_F_ONE_PACKET : 0;
467 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; 479 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
468 480
469 ip_vs_service_put(svc); 481 ip_vs_service_put(svc);
@@ -474,7 +486,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
474 &iph.saddr, pptr[0], 486 &iph.saddr, pptr[0],
475 &iph.daddr, pptr[1], 487 &iph.daddr, pptr[1],
476 &daddr, 0, 488 &daddr, 0,
477 IP_VS_CONN_F_BYPASS, 489 IP_VS_CONN_F_BYPASS | flags,
478 NULL); 490 NULL);
479 if (cp == NULL) 491 if (cp == NULL)
480 return NF_DROP; 492 return NF_DROP;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 36dc1d88c2fa..0f0c079c422a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1864,14 +1864,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1864 svc->scheduler->name); 1864 svc->scheduler->name);
1865 else 1865 else
1866#endif 1866#endif
1867 seq_printf(seq, "%s %08X:%04X %s ", 1867 seq_printf(seq, "%s %08X:%04X %s %s ",
1868 ip_vs_proto_name(svc->protocol), 1868 ip_vs_proto_name(svc->protocol),
1869 ntohl(svc->addr.ip), 1869 ntohl(svc->addr.ip),
1870 ntohs(svc->port), 1870 ntohs(svc->port),
1871 svc->scheduler->name); 1871 svc->scheduler->name,
1872 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1872 } else { 1873 } else {
1873 seq_printf(seq, "FWM %08X %s ", 1874 seq_printf(seq, "FWM %08X %s %s",
1874 svc->fwmark, svc->scheduler->name); 1875 svc->fwmark, svc->scheduler->name,
1876 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1875 } 1877 }
1876 1878
1877 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 1879 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 93c15a107b2c..02b078e11cf3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -90,10 +90,10 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
90 &dest->addr.ip); 90 &dest->addr.ip);
91 return NULL; 91 return NULL;
92 } 92 }
93 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); 93 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst));
94 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", 94 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
95 &dest->addr.ip, 95 &dest->addr.ip,
96 atomic_read(&rt->u.dst.__refcnt), rtos); 96 atomic_read(&rt->dst.__refcnt), rtos);
97 } 97 }
98 spin_unlock(&dest->dst_lock); 98 spin_unlock(&dest->dst_lock);
99 } else { 99 } else {
@@ -148,10 +148,10 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
148 &dest->addr.in6); 148 &dest->addr.in6);
149 return NULL; 149 return NULL;
150 } 150 }
151 __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); 151 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst));
152 IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", 152 IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
153 &dest->addr.in6, 153 &dest->addr.in6,
154 atomic_read(&rt->u.dst.__refcnt)); 154 atomic_read(&rt->dst.__refcnt));
155 } 155 }
156 spin_unlock(&dest->dst_lock); 156 spin_unlock(&dest->dst_lock);
157 } else { 157 } else {
@@ -198,7 +198,7 @@ do { \
198 (skb)->ipvs_property = 1; \ 198 (skb)->ipvs_property = 1; \
199 skb_forward_csum(skb); \ 199 skb_forward_csum(skb); \
200 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ 200 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
201 (rt)->u.dst.dev, dst_output); \ 201 (rt)->dst.dev, dst_output); \
202} while (0) 202} while (0)
203 203
204 204
@@ -245,7 +245,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
245 } 245 }
246 246
247 /* MTU checking */ 247 /* MTU checking */
248 mtu = dst_mtu(&rt->u.dst); 248 mtu = dst_mtu(&rt->dst);
249 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 249 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
250 ip_rt_put(rt); 250 ip_rt_put(rt);
251 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 251 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -265,7 +265,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
265 265
266 /* drop old route */ 266 /* drop old route */
267 skb_dst_drop(skb); 267 skb_dst_drop(skb);
268 skb_dst_set(skb, &rt->u.dst); 268 skb_dst_set(skb, &rt->dst);
269 269
270 /* Another hack: avoid icmp_send in ip_fragment */ 270 /* Another hack: avoid icmp_send in ip_fragment */
271 skb->local_df = 1; 271 skb->local_df = 1;
@@ -309,9 +309,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
309 } 309 }
310 310
311 /* MTU checking */ 311 /* MTU checking */
312 mtu = dst_mtu(&rt->u.dst); 312 mtu = dst_mtu(&rt->dst);
313 if (skb->len > mtu) { 313 if (skb->len > mtu) {
314 dst_release(&rt->u.dst); 314 dst_release(&rt->dst);
315 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 315 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
316 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 316 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
317 goto tx_error; 317 goto tx_error;
@@ -323,13 +323,13 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
323 */ 323 */
324 skb = skb_share_check(skb, GFP_ATOMIC); 324 skb = skb_share_check(skb, GFP_ATOMIC);
325 if (unlikely(skb == NULL)) { 325 if (unlikely(skb == NULL)) {
326 dst_release(&rt->u.dst); 326 dst_release(&rt->dst);
327 return NF_STOLEN; 327 return NF_STOLEN;
328 } 328 }
329 329
330 /* drop old route */ 330 /* drop old route */
331 skb_dst_drop(skb); 331 skb_dst_drop(skb);
332 skb_dst_set(skb, &rt->u.dst); 332 skb_dst_set(skb, &rt->dst);
333 333
334 /* Another hack: avoid icmp_send in ip_fragment */ 334 /* Another hack: avoid icmp_send in ip_fragment */
335 skb->local_df = 1; 335 skb->local_df = 1;
@@ -376,7 +376,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
376 goto tx_error_icmp; 376 goto tx_error_icmp;
377 377
378 /* MTU checking */ 378 /* MTU checking */
379 mtu = dst_mtu(&rt->u.dst); 379 mtu = dst_mtu(&rt->dst);
380 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 380 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
381 ip_rt_put(rt); 381 ip_rt_put(rt);
382 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 382 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -388,12 +388,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
388 if (!skb_make_writable(skb, sizeof(struct iphdr))) 388 if (!skb_make_writable(skb, sizeof(struct iphdr)))
389 goto tx_error_put; 389 goto tx_error_put;
390 390
391 if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) 391 if (skb_cow(skb, rt->dst.dev->hard_header_len))
392 goto tx_error_put; 392 goto tx_error_put;
393 393
394 /* drop old route */ 394 /* drop old route */
395 skb_dst_drop(skb); 395 skb_dst_drop(skb);
396 skb_dst_set(skb, &rt->u.dst); 396 skb_dst_set(skb, &rt->dst);
397 397
398 /* mangle the packet */ 398 /* mangle the packet */
399 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 399 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -452,9 +452,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
452 goto tx_error_icmp; 452 goto tx_error_icmp;
453 453
454 /* MTU checking */ 454 /* MTU checking */
455 mtu = dst_mtu(&rt->u.dst); 455 mtu = dst_mtu(&rt->dst);
456 if (skb->len > mtu) { 456 if (skb->len > mtu) {
457 dst_release(&rt->u.dst); 457 dst_release(&rt->dst);
458 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 458 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
459 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 459 IP_VS_DBG_RL_PKT(0, pp, skb, 0,
460 "ip_vs_nat_xmit_v6(): frag needed for"); 460 "ip_vs_nat_xmit_v6(): frag needed for");
@@ -465,12 +465,12 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
465 if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) 465 if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
466 goto tx_error_put; 466 goto tx_error_put;
467 467
468 if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) 468 if (skb_cow(skb, rt->dst.dev->hard_header_len))
469 goto tx_error_put; 469 goto tx_error_put;
470 470
471 /* drop old route */ 471 /* drop old route */
472 skb_dst_drop(skb); 472 skb_dst_drop(skb);
473 skb_dst_set(skb, &rt->u.dst); 473 skb_dst_set(skb, &rt->dst);
474 474
475 /* mangle the packet */ 475 /* mangle the packet */
476 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 476 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -498,7 +498,7 @@ tx_error:
498 kfree_skb(skb); 498 kfree_skb(skb);
499 return NF_STOLEN; 499 return NF_STOLEN;
500tx_error_put: 500tx_error_put:
501 dst_release(&rt->u.dst); 501 dst_release(&rt->dst);
502 goto tx_error; 502 goto tx_error;
503} 503}
504#endif 504#endif
@@ -549,9 +549,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
549 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos)))) 549 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
550 goto tx_error_icmp; 550 goto tx_error_icmp;
551 551
552 tdev = rt->u.dst.dev; 552 tdev = rt->dst.dev;
553 553
554 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); 554 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
555 if (mtu < 68) { 555 if (mtu < 68) {
556 ip_rt_put(rt); 556 ip_rt_put(rt);
557 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); 557 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
@@ -601,7 +601,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
601 601
602 /* drop old route */ 602 /* drop old route */
603 skb_dst_drop(skb); 603 skb_dst_drop(skb);
604 skb_dst_set(skb, &rt->u.dst); 604 skb_dst_set(skb, &rt->dst);
605 605
606 /* 606 /*
607 * Push down and install the IPIP header. 607 * Push down and install the IPIP header.
@@ -615,7 +615,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
615 iph->daddr = rt->rt_dst; 615 iph->daddr = rt->rt_dst;
616 iph->saddr = rt->rt_src; 616 iph->saddr = rt->rt_src;
617 iph->ttl = old_iph->ttl; 617 iph->ttl = old_iph->ttl;
618 ip_select_ident(iph, &rt->u.dst, NULL); 618 ip_select_ident(iph, &rt->dst, NULL);
619 619
620 /* Another hack: avoid icmp_send in ip_fragment */ 620 /* Another hack: avoid icmp_send in ip_fragment */
621 skb->local_df = 1; 621 skb->local_df = 1;
@@ -660,12 +660,12 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
660 if (!rt) 660 if (!rt)
661 goto tx_error_icmp; 661 goto tx_error_icmp;
662 662
663 tdev = rt->u.dst.dev; 663 tdev = rt->dst.dev;
664 664
665 mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr); 665 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
666 /* TODO IPv6: do we need this check in IPv6? */ 666 /* TODO IPv6: do we need this check in IPv6? */
667 if (mtu < 1280) { 667 if (mtu < 1280) {
668 dst_release(&rt->u.dst); 668 dst_release(&rt->dst);
669 IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); 669 IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__);
670 goto tx_error; 670 goto tx_error;
671 } 671 }
@@ -674,7 +674,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
674 674
675 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { 675 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
676 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 676 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
677 dst_release(&rt->u.dst); 677 dst_release(&rt->dst);
678 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 678 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
679 goto tx_error; 679 goto tx_error;
680 } 680 }
@@ -689,7 +689,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
689 struct sk_buff *new_skb = 689 struct sk_buff *new_skb =
690 skb_realloc_headroom(skb, max_headroom); 690 skb_realloc_headroom(skb, max_headroom);
691 if (!new_skb) { 691 if (!new_skb) {
692 dst_release(&rt->u.dst); 692 dst_release(&rt->dst);
693 kfree_skb(skb); 693 kfree_skb(skb);
694 IP_VS_ERR_RL("%s(): no memory\n", __func__); 694 IP_VS_ERR_RL("%s(): no memory\n", __func__);
695 return NF_STOLEN; 695 return NF_STOLEN;
@@ -707,7 +707,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
707 707
708 /* drop old route */ 708 /* drop old route */
709 skb_dst_drop(skb); 709 skb_dst_drop(skb);
710 skb_dst_set(skb, &rt->u.dst); 710 skb_dst_set(skb, &rt->dst);
711 711
712 /* 712 /*
713 * Push down and install the IPIP header. 713 * Push down and install the IPIP header.
@@ -760,7 +760,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
760 goto tx_error_icmp; 760 goto tx_error_icmp;
761 761
762 /* MTU checking */ 762 /* MTU checking */
763 mtu = dst_mtu(&rt->u.dst); 763 mtu = dst_mtu(&rt->dst);
764 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { 764 if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
765 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 765 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
766 ip_rt_put(rt); 766 ip_rt_put(rt);
@@ -780,7 +780,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
780 780
781 /* drop old route */ 781 /* drop old route */
782 skb_dst_drop(skb); 782 skb_dst_drop(skb);
783 skb_dst_set(skb, &rt->u.dst); 783 skb_dst_set(skb, &rt->dst);
784 784
785 /* Another hack: avoid icmp_send in ip_fragment */ 785 /* Another hack: avoid icmp_send in ip_fragment */
786 skb->local_df = 1; 786 skb->local_df = 1;
@@ -813,10 +813,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
813 goto tx_error_icmp; 813 goto tx_error_icmp;
814 814
815 /* MTU checking */ 815 /* MTU checking */
816 mtu = dst_mtu(&rt->u.dst); 816 mtu = dst_mtu(&rt->dst);
817 if (skb->len > mtu) { 817 if (skb->len > mtu) {
818 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 818 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
819 dst_release(&rt->u.dst); 819 dst_release(&rt->dst);
820 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 820 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
821 goto tx_error; 821 goto tx_error;
822 } 822 }
@@ -827,13 +827,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
827 */ 827 */
828 skb = skb_share_check(skb, GFP_ATOMIC); 828 skb = skb_share_check(skb, GFP_ATOMIC);
829 if (unlikely(skb == NULL)) { 829 if (unlikely(skb == NULL)) {
830 dst_release(&rt->u.dst); 830 dst_release(&rt->dst);
831 return NF_STOLEN; 831 return NF_STOLEN;
832 } 832 }
833 833
834 /* drop old route */ 834 /* drop old route */
835 skb_dst_drop(skb); 835 skb_dst_drop(skb);
836 skb_dst_set(skb, &rt->u.dst); 836 skb_dst_set(skb, &rt->dst);
837 837
838 /* Another hack: avoid icmp_send in ip_fragment */ 838 /* Another hack: avoid icmp_send in ip_fragment */
839 skb->local_df = 1; 839 skb->local_df = 1;
@@ -888,7 +888,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
888 goto tx_error_icmp; 888 goto tx_error_icmp;
889 889
890 /* MTU checking */ 890 /* MTU checking */
891 mtu = dst_mtu(&rt->u.dst); 891 mtu = dst_mtu(&rt->dst);
892 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { 892 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
893 ip_rt_put(rt); 893 ip_rt_put(rt);
894 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 894 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
@@ -900,12 +900,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
900 if (!skb_make_writable(skb, offset)) 900 if (!skb_make_writable(skb, offset))
901 goto tx_error_put; 901 goto tx_error_put;
902 902
903 if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) 903 if (skb_cow(skb, rt->dst.dev->hard_header_len))
904 goto tx_error_put; 904 goto tx_error_put;
905 905
906 /* drop the old route when skb is not shared */ 906 /* drop the old route when skb is not shared */
907 skb_dst_drop(skb); 907 skb_dst_drop(skb);
908 skb_dst_set(skb, &rt->u.dst); 908 skb_dst_set(skb, &rt->dst);
909 909
910 ip_vs_nat_icmp(skb, pp, cp, 0); 910 ip_vs_nat_icmp(skb, pp, cp, 0);
911 911
@@ -963,9 +963,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
963 goto tx_error_icmp; 963 goto tx_error_icmp;
964 964
965 /* MTU checking */ 965 /* MTU checking */
966 mtu = dst_mtu(&rt->u.dst); 966 mtu = dst_mtu(&rt->dst);
967 if (skb->len > mtu) { 967 if (skb->len > mtu) {
968 dst_release(&rt->u.dst); 968 dst_release(&rt->dst);
969 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 969 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
970 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 970 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
971 goto tx_error; 971 goto tx_error;
@@ -975,12 +975,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
975 if (!skb_make_writable(skb, offset)) 975 if (!skb_make_writable(skb, offset))
976 goto tx_error_put; 976 goto tx_error_put;
977 977
978 if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) 978 if (skb_cow(skb, rt->dst.dev->hard_header_len))
979 goto tx_error_put; 979 goto tx_error_put;
980 980
981 /* drop the old route when skb is not shared */ 981 /* drop the old route when skb is not shared */
982 skb_dst_drop(skb); 982 skb_dst_drop(skb);
983 skb_dst_set(skb, &rt->u.dst); 983 skb_dst_set(skb, &rt->dst);
984 984
985 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 985 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
986 986
@@ -1001,7 +1001,7 @@ out:
1001 LeaveFunction(10); 1001 LeaveFunction(10);
1002 return rc; 1002 return rc;
1003tx_error_put: 1003tx_error_put:
1004 dst_release(&rt->u.dst); 1004 dst_release(&rt->dst);
1005 goto tx_error; 1005 goto tx_error;
1006} 1006}
1007#endif 1007#endif
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index ab81b380eae6..5178c691ecbf 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -17,13 +17,7 @@
17#include <net/netfilter/nf_conntrack_extend.h> 17#include <net/netfilter/nf_conntrack_extend.h>
18#include <net/netfilter/nf_conntrack_acct.h> 18#include <net/netfilter/nf_conntrack_acct.h>
19 19
20#ifdef CONFIG_NF_CT_ACCT 20static int nf_ct_acct __read_mostly;
21#define NF_CT_ACCT_DEFAULT 1
22#else
23#define NF_CT_ACCT_DEFAULT 0
24#endif
25
26static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
27 21
28module_param_named(acct, nf_ct_acct, bool, 0644); 22module_param_named(acct, nf_ct_acct, bool, 0644);
29MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); 23MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
@@ -114,12 +108,6 @@ int nf_conntrack_acct_init(struct net *net)
114 net->ct.sysctl_acct = nf_ct_acct; 108 net->ct.sysctl_acct = nf_ct_acct;
115 109
116 if (net_eq(net, &init_net)) { 110 if (net_eq(net, &init_net)) {
117#ifdef CONFIG_NF_CT_ACCT
118 printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n");
119 printk(KERN_WARNING "nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or\n");
120 printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
121#endif
122
123 ret = nf_ct_extend_register(&acct_extend); 111 ret = nf_ct_extend_register(&acct_extend);
124 if (ret < 0) { 112 if (ret < 0) {
125 printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); 113 printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index eeeb8bc73982..16b41b4e2a3c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -62,8 +62,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
62unsigned int nf_conntrack_max __read_mostly; 62unsigned int nf_conntrack_max __read_mostly;
63EXPORT_SYMBOL_GPL(nf_conntrack_max); 63EXPORT_SYMBOL_GPL(nf_conntrack_max);
64 64
65struct nf_conn nf_conntrack_untracked __read_mostly; 65DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
66EXPORT_SYMBOL_GPL(nf_conntrack_untracked); 66EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
67 67
68static int nf_conntrack_hash_rnd_initted; 68static int nf_conntrack_hash_rnd_initted;
69static unsigned int nf_conntrack_hash_rnd; 69static unsigned int nf_conntrack_hash_rnd;
@@ -619,9 +619,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
619 ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL; 619 ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
620 /* Don't set timer yet: wait for confirmation */ 620 /* Don't set timer yet: wait for confirmation */
621 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); 621 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
622#ifdef CONFIG_NET_NS 622 write_pnet(&ct->ct_net, net);
623 ct->ct_net = net;
624#endif
625#ifdef CONFIG_NF_CONNTRACK_ZONES 623#ifdef CONFIG_NF_CONNTRACK_ZONES
626 if (zone) { 624 if (zone) {
627 struct nf_conntrack_zone *nf_ct_zone; 625 struct nf_conntrack_zone *nf_ct_zone;
@@ -1183,10 +1181,21 @@ static void nf_ct_release_dying_list(struct net *net)
1183 spin_unlock_bh(&nf_conntrack_lock); 1181 spin_unlock_bh(&nf_conntrack_lock);
1184} 1182}
1185 1183
1184static int untrack_refs(void)
1185{
1186 int cnt = 0, cpu;
1187
1188 for_each_possible_cpu(cpu) {
1189 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1190
1191 cnt += atomic_read(&ct->ct_general.use) - 1;
1192 }
1193 return cnt;
1194}
1195
1186static void nf_conntrack_cleanup_init_net(void) 1196static void nf_conntrack_cleanup_init_net(void)
1187{ 1197{
1188 /* wait until all references to nf_conntrack_untracked are dropped */ 1198 while (untrack_refs() > 0)
1189 while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
1190 schedule(); 1199 schedule();
1191 1200
1192 nf_conntrack_helper_fini(); 1201 nf_conntrack_helper_fini();
@@ -1321,10 +1330,19 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
1321module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 1330module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
1322 &nf_conntrack_htable_size, 0600); 1331 &nf_conntrack_htable_size, 0600);
1323 1332
1333void nf_ct_untracked_status_or(unsigned long bits)
1334{
1335 int cpu;
1336
1337 for_each_possible_cpu(cpu)
1338 per_cpu(nf_conntrack_untracked, cpu).status |= bits;
1339}
1340EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
1341
1324static int nf_conntrack_init_init_net(void) 1342static int nf_conntrack_init_init_net(void)
1325{ 1343{
1326 int max_factor = 8; 1344 int max_factor = 8;
1327 int ret; 1345 int ret, cpu;
1328 1346
1329 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB 1347 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
1330 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ 1348 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
@@ -1363,13 +1381,13 @@ static int nf_conntrack_init_init_net(void)
1363 goto err_extend; 1381 goto err_extend;
1364#endif 1382#endif
1365 /* Set up fake conntrack: to never be deleted, not in any hashes */ 1383 /* Set up fake conntrack: to never be deleted, not in any hashes */
1366#ifdef CONFIG_NET_NS 1384 for_each_possible_cpu(cpu) {
1367 nf_conntrack_untracked.ct_net = &init_net; 1385 struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
1368#endif 1386 write_pnet(&ct->ct_net, &init_net);
1369 atomic_set(&nf_conntrack_untracked.ct_general.use, 1); 1387 atomic_set(&ct->ct_general.use, 1);
1388 }
1370 /* - and look it like as a confirmed connection */ 1389 /* - and look it like as a confirmed connection */
1371 set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); 1390 nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
1372
1373 return 0; 1391 return 0;
1374 1392
1375#ifdef CONFIG_NF_CONNTRACK_ZONES 1393#ifdef CONFIG_NF_CONNTRACK_ZONES
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 6eaee7c8a337..b969025cf82f 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -734,11 +734,11 @@ static int callforward_do_filter(const union nf_inet_addr *src,
734 if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { 734 if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
735 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { 735 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
736 if (rt1->rt_gateway == rt2->rt_gateway && 736 if (rt1->rt_gateway == rt2->rt_gateway &&
737 rt1->u.dst.dev == rt2->u.dst.dev) 737 rt1->dst.dev == rt2->dst.dev)
738 ret = 1; 738 ret = 1;
739 dst_release(&rt2->u.dst); 739 dst_release(&rt2->dst);
740 } 740 }
741 dst_release(&rt1->u.dst); 741 dst_release(&rt1->dst);
742 } 742 }
743 break; 743 break;
744 } 744 }
@@ -753,11 +753,11 @@ static int callforward_do_filter(const union nf_inet_addr *src,
753 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { 753 if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
754 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, 754 if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
755 sizeof(rt1->rt6i_gateway)) && 755 sizeof(rt1->rt6i_gateway)) &&
756 rt1->u.dst.dev == rt2->u.dst.dev) 756 rt1->dst.dev == rt2->dst.dev)
757 ret = 1; 757 ret = 1;
758 dst_release(&rt2->u.dst); 758 dst_release(&rt2->dst);
759 } 759 }
760 dst_release(&rt1->u.dst); 760 dst_release(&rt1->dst);
761 } 761 }
762 break; 762 break;
763 } 763 }
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 497b2224536f..aadde018a072 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -61,7 +61,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
61 goto out; 61 goto out;
62 62
63 rcu_read_lock(); 63 rcu_read_lock();
64 in_dev = __in_dev_get_rcu(rt->u.dst.dev); 64 in_dev = __in_dev_get_rcu(rt->dst.dev);
65 if (in_dev != NULL) { 65 if (in_dev != NULL) {
66 for_primary_ifa(in_dev) { 66 for_primary_ifa(in_dev) {
67 if (ifa->ifa_broadcast == iph->daddr) { 67 if (ifa->ifa_broadcast == iph->daddr) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c42ff6aa441d..5bae1cd15eea 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -480,7 +480,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
480 int err; 480 int err;
481 481
482 /* ignore our fake conntrack entry */ 482 /* ignore our fake conntrack entry */
483 if (ct == &nf_conntrack_untracked) 483 if (nf_ct_is_untracked(ct))
484 return 0; 484 return 0;
485 485
486 if (events & (1 << IPCT_DESTROY)) { 486 if (events & (1 << IPCT_DESTROY)) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9dd8cd4fb6e6..802dbffae8b4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -736,27 +736,19 @@ static bool tcp_in_window(const struct nf_conn *ct,
736 return res; 736 return res;
737} 737}
738 738
739#define TH_FIN 0x01
740#define TH_SYN 0x02
741#define TH_RST 0x04
742#define TH_PUSH 0x08
743#define TH_ACK 0x10
744#define TH_URG 0x20
745#define TH_ECE 0x40
746#define TH_CWR 0x80
747
748/* table of valid flag combinations - PUSH, ECE and CWR are always valid */ 739/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
749static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = 740static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
741 TCPHDR_URG) + 1] =
750{ 742{
751 [TH_SYN] = 1, 743 [TCPHDR_SYN] = 1,
752 [TH_SYN|TH_URG] = 1, 744 [TCPHDR_SYN|TCPHDR_URG] = 1,
753 [TH_SYN|TH_ACK] = 1, 745 [TCPHDR_SYN|TCPHDR_ACK] = 1,
754 [TH_RST] = 1, 746 [TCPHDR_RST] = 1,
755 [TH_RST|TH_ACK] = 1, 747 [TCPHDR_RST|TCPHDR_ACK] = 1,
756 [TH_FIN|TH_ACK] = 1, 748 [TCPHDR_FIN|TCPHDR_ACK] = 1,
757 [TH_FIN|TH_ACK|TH_URG] = 1, 749 [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1,
758 [TH_ACK] = 1, 750 [TCPHDR_ACK] = 1,
759 [TH_ACK|TH_URG] = 1, 751 [TCPHDR_ACK|TCPHDR_URG] = 1,
760}; 752};
761 753
762/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ 754/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
@@ -803,7 +795,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
803 } 795 }
804 796
805 /* Check TCP flags. */ 797 /* Check TCP flags. */
806 tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH)); 798 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
807 if (!tcp_valid_flags[tcpflags]) { 799 if (!tcp_valid_flags[tcpflags]) {
808 if (LOG_INVALID(net, IPPROTO_TCP)) 800 if (LOG_INVALID(net, IPPROTO_TCP))
809 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 801 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index fc9a211e629e..6a1572b0ab41 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -66,9 +66,10 @@ struct nfulnl_instance {
66 u_int16_t group_num; /* number of this queue */ 66 u_int16_t group_num; /* number of this queue */
67 u_int16_t flags; 67 u_int16_t flags;
68 u_int8_t copy_mode; 68 u_int8_t copy_mode;
69 struct rcu_head rcu;
69}; 70};
70 71
71static DEFINE_RWLOCK(instances_lock); 72static DEFINE_SPINLOCK(instances_lock);
72static atomic_t global_seq; 73static atomic_t global_seq;
73 74
74#define INSTANCE_BUCKETS 16 75#define INSTANCE_BUCKETS 16
@@ -88,7 +89,7 @@ __instance_lookup(u_int16_t group_num)
88 struct nfulnl_instance *inst; 89 struct nfulnl_instance *inst;
89 90
90 head = &instance_table[instance_hashfn(group_num)]; 91 head = &instance_table[instance_hashfn(group_num)];
91 hlist_for_each_entry(inst, pos, head, hlist) { 92 hlist_for_each_entry_rcu(inst, pos, head, hlist) {
92 if (inst->group_num == group_num) 93 if (inst->group_num == group_num)
93 return inst; 94 return inst;
94 } 95 }
@@ -106,22 +107,26 @@ instance_lookup_get(u_int16_t group_num)
106{ 107{
107 struct nfulnl_instance *inst; 108 struct nfulnl_instance *inst;
108 109
109 read_lock_bh(&instances_lock); 110 rcu_read_lock_bh();
110 inst = __instance_lookup(group_num); 111 inst = __instance_lookup(group_num);
111 if (inst) 112 if (inst && !atomic_inc_not_zero(&inst->use))
112 instance_get(inst); 113 inst = NULL;
113 read_unlock_bh(&instances_lock); 114 rcu_read_unlock_bh();
114 115
115 return inst; 116 return inst;
116} 117}
117 118
119static void nfulnl_instance_free_rcu(struct rcu_head *head)
120{
121 kfree(container_of(head, struct nfulnl_instance, rcu));
122 module_put(THIS_MODULE);
123}
124
118static void 125static void
119instance_put(struct nfulnl_instance *inst) 126instance_put(struct nfulnl_instance *inst)
120{ 127{
121 if (inst && atomic_dec_and_test(&inst->use)) { 128 if (inst && atomic_dec_and_test(&inst->use))
122 kfree(inst); 129 call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
123 module_put(THIS_MODULE);
124 }
125} 130}
126 131
127static void nfulnl_timer(unsigned long data); 132static void nfulnl_timer(unsigned long data);
@@ -132,7 +137,7 @@ instance_create(u_int16_t group_num, int pid)
132 struct nfulnl_instance *inst; 137 struct nfulnl_instance *inst;
133 int err; 138 int err;
134 139
135 write_lock_bh(&instances_lock); 140 spin_lock_bh(&instances_lock);
136 if (__instance_lookup(group_num)) { 141 if (__instance_lookup(group_num)) {
137 err = -EEXIST; 142 err = -EEXIST;
138 goto out_unlock; 143 goto out_unlock;
@@ -166,32 +171,37 @@ instance_create(u_int16_t group_num, int pid)
166 inst->copy_mode = NFULNL_COPY_PACKET; 171 inst->copy_mode = NFULNL_COPY_PACKET;
167 inst->copy_range = NFULNL_COPY_RANGE_MAX; 172 inst->copy_range = NFULNL_COPY_RANGE_MAX;
168 173
169 hlist_add_head(&inst->hlist, 174 hlist_add_head_rcu(&inst->hlist,
170 &instance_table[instance_hashfn(group_num)]); 175 &instance_table[instance_hashfn(group_num)]);
171 176
172 write_unlock_bh(&instances_lock); 177 spin_unlock_bh(&instances_lock);
173 178
174 return inst; 179 return inst;
175 180
176out_unlock: 181out_unlock:
177 write_unlock_bh(&instances_lock); 182 spin_unlock_bh(&instances_lock);
178 return ERR_PTR(err); 183 return ERR_PTR(err);
179} 184}
180 185
181static void __nfulnl_flush(struct nfulnl_instance *inst); 186static void __nfulnl_flush(struct nfulnl_instance *inst);
182 187
188/* called with BH disabled */
183static void 189static void
184__instance_destroy(struct nfulnl_instance *inst) 190__instance_destroy(struct nfulnl_instance *inst)
185{ 191{
186 /* first pull it out of the global list */ 192 /* first pull it out of the global list */
187 hlist_del(&inst->hlist); 193 hlist_del_rcu(&inst->hlist);
188 194
189 /* then flush all pending packets from skb */ 195 /* then flush all pending packets from skb */
190 196
191 spin_lock_bh(&inst->lock); 197 spin_lock(&inst->lock);
198
199 /* lockless readers wont be able to use us */
200 inst->copy_mode = NFULNL_COPY_DISABLED;
201
192 if (inst->skb) 202 if (inst->skb)
193 __nfulnl_flush(inst); 203 __nfulnl_flush(inst);
194 spin_unlock_bh(&inst->lock); 204 spin_unlock(&inst->lock);
195 205
196 /* and finally put the refcount */ 206 /* and finally put the refcount */
197 instance_put(inst); 207 instance_put(inst);
@@ -200,9 +210,9 @@ __instance_destroy(struct nfulnl_instance *inst)
200static inline void 210static inline void
201instance_destroy(struct nfulnl_instance *inst) 211instance_destroy(struct nfulnl_instance *inst)
202{ 212{
203 write_lock_bh(&instances_lock); 213 spin_lock_bh(&instances_lock);
204 __instance_destroy(inst); 214 __instance_destroy(inst);
205 write_unlock_bh(&instances_lock); 215 spin_unlock_bh(&instances_lock);
206} 216}
207 217
208static int 218static int
@@ -403,8 +413,9 @@ __build_packet_message(struct nfulnl_instance *inst,
403 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV, 413 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
404 htonl(indev->ifindex)); 414 htonl(indev->ifindex));
405 /* this is the bridge group "brX" */ 415 /* this is the bridge group "brX" */
416 /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
406 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV, 417 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
407 htonl(indev->br_port->br->dev->ifindex)); 418 htonl(br_port_get_rcu(indev)->br->dev->ifindex));
408 } else { 419 } else {
409 /* Case 2: indev is bridge group, we need to look for 420 /* Case 2: indev is bridge group, we need to look for
410 * physical device (when called from ipv4) */ 421 * physical device (when called from ipv4) */
@@ -430,8 +441,9 @@ __build_packet_message(struct nfulnl_instance *inst,
430 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, 441 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
431 htonl(outdev->ifindex)); 442 htonl(outdev->ifindex));
432 /* this is the bridge group "brX" */ 443 /* this is the bridge group "brX" */
444 /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
433 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV, 445 NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
434 htonl(outdev->br_port->br->dev->ifindex)); 446 htonl(br_port_get_rcu(outdev)->br->dev->ifindex));
435 } else { 447 } else {
436 /* Case 2: indev is a bridge group, we need to look 448 /* Case 2: indev is a bridge group, we need to look
437 * for physical device (when called from ipv4) */ 449 * for physical device (when called from ipv4) */
@@ -619,6 +631,7 @@ nfulnl_log_packet(u_int8_t pf,
619 size += nla_total_size(data_len); 631 size += nla_total_size(data_len);
620 break; 632 break;
621 633
634 case NFULNL_COPY_DISABLED:
622 default: 635 default:
623 goto unlock_and_release; 636 goto unlock_and_release;
624 } 637 }
@@ -672,7 +685,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
672 int i; 685 int i;
673 686
674 /* destroy all instances for this pid */ 687 /* destroy all instances for this pid */
675 write_lock_bh(&instances_lock); 688 spin_lock_bh(&instances_lock);
676 for (i = 0; i < INSTANCE_BUCKETS; i++) { 689 for (i = 0; i < INSTANCE_BUCKETS; i++) {
677 struct hlist_node *tmp, *t2; 690 struct hlist_node *tmp, *t2;
678 struct nfulnl_instance *inst; 691 struct nfulnl_instance *inst;
@@ -684,7 +697,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
684 __instance_destroy(inst); 697 __instance_destroy(inst);
685 } 698 }
686 } 699 }
687 write_unlock_bh(&instances_lock); 700 spin_unlock_bh(&instances_lock);
688 } 701 }
689 return NOTIFY_DONE; 702 return NOTIFY_DONE;
690} 703}
@@ -861,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st)
861 874
862 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { 875 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
863 if (!hlist_empty(&instance_table[st->bucket])) 876 if (!hlist_empty(&instance_table[st->bucket]))
864 return instance_table[st->bucket].first; 877 return rcu_dereference_bh(instance_table[st->bucket].first);
865 } 878 }
866 return NULL; 879 return NULL;
867} 880}
868 881
869static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) 882static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
870{ 883{
871 h = h->next; 884 h = rcu_dereference_bh(h->next);
872 while (!h) { 885 while (!h) {
873 if (++st->bucket >= INSTANCE_BUCKETS) 886 if (++st->bucket >= INSTANCE_BUCKETS)
874 return NULL; 887 return NULL;
875 888
876 h = instance_table[st->bucket].first; 889 h = rcu_dereference_bh(instance_table[st->bucket].first);
877 } 890 }
878 return h; 891 return h;
879} 892}
@@ -890,9 +903,9 @@ static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
890} 903}
891 904
892static void *seq_start(struct seq_file *seq, loff_t *pos) 905static void *seq_start(struct seq_file *seq, loff_t *pos)
893 __acquires(instances_lock) 906 __acquires(rcu_bh)
894{ 907{
895 read_lock_bh(&instances_lock); 908 rcu_read_lock_bh();
896 return get_idx(seq->private, *pos); 909 return get_idx(seq->private, *pos);
897} 910}
898 911
@@ -903,9 +916,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
903} 916}
904 917
905static void seq_stop(struct seq_file *s, void *v) 918static void seq_stop(struct seq_file *s, void *v)
906 __releases(instances_lock) 919 __releases(rcu_bh)
907{ 920{
908 read_unlock_bh(&instances_lock); 921 rcu_read_unlock_bh();
909} 922}
910 923
911static int seq_show(struct seq_file *s, void *v) 924static int seq_show(struct seq_file *s, void *v)
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 12e1ab37fcd8..68e67d19724d 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -46,17 +46,19 @@ struct nfqnl_instance {
46 int peer_pid; 46 int peer_pid;
47 unsigned int queue_maxlen; 47 unsigned int queue_maxlen;
48 unsigned int copy_range; 48 unsigned int copy_range;
49 unsigned int queue_total;
50 unsigned int queue_dropped; 49 unsigned int queue_dropped;
51 unsigned int queue_user_dropped; 50 unsigned int queue_user_dropped;
52 51
53 unsigned int id_sequence; /* 'sequence' of pkt ids */
54 52
55 u_int16_t queue_num; /* number of this queue */ 53 u_int16_t queue_num; /* number of this queue */
56 u_int8_t copy_mode; 54 u_int8_t copy_mode;
57 55/*
58 spinlock_t lock; 56 * Following fields are dirtied for each queued packet,
59 57 * keep them in same cache line if possible.
58 */
59 spinlock_t lock;
60 unsigned int queue_total;
61 atomic_t id_sequence; /* 'sequence' of pkt ids */
60 struct list_head queue_list; /* packets in queue */ 62 struct list_head queue_list; /* packets in queue */
61}; 63};
62 64
@@ -238,32 +240,24 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
238 240
239 outdev = entry->outdev; 241 outdev = entry->outdev;
240 242
241 spin_lock_bh(&queue->lock); 243 switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
242
243 switch ((enum nfqnl_config_mode)queue->copy_mode) {
244 case NFQNL_COPY_META: 244 case NFQNL_COPY_META:
245 case NFQNL_COPY_NONE: 245 case NFQNL_COPY_NONE:
246 break; 246 break;
247 247
248 case NFQNL_COPY_PACKET: 248 case NFQNL_COPY_PACKET:
249 if (entskb->ip_summed == CHECKSUM_PARTIAL && 249 if (entskb->ip_summed == CHECKSUM_PARTIAL &&
250 skb_checksum_help(entskb)) { 250 skb_checksum_help(entskb))
251 spin_unlock_bh(&queue->lock);
252 return NULL; 251 return NULL;
253 } 252
254 if (queue->copy_range == 0 253 data_len = ACCESS_ONCE(queue->copy_range);
255 || queue->copy_range > entskb->len) 254 if (data_len == 0 || data_len > entskb->len)
256 data_len = entskb->len; 255 data_len = entskb->len;
257 else
258 data_len = queue->copy_range;
259 256
260 size += nla_total_size(data_len); 257 size += nla_total_size(data_len);
261 break; 258 break;
262 } 259 }
263 260
264 entry->id = queue->id_sequence++;
265
266 spin_unlock_bh(&queue->lock);
267 261
268 skb = alloc_skb(size, GFP_ATOMIC); 262 skb = alloc_skb(size, GFP_ATOMIC);
269 if (!skb) 263 if (!skb)
@@ -278,6 +272,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
278 nfmsg->version = NFNETLINK_V0; 272 nfmsg->version = NFNETLINK_V0;
279 nfmsg->res_id = htons(queue->queue_num); 273 nfmsg->res_id = htons(queue->queue_num);
280 274
275 entry->id = atomic_inc_return(&queue->id_sequence);
281 pmsg.packet_id = htonl(entry->id); 276 pmsg.packet_id = htonl(entry->id);
282 pmsg.hw_protocol = entskb->protocol; 277 pmsg.hw_protocol = entskb->protocol;
283 pmsg.hook = entry->hook; 278 pmsg.hook = entry->hook;
@@ -296,8 +291,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
296 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, 291 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
297 htonl(indev->ifindex)); 292 htonl(indev->ifindex));
298 /* this is the bridge group "brX" */ 293 /* this is the bridge group "brX" */
294 /* rcu_read_lock()ed by __nf_queue */
299 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, 295 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
300 htonl(indev->br_port->br->dev->ifindex)); 296 htonl(br_port_get_rcu(indev)->br->dev->ifindex));
301 } else { 297 } else {
302 /* Case 2: indev is bridge group, we need to look for 298 /* Case 2: indev is bridge group, we need to look for
303 * physical device (when called from ipv4) */ 299 * physical device (when called from ipv4) */
@@ -321,8 +317,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
321 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, 317 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
322 htonl(outdev->ifindex)); 318 htonl(outdev->ifindex));
323 /* this is the bridge group "brX" */ 319 /* this is the bridge group "brX" */
320 /* rcu_read_lock()ed by __nf_queue */
324 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, 321 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
325 htonl(outdev->br_port->br->dev->ifindex)); 322 htonl(br_port_get_rcu(outdev)->br->dev->ifindex));
326 } else { 323 } else {
327 /* Case 2: outdev is bridge group, we need to look for 324 /* Case 2: outdev is bridge group, we need to look for
328 * physical output device (when called from ipv4) */ 325 * physical output device (when called from ipv4) */
@@ -866,7 +863,7 @@ static int seq_show(struct seq_file *s, void *v)
866 inst->peer_pid, inst->queue_total, 863 inst->peer_pid, inst->queue_total,
867 inst->copy_mode, inst->copy_range, 864 inst->copy_mode, inst->copy_range,
868 inst->queue_dropped, inst->queue_user_dropped, 865 inst->queue_dropped, inst->queue_user_dropped,
869 inst->id_sequence, 1); 866 atomic_read(&inst->id_sequence), 1);
870} 867}
871 868
872static const struct seq_operations nfqnl_seq_ops = { 869static const struct seq_operations nfqnl_seq_ops = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 445de702b8b7..e34622fa0003 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -699,10 +699,8 @@ void xt_free_table_info(struct xt_table_info *info)
699 vfree(info->jumpstack); 699 vfree(info->jumpstack);
700 else 700 else
701 kfree(info->jumpstack); 701 kfree(info->jumpstack);
702 if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE) 702
703 vfree(info->stackptr); 703 free_percpu(info->stackptr);
704 else
705 kfree(info->stackptr);
706 704
707 kfree(info); 705 kfree(info);
708} 706}
@@ -753,14 +751,9 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
753 unsigned int size; 751 unsigned int size;
754 int cpu; 752 int cpu;
755 753
756 size = sizeof(unsigned int) * nr_cpu_ids; 754 i->stackptr = alloc_percpu(unsigned int);
757 if (size > PAGE_SIZE)
758 i->stackptr = vmalloc(size);
759 else
760 i->stackptr = kmalloc(size, GFP_KERNEL);
761 if (i->stackptr == NULL) 755 if (i->stackptr == NULL)
762 return -ENOMEM; 756 return -ENOMEM;
763 memset(i->stackptr, 0, size);
764 757
765 size = sizeof(void **) * nr_cpu_ids; 758 size = sizeof(void **) * nr_cpu_ids;
766 if (size > PAGE_SIZE) 759 if (size > PAGE_SIZE)
@@ -844,10 +837,6 @@ struct xt_table *xt_register_table(struct net *net,
844 struct xt_table_info *private; 837 struct xt_table_info *private;
845 struct xt_table *t, *table; 838 struct xt_table *t, *table;
846 839
847 ret = xt_jumpstack_alloc(newinfo);
848 if (ret < 0)
849 return ERR_PTR(ret);
850
851 /* Don't add one object to multiple lists. */ 840 /* Don't add one object to multiple lists. */
852 table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); 841 table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
853 if (!table) { 842 if (!table) {
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 562bf3266e04..0cb6053f02fd 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -67,7 +67,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
67 return -EINVAL; 67 return -EINVAL;
68 68
69 if (info->flags & XT_CT_NOTRACK) { 69 if (info->flags & XT_CT_NOTRACK) {
70 ct = &nf_conntrack_untracked; 70 ct = nf_ct_untracked_get();
71 atomic_inc(&ct->ct_general.use); 71 atomic_inc(&ct->ct_general.use);
72 goto out; 72 goto out;
73 } 73 }
@@ -132,7 +132,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
132 struct nf_conn *ct = info->ct; 132 struct nf_conn *ct = info->ct;
133 struct nf_conn_help *help; 133 struct nf_conn_help *help;
134 134
135 if (ct != &nf_conntrack_untracked) { 135 if (!nf_ct_is_untracked(ct)) {
136 help = nfct_help(ct); 136 help = nfct_help(ct);
137 if (help) 137 if (help)
138 module_put(help->helper->me); 138 module_put(help->helper->me);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
new file mode 100644
index 000000000000..be1f22e13545
--- /dev/null
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -0,0 +1,315 @@
1/*
2 * linux/net/netfilter/xt_IDLETIMER.c
3 *
4 * Netfilter module to trigger a timer when packet matches.
5 * After timer expires a kevent will be sent.
6 *
7 * Copyright (C) 2004, 2010 Nokia Corporation
8 * Written by Timo Teras <ext-timo.teras@nokia.com>
9 *
10 * Converted to x_tables and reworked for upstream inclusion
11 * by Luciano Coelho <luciano.coelho@nokia.com>
12 *
13 * Contact: Luciano Coelho <luciano.coelho@nokia.com>
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * version 2 as published by the Free Software Foundation.
18 *
19 * This program is distributed in the hope that it will be useful, but
20 * WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 * General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 * 02110-1301 USA
28 */
29
30#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
31
32#include <linux/module.h>
33#include <linux/timer.h>
34#include <linux/list.h>
35#include <linux/mutex.h>
36#include <linux/netfilter.h>
37#include <linux/netfilter/x_tables.h>
38#include <linux/netfilter/xt_IDLETIMER.h>
39#include <linux/kdev_t.h>
40#include <linux/kobject.h>
41#include <linux/workqueue.h>
42#include <linux/sysfs.h>
43
44struct idletimer_tg_attr {
45 struct attribute attr;
46 ssize_t (*show)(struct kobject *kobj,
47 struct attribute *attr, char *buf);
48};
49
50struct idletimer_tg {
51 struct list_head entry;
52 struct timer_list timer;
53 struct work_struct work;
54
55 struct kobject *kobj;
56 struct idletimer_tg_attr attr;
57
58 unsigned int refcnt;
59};
60
61static LIST_HEAD(idletimer_tg_list);
62static DEFINE_MUTEX(list_mutex);
63
64static struct kobject *idletimer_tg_kobj;
65
66static
67struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
68{
69 struct idletimer_tg *entry;
70
71 BUG_ON(!label);
72
73 list_for_each_entry(entry, &idletimer_tg_list, entry) {
74 if (!strcmp(label, entry->attr.attr.name))
75 return entry;
76 }
77
78 return NULL;
79}
80
81static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
82 char *buf)
83{
84 struct idletimer_tg *timer;
85 unsigned long expires = 0;
86
87 mutex_lock(&list_mutex);
88
89 timer = __idletimer_tg_find_by_label(attr->name);
90 if (timer)
91 expires = timer->timer.expires;
92
93 mutex_unlock(&list_mutex);
94
95 if (time_after(expires, jiffies))
96 return sprintf(buf, "%u\n",
97 jiffies_to_msecs(expires - jiffies) / 1000);
98
99 return sprintf(buf, "0\n");
100}
101
102static void idletimer_tg_work(struct work_struct *work)
103{
104 struct idletimer_tg *timer = container_of(work, struct idletimer_tg,
105 work);
106
107 sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
108}
109
110static void idletimer_tg_expired(unsigned long data)
111{
112 struct idletimer_tg *timer = (struct idletimer_tg *) data;
113
114 pr_debug("timer %s expired\n", timer->attr.attr.name);
115
116 schedule_work(&timer->work);
117}
118
119static int idletimer_tg_create(struct idletimer_tg_info *info)
120{
121 int ret;
122
123 info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
124 if (!info->timer) {
125 pr_debug("couldn't alloc timer\n");
126 ret = -ENOMEM;
127 goto out;
128 }
129
130 info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
131 if (!info->timer->attr.attr.name) {
132 pr_debug("couldn't alloc attribute name\n");
133 ret = -ENOMEM;
134 goto out_free_timer;
135 }
136 info->timer->attr.attr.mode = S_IRUGO;
137 info->timer->attr.show = idletimer_tg_show;
138
139 ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
140 if (ret < 0) {
141 pr_debug("couldn't add file to sysfs");
142 goto out_free_attr;
143 }
144
145 list_add(&info->timer->entry, &idletimer_tg_list);
146
147 setup_timer(&info->timer->timer, idletimer_tg_expired,
148 (unsigned long) info->timer);
149 info->timer->refcnt = 1;
150
151 mod_timer(&info->timer->timer,
152 msecs_to_jiffies(info->timeout * 1000) + jiffies);
153
154 INIT_WORK(&info->timer->work, idletimer_tg_work);
155
156 return 0;
157
158out_free_attr:
159 kfree(info->timer->attr.attr.name);
160out_free_timer:
161 kfree(info->timer);
162out:
163 return ret;
164}
165
166/*
167 * The actual xt_tables plugin.
168 */
169static unsigned int idletimer_tg_target(struct sk_buff *skb,
170 const struct xt_action_param *par)
171{
172 const struct idletimer_tg_info *info = par->targinfo;
173
174 pr_debug("resetting timer %s, timeout period %u\n",
175 info->label, info->timeout);
176
177 BUG_ON(!info->timer);
178
179 mod_timer(&info->timer->timer,
180 msecs_to_jiffies(info->timeout * 1000) + jiffies);
181
182 return XT_CONTINUE;
183}
184
185static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
186{
187 struct idletimer_tg_info *info = par->targinfo;
188 int ret;
189
190 pr_debug("checkentry targinfo%s\n", info->label);
191
192 if (info->timeout == 0) {
193 pr_debug("timeout value is zero\n");
194 return -EINVAL;
195 }
196
197 if (info->label[0] == '\0' ||
198 strnlen(info->label,
199 MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) {
200 pr_debug("label is empty or not nul-terminated\n");
201 return -EINVAL;
202 }
203
204 mutex_lock(&list_mutex);
205
206 info->timer = __idletimer_tg_find_by_label(info->label);
207 if (info->timer) {
208 info->timer->refcnt++;
209 mod_timer(&info->timer->timer,
210 msecs_to_jiffies(info->timeout * 1000) + jiffies);
211
212 pr_debug("increased refcnt of timer %s to %u\n",
213 info->label, info->timer->refcnt);
214 } else {
215 ret = idletimer_tg_create(info);
216 if (ret < 0) {
217 pr_debug("failed to create timer\n");
218 mutex_unlock(&list_mutex);
219 return ret;
220 }
221 }
222
223 mutex_unlock(&list_mutex);
224 return 0;
225}
226
227static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
228{
229 const struct idletimer_tg_info *info = par->targinfo;
230
231 pr_debug("destroy targinfo %s\n", info->label);
232
233 mutex_lock(&list_mutex);
234
235 if (--info->timer->refcnt == 0) {
236 pr_debug("deleting timer %s\n", info->label);
237
238 list_del(&info->timer->entry);
239 del_timer_sync(&info->timer->timer);
240 sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
241 kfree(info->timer->attr.attr.name);
242 kfree(info->timer);
243 } else {
244 pr_debug("decreased refcnt of timer %s to %u\n",
245 info->label, info->timer->refcnt);
246 }
247
248 mutex_unlock(&list_mutex);
249}
250
251static struct xt_target idletimer_tg __read_mostly = {
252 .name = "IDLETIMER",
253 .family = NFPROTO_UNSPEC,
254 .target = idletimer_tg_target,
255 .targetsize = sizeof(struct idletimer_tg_info),
256 .checkentry = idletimer_tg_checkentry,
257 .destroy = idletimer_tg_destroy,
258 .me = THIS_MODULE,
259};
260
261static struct class *idletimer_tg_class;
262
263static struct device *idletimer_tg_device;
264
265static int __init idletimer_tg_init(void)
266{
267 int err;
268
269 idletimer_tg_class = class_create(THIS_MODULE, "xt_idletimer");
270 err = PTR_ERR(idletimer_tg_class);
271 if (IS_ERR(idletimer_tg_class)) {
272 pr_debug("couldn't register device class\n");
273 goto out;
274 }
275
276 idletimer_tg_device = device_create(idletimer_tg_class, NULL,
277 MKDEV(0, 0), NULL, "timers");
278 err = PTR_ERR(idletimer_tg_device);
279 if (IS_ERR(idletimer_tg_device)) {
280 pr_debug("couldn't register system device\n");
281 goto out_class;
282 }
283
284 idletimer_tg_kobj = &idletimer_tg_device->kobj;
285
286 err = xt_register_target(&idletimer_tg);
287 if (err < 0) {
288 pr_debug("couldn't register xt target\n");
289 goto out_dev;
290 }
291
292 return 0;
293out_dev:
294 device_destroy(idletimer_tg_class, MKDEV(0, 0));
295out_class:
296 class_destroy(idletimer_tg_class);
297out:
298 return err;
299}
300
301static void __exit idletimer_tg_exit(void)
302{
303 xt_unregister_target(&idletimer_tg);
304
305 device_destroy(idletimer_tg_class, MKDEV(0, 0));
306 class_destroy(idletimer_tg_class);
307}
308
309module_init(idletimer_tg_init);
310module_exit(idletimer_tg_exit);
311
312MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
313MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
314MODULE_DESCRIPTION("Xtables: idle time monitor");
315MODULE_LICENSE("GPL v2");
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 512b9123252f..9d782181b6c8 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
23 If there is a real ct entry correspondig to this packet, 23 If there is a real ct entry correspondig to this packet,
24 it'll hang aroun till timing out. We don't deal with it 24 it'll hang aroun till timing out. We don't deal with it
25 for performance reasons. JK */ 25 for performance reasons. JK */
26 skb->nfct = &nf_conntrack_untracked.ct_general; 26 skb->nfct = &nf_ct_untracked_get()->ct_general;
27 skb->nfctinfo = IP_CT_NEW; 27 skb->nfctinfo = IP_CT_NEW;
28 nf_conntrack_get(skb->nfct); 28 nf_conntrack_get(skb->nfct);
29 29
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 69c01e10f8af..de079abd5bc8 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -60,13 +60,22 @@ struct xt_rateest *xt_rateest_lookup(const char *name)
60} 60}
61EXPORT_SYMBOL_GPL(xt_rateest_lookup); 61EXPORT_SYMBOL_GPL(xt_rateest_lookup);
62 62
63static void xt_rateest_free_rcu(struct rcu_head *head)
64{
65 kfree(container_of(head, struct xt_rateest, rcu));
66}
67
63void xt_rateest_put(struct xt_rateest *est) 68void xt_rateest_put(struct xt_rateest *est)
64{ 69{
65 mutex_lock(&xt_rateest_mutex); 70 mutex_lock(&xt_rateest_mutex);
66 if (--est->refcnt == 0) { 71 if (--est->refcnt == 0) {
67 hlist_del(&est->list); 72 hlist_del(&est->list);
68 gen_kill_estimator(&est->bstats, &est->rstats); 73 gen_kill_estimator(&est->bstats, &est->rstats);
69 kfree(est); 74 /*
75 * gen_estimator est_timer() might access est->lock or bstats,
76 * wait a RCU grace period before freeing 'est'
77 */
78 call_rcu(&est->rcu, xt_rateest_free_rcu);
70 } 79 }
71 mutex_unlock(&xt_rateest_mutex); 80 mutex_unlock(&xt_rateest_mutex);
72} 81}
@@ -179,6 +188,7 @@ static int __init xt_rateest_tg_init(void)
179static void __exit xt_rateest_tg_fini(void) 188static void __exit xt_rateest_tg_fini(void)
180{ 189{
181 xt_unregister_target(&xt_rateest_tg_reg); 190 xt_unregister_target(&xt_rateest_tg_reg);
191 rcu_barrier(); /* Wait for completion of call_rcu()'s (xt_rateest_free_rcu) */
182} 192}
183 193
184 194
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 62ec021fbd50..eb81c380da1b 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -165,8 +165,8 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
165 rcu_read_unlock(); 165 rcu_read_unlock();
166 166
167 if (rt != NULL) { 167 if (rt != NULL) {
168 mtu = dst_mtu(&rt->u.dst); 168 mtu = dst_mtu(&rt->dst);
169 dst_release(&rt->u.dst); 169 dst_release(&rt->dst);
170 } 170 }
171 return mtu; 171 return mtu;
172} 172}
@@ -220,15 +220,13 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
220} 220}
221#endif 221#endif
222 222
223#define TH_SYN 0x02
224
225/* Must specify -p tcp --syn */ 223/* Must specify -p tcp --syn */
226static inline bool find_syn_match(const struct xt_entry_match *m) 224static inline bool find_syn_match(const struct xt_entry_match *m)
227{ 225{
228 const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data; 226 const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
229 227
230 if (strcmp(m->u.kernel.match->name, "tcp") == 0 && 228 if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
231 tcpinfo->flg_cmp & TH_SYN && 229 tcpinfo->flg_cmp & TCPHDR_SYN &&
232 !(tcpinfo->invflags & XT_TCP_INV_FLAGS)) 230 !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
233 return true; 231 return true;
234 232
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index d7920d9f49e9..22a2d421e7eb 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -76,9 +76,9 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
76 if (ip_route_output_key(net, &rt, &fl) != 0) 76 if (ip_route_output_key(net, &rt, &fl) != 0)
77 return false; 77 return false;
78 78
79 dst_release(skb_dst(skb)); 79 skb_dst_drop(skb);
80 skb_dst_set(skb, &rt->u.dst); 80 skb_dst_set(skb, &rt->dst);
81 skb->dev = rt->u.dst.dev; 81 skb->dev = rt->dst.dev;
82 skb->protocol = htons(ETH_P_IP); 82 skb->protocol = htons(ETH_P_IP);
83 return true; 83 return true;
84} 84}
@@ -104,7 +104,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
104#ifdef WITH_CONNTRACK 104#ifdef WITH_CONNTRACK
105 /* Avoid counting cloned packets towards the original connection. */ 105 /* Avoid counting cloned packets towards the original connection. */
106 nf_conntrack_put(skb->nfct); 106 nf_conntrack_put(skb->nfct);
107 skb->nfct = &nf_conntrack_untracked.ct_general; 107 skb->nfct = &nf_ct_untracked_get()->ct_general;
108 skb->nfctinfo = IP_CT_NEW; 108 skb->nfctinfo = IP_CT_NEW;
109 nf_conntrack_get(skb->nfct); 109 nf_conntrack_get(skb->nfct);
110#endif 110#endif
@@ -157,7 +157,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
157 if (dst == NULL) 157 if (dst == NULL)
158 return false; 158 return false;
159 159
160 dst_release(skb_dst(skb)); 160 skb_dst_drop(skb);
161 skb_dst_set(skb, dst); 161 skb_dst_set(skb, dst);
162 skb->dev = dst->dev; 162 skb->dev = dst->dev;
163 skb->protocol = htons(ETH_P_IPV6); 163 skb->protocol = htons(ETH_P_IPV6);
@@ -177,7 +177,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
177 177
178#ifdef WITH_CONNTRACK 178#ifdef WITH_CONNTRACK
179 nf_conntrack_put(skb->nfct); 179 nf_conntrack_put(skb->nfct);
180 skb->nfct = &nf_conntrack_untracked.ct_general; 180 skb->nfct = &nf_ct_untracked_get()->ct_general;
181 skb->nfctinfo = IP_CT_NEW; 181 skb->nfctinfo = IP_CT_NEW;
182 nf_conntrack_get(skb->nfct); 182 nf_conntrack_get(skb->nfct);
183#endif 183#endif
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 30b95a1c1c89..f4af1bfafb1c 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -120,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
120 if (ct == NULL) 120 if (ct == NULL)
121 return false; 121 return false;
122 122
123 if (ct == &nf_conntrack_untracked) 123 if (nf_ct_is_untracked(ct))
124 return false; 124 return false;
125 125
126 if (ct->master) 126 if (ct->master)
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 73517835303d..5b138506690e 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -112,6 +112,16 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
112 if (ret < 0) 112 if (ret < 0)
113 pr_info("cannot load conntrack support for proto=%u\n", 113 pr_info("cannot load conntrack support for proto=%u\n",
114 par->family); 114 par->family);
115
116 /*
117 * This filter cannot function correctly unless connection tracking
118 * accounting is enabled, so complain in the hope that someone notices.
119 */
120 if (!nf_ct_acct_enabled(par->net)) {
121 pr_warning("Forcing CT accounting to be enabled\n");
122 nf_ct_set_acct(par->net, true);
123 }
124
115 return ret; 125 return ret;
116} 126}
117 127
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 39681f10291c..e536710ad916 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
123 123
124 ct = nf_ct_get(skb, &ctinfo); 124 ct = nf_ct_get(skb, &ctinfo);
125 125
126 if (ct == &nf_conntrack_untracked) 126 if (ct) {
127 statebit = XT_CONNTRACK_STATE_UNTRACKED; 127 if (nf_ct_is_untracked(ct))
128 else if (ct != NULL) 128 statebit = XT_CONNTRACK_STATE_UNTRACKED;
129 statebit = XT_CONNTRACK_STATE_BIT(ctinfo); 129 else
130 else 130 statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
131 } else
131 statebit = XT_CONNTRACK_STATE_INVALID; 132 statebit = XT_CONNTRACK_STATE_INVALID;
132 133
133 if (info->match_flags & XT_CONNTRACK_STATE) { 134 if (info->match_flags & XT_CONNTRACK_STATE) {
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index c04fcf385c59..ef36a56a02c6 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -3,6 +3,7 @@
3#include <linux/skbuff.h> 3#include <linux/skbuff.h>
4#include <net/ip.h> 4#include <net/ip.h>
5#include <net/ipv6.h> 5#include <net/ipv6.h>
6#include <net/sctp/sctp.h>
6#include <linux/sctp.h> 7#include <linux/sctp.h>
7 8
8#include <linux/netfilter/x_tables.h> 9#include <linux/netfilter/x_tables.h>
@@ -67,7 +68,7 @@ match_packet(const struct sk_buff *skb,
67 ++i, offset, sch->type, htons(sch->length), 68 ++i, offset, sch->type, htons(sch->length),
68 sch->flags); 69 sch->flags);
69#endif 70#endif
70 offset += (ntohs(sch->length) + 3) & ~3; 71 offset += WORD_ROUND(ntohs(sch->length));
71 72
72 pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset); 73 pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset);
73 74
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 3d54c236a1ba..1ca89908cbad 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
127 * reply packet of an established SNAT-ted connection. */ 127 * reply packet of an established SNAT-ted connection. */
128 128
129 ct = nf_ct_get(skb, &ctinfo); 129 ct = nf_ct_get(skb, &ctinfo);
130 if (ct && (ct != &nf_conntrack_untracked) && 130 if (ct && !nf_ct_is_untracked(ct) &&
131 ((iph->protocol != IPPROTO_ICMP && 131 ((iph->protocol != IPPROTO_ICMP &&
132 ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || 132 ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) ||
133 (iph->protocol == IPPROTO_ICMP && 133 (iph->protocol == IPPROTO_ICMP &&
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index e12e053d3782..a507922d80cd 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -26,14 +26,16 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par)
26 const struct xt_state_info *sinfo = par->matchinfo; 26 const struct xt_state_info *sinfo = par->matchinfo;
27 enum ip_conntrack_info ctinfo; 27 enum ip_conntrack_info ctinfo;
28 unsigned int statebit; 28 unsigned int statebit;
29 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
29 30
30 if (nf_ct_is_untracked(skb)) 31 if (!ct)
31 statebit = XT_STATE_UNTRACKED;
32 else if (!nf_ct_get(skb, &ctinfo))
33 statebit = XT_STATE_INVALID; 32 statebit = XT_STATE_INVALID;
34 else 33 else {
35 statebit = XT_STATE_BIT(ctinfo); 34 if (nf_ct_is_untracked(ct))
36 35 statebit = XT_STATE_UNTRACKED;
36 else
37 statebit = XT_STATE_BIT(ctinfo);
38 }
37 return (sinfo->statemask & statebit); 39 return (sinfo->statemask & statebit);
38} 40}
39 41
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 96e62b8fd6b1..42ecb71d445f 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -18,8 +18,8 @@
18#include <linux/netfilter/x_tables.h> 18#include <linux/netfilter/x_tables.h>
19 19
20struct xt_statistic_priv { 20struct xt_statistic_priv {
21 uint32_t count; 21 atomic_t count;
22}; 22} ____cacheline_aligned_in_smp;
23 23
24MODULE_LICENSE("GPL"); 24MODULE_LICENSE("GPL");
25MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 25MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
@@ -27,13 +27,12 @@ MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
27MODULE_ALIAS("ipt_statistic"); 27MODULE_ALIAS("ipt_statistic");
28MODULE_ALIAS("ip6t_statistic"); 28MODULE_ALIAS("ip6t_statistic");
29 29
30static DEFINE_SPINLOCK(nth_lock);
31
32static bool 30static bool
33statistic_mt(const struct sk_buff *skb, struct xt_action_param *par) 31statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
34{ 32{
35 const struct xt_statistic_info *info = par->matchinfo; 33 const struct xt_statistic_info *info = par->matchinfo;
36 bool ret = info->flags & XT_STATISTIC_INVERT; 34 bool ret = info->flags & XT_STATISTIC_INVERT;
35 int nval, oval;
37 36
38 switch (info->mode) { 37 switch (info->mode) {
39 case XT_STATISTIC_MODE_RANDOM: 38 case XT_STATISTIC_MODE_RANDOM:
@@ -41,12 +40,12 @@ statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
41 ret = !ret; 40 ret = !ret;
42 break; 41 break;
43 case XT_STATISTIC_MODE_NTH: 42 case XT_STATISTIC_MODE_NTH:
44 spin_lock_bh(&nth_lock); 43 do {
45 if (info->master->count++ == info->u.nth.every) { 44 oval = atomic_read(&info->master->count);
46 info->master->count = 0; 45 nval = (oval == info->u.nth.every) ? 0 : oval + 1;
46 } while (atomic_cmpxchg(&info->master->count, oval, nval) != oval);
47 if (nval == 0)
47 ret = !ret; 48 ret = !ret;
48 }
49 spin_unlock_bh(&nth_lock);
50 break; 49 break;
51 } 50 }
52 51
@@ -64,7 +63,7 @@ static int statistic_mt_check(const struct xt_mtchk_param *par)
64 info->master = kzalloc(sizeof(*info->master), GFP_KERNEL); 63 info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
65 if (info->master == NULL) 64 if (info->master == NULL)
66 return -ENOMEM; 65 return -ENOMEM;
67 info->master->count = info->u.nth.count; 66 atomic_set(&info->master->count, info->u.nth.count);
68 67
69 return 0; 68 return 0;
70} 69}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6464a1972a69..8648a9922aab 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -978,6 +978,8 @@ struct netlink_broadcast_data {
978 int delivered; 978 int delivered;
979 gfp_t allocation; 979 gfp_t allocation;
980 struct sk_buff *skb, *skb2; 980 struct sk_buff *skb, *skb2;
981 int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
982 void *tx_data;
981}; 983};
982 984
983static inline int do_one_broadcast(struct sock *sk, 985static inline int do_one_broadcast(struct sock *sk,
@@ -1020,6 +1022,9 @@ static inline int do_one_broadcast(struct sock *sk,
1020 p->failure = 1; 1022 p->failure = 1;
1021 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) 1023 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1022 p->delivery_failure = 1; 1024 p->delivery_failure = 1;
1025 } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
1026 kfree_skb(p->skb2);
1027 p->skb2 = NULL;
1023 } else if (sk_filter(sk, p->skb2)) { 1028 } else if (sk_filter(sk, p->skb2)) {
1024 kfree_skb(p->skb2); 1029 kfree_skb(p->skb2);
1025 p->skb2 = NULL; 1030 p->skb2 = NULL;
@@ -1038,8 +1043,10 @@ out:
1038 return 0; 1043 return 0;
1039} 1044}
1040 1045
1041int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, 1046int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
1042 u32 group, gfp_t allocation) 1047 u32 group, gfp_t allocation,
1048 int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
1049 void *filter_data)
1043{ 1050{
1044 struct net *net = sock_net(ssk); 1051 struct net *net = sock_net(ssk);
1045 struct netlink_broadcast_data info; 1052 struct netlink_broadcast_data info;
@@ -1059,6 +1066,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1059 info.allocation = allocation; 1066 info.allocation = allocation;
1060 info.skb = skb; 1067 info.skb = skb;
1061 info.skb2 = NULL; 1068 info.skb2 = NULL;
1069 info.tx_filter = filter;
1070 info.tx_data = filter_data;
1062 1071
1063 /* While we sleep in clone, do not allow to change socket list */ 1072 /* While we sleep in clone, do not allow to change socket list */
1064 1073
@@ -1067,14 +1076,15 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1067 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) 1076 sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
1068 do_one_broadcast(sk, &info); 1077 do_one_broadcast(sk, &info);
1069 1078
1070 kfree_skb(skb); 1079 consume_skb(skb);
1071 1080
1072 netlink_unlock_table(); 1081 netlink_unlock_table();
1073 1082
1074 kfree_skb(info.skb2); 1083 if (info.delivery_failure) {
1075 1084 kfree_skb(info.skb2);
1076 if (info.delivery_failure)
1077 return -ENOBUFS; 1085 return -ENOBUFS;
1086 } else
1087 consume_skb(info.skb2);
1078 1088
1079 if (info.delivered) { 1089 if (info.delivered) {
1080 if (info.congested && (allocation & __GFP_WAIT)) 1090 if (info.congested && (allocation & __GFP_WAIT))
@@ -1083,6 +1093,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1083 } 1093 }
1084 return -ESRCH; 1094 return -ESRCH;
1085} 1095}
1096EXPORT_SYMBOL(netlink_broadcast_filtered);
1097
1098int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1099 u32 group, gfp_t allocation)
1100{
1101 return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
1102 NULL, NULL);
1103}
1086EXPORT_SYMBOL(netlink_broadcast); 1104EXPORT_SYMBOL(netlink_broadcast);
1087 1105
1088struct netlink_set_err_data { 1106struct netlink_set_err_data {
@@ -1306,19 +1324,23 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1306 if (msg->msg_flags&MSG_OOB) 1324 if (msg->msg_flags&MSG_OOB)
1307 return -EOPNOTSUPP; 1325 return -EOPNOTSUPP;
1308 1326
1309 if (NULL == siocb->scm) 1327 if (NULL == siocb->scm) {
1310 siocb->scm = &scm; 1328 siocb->scm = &scm;
1329 memset(&scm, 0, sizeof(scm));
1330 }
1311 err = scm_send(sock, msg, siocb->scm); 1331 err = scm_send(sock, msg, siocb->scm);
1312 if (err < 0) 1332 if (err < 0)
1313 return err; 1333 return err;
1314 1334
1315 if (msg->msg_namelen) { 1335 if (msg->msg_namelen) {
1336 err = -EINVAL;
1316 if (addr->nl_family != AF_NETLINK) 1337 if (addr->nl_family != AF_NETLINK)
1317 return -EINVAL; 1338 goto out;
1318 dst_pid = addr->nl_pid; 1339 dst_pid = addr->nl_pid;
1319 dst_group = ffs(addr->nl_groups); 1340 dst_group = ffs(addr->nl_groups);
1341 err = -EPERM;
1320 if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) 1342 if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
1321 return -EPERM; 1343 goto out;
1322 } else { 1344 } else {
1323 dst_pid = nlk->dst_pid; 1345 dst_pid = nlk->dst_pid;
1324 dst_group = nlk->dst_group; 1346 dst_group = nlk->dst_group;
@@ -1370,6 +1392,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1370 err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); 1392 err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
1371 1393
1372out: 1394out:
1395 scm_destroy(siocb->scm);
1373 return err; 1396 return err;
1374} 1397}
1375 1398
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2078a277e06b..9a17f28b1253 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -83,6 +83,7 @@
83#include <linux/if_vlan.h> 83#include <linux/if_vlan.h>
84#include <linux/virtio_net.h> 84#include <linux/virtio_net.h>
85#include <linux/errqueue.h> 85#include <linux/errqueue.h>
86#include <linux/net_tstamp.h>
86 87
87#ifdef CONFIG_INET 88#ifdef CONFIG_INET
88#include <net/inet_common.h> 89#include <net/inet_common.h>
@@ -202,6 +203,7 @@ struct packet_sock {
202 unsigned int tp_hdrlen; 203 unsigned int tp_hdrlen;
203 unsigned int tp_reserve; 204 unsigned int tp_reserve;
204 unsigned int tp_loss:1; 205 unsigned int tp_loss:1;
206 unsigned int tp_tstamp;
205 struct packet_type prot_hook ____cacheline_aligned_in_smp; 207 struct packet_type prot_hook ____cacheline_aligned_in_smp;
206}; 208};
207 209
@@ -656,6 +658,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
656 struct sk_buff *copy_skb = NULL; 658 struct sk_buff *copy_skb = NULL;
657 struct timeval tv; 659 struct timeval tv;
658 struct timespec ts; 660 struct timespec ts;
661 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
659 662
660 if (skb->pkt_type == PACKET_LOOPBACK) 663 if (skb->pkt_type == PACKET_LOOPBACK)
661 goto drop; 664 goto drop;
@@ -737,7 +740,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
737 h.h1->tp_snaplen = snaplen; 740 h.h1->tp_snaplen = snaplen;
738 h.h1->tp_mac = macoff; 741 h.h1->tp_mac = macoff;
739 h.h1->tp_net = netoff; 742 h.h1->tp_net = netoff;
740 if (skb->tstamp.tv64) 743 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
744 && shhwtstamps->syststamp.tv64)
745 tv = ktime_to_timeval(shhwtstamps->syststamp);
746 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
747 && shhwtstamps->hwtstamp.tv64)
748 tv = ktime_to_timeval(shhwtstamps->hwtstamp);
749 else if (skb->tstamp.tv64)
741 tv = ktime_to_timeval(skb->tstamp); 750 tv = ktime_to_timeval(skb->tstamp);
742 else 751 else
743 do_gettimeofday(&tv); 752 do_gettimeofday(&tv);
@@ -750,7 +759,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
750 h.h2->tp_snaplen = snaplen; 759 h.h2->tp_snaplen = snaplen;
751 h.h2->tp_mac = macoff; 760 h.h2->tp_mac = macoff;
752 h.h2->tp_net = netoff; 761 h.h2->tp_net = netoff;
753 if (skb->tstamp.tv64) 762 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
763 && shhwtstamps->syststamp.tv64)
764 ts = ktime_to_timespec(shhwtstamps->syststamp);
765 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
766 && shhwtstamps->hwtstamp.tv64)
767 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
768 else if (skb->tstamp.tv64)
754 ts = ktime_to_timespec(skb->tstamp); 769 ts = ktime_to_timespec(skb->tstamp);
755 else 770 else
756 getnstimeofday(&ts); 771 getnstimeofday(&ts);
@@ -2027,6 +2042,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
2027 po->has_vnet_hdr = !!val; 2042 po->has_vnet_hdr = !!val;
2028 return 0; 2043 return 0;
2029 } 2044 }
2045 case PACKET_TIMESTAMP:
2046 {
2047 int val;
2048
2049 if (optlen != sizeof(val))
2050 return -EINVAL;
2051 if (copy_from_user(&val, optval, sizeof(val)))
2052 return -EFAULT;
2053
2054 po->tp_tstamp = val;
2055 return 0;
2056 }
2030 default: 2057 default:
2031 return -ENOPROTOOPT; 2058 return -ENOPROTOOPT;
2032 } 2059 }
@@ -2119,6 +2146,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
2119 val = po->tp_loss; 2146 val = po->tp_loss;
2120 data = &val; 2147 data = &val;
2121 break; 2148 break;
2149 case PACKET_TIMESTAMP:
2150 if (len > sizeof(int))
2151 len = sizeof(int);
2152 val = po->tp_tstamp;
2153 data = &val;
2154 break;
2122 default: 2155 default:
2123 return -ENOPROTOOPT; 2156 return -ENOPROTOOPT;
2124 } 2157 }
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index af4d38bc3b22..b2a3ae6cad78 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -626,6 +626,7 @@ static void pep_sock_close(struct sock *sk, long timeout)
626 struct pep_sock *pn = pep_sk(sk); 626 struct pep_sock *pn = pep_sk(sk);
627 int ifindex = 0; 627 int ifindex = 0;
628 628
629 sock_hold(sk); /* keep a reference after sk_common_release() */
629 sk_common_release(sk); 630 sk_common_release(sk);
630 631
631 lock_sock(sk); 632 lock_sock(sk);
@@ -644,6 +645,7 @@ static void pep_sock_close(struct sock *sk, long timeout)
644 645
645 if (ifindex) 646 if (ifindex)
646 gprs_detach(sk); 647 gprs_detach(sk);
648 sock_put(sk);
647} 649}
648 650
649static int pep_wait_connreq(struct sock *sk, int noblock) 651static int pep_wait_connreq(struct sock *sk, int noblock)
@@ -696,6 +698,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
696 newsk = NULL; 698 newsk = NULL;
697 goto out; 699 goto out;
698 } 700 }
701 kfree_skb(oskb);
699 702
700 sock_hold(sk); 703 sock_hold(sk);
701 pep_sk(newsk)->listener = sk; 704 pep_sk(newsk)->listener = sk;
@@ -1043,12 +1046,12 @@ static void pep_sock_unhash(struct sock *sk)
1043 lock_sock(sk); 1046 lock_sock(sk);
1044 if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { 1047 if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) {
1045 skparent = pn->listener; 1048 skparent = pn->listener;
1046 sk_del_node_init(sk);
1047 release_sock(sk); 1049 release_sock(sk);
1048 1050
1049 sk = skparent;
1050 pn = pep_sk(skparent); 1051 pn = pep_sk(skparent);
1051 lock_sock(sk); 1052 lock_sock(skparent);
1053 sk_del_node_init(sk);
1054 sk = skparent;
1052 } 1055 }
1053 /* Unhash a listening sock only when it is closed 1056 /* Unhash a listening sock only when it is closed
1054 * and all of its active connected pipes are closed. */ 1057 * and all of its active connected pipes are closed. */
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index c33da6576942..b18e48fae975 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -162,6 +162,14 @@ int phonet_address_add(struct net_device *dev, u8 addr)
162 return err; 162 return err;
163} 163}
164 164
165static void phonet_device_rcu_free(struct rcu_head *head)
166{
167 struct phonet_device *pnd;
168
169 pnd = container_of(head, struct phonet_device, rcu);
170 kfree(pnd);
171}
172
165int phonet_address_del(struct net_device *dev, u8 addr) 173int phonet_address_del(struct net_device *dev, u8 addr)
166{ 174{
167 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); 175 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
@@ -179,10 +187,9 @@ int phonet_address_del(struct net_device *dev, u8 addr)
179 pnd = NULL; 187 pnd = NULL;
180 mutex_unlock(&pndevs->lock); 188 mutex_unlock(&pndevs->lock);
181 189
182 if (pnd) { 190 if (pnd)
183 synchronize_rcu(); 191 call_rcu(&pnd->rcu, phonet_device_rcu_free);
184 kfree(pnd); 192
185 }
186 return err; 193 return err;
187} 194}
188 195
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 10ed0d55f759..f68832798db2 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -475,6 +475,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
475 err = rds_ib_setup_qp(conn); 475 err = rds_ib_setup_qp(conn);
476 if (err) { 476 if (err) {
477 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); 477 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
478 mutex_unlock(&conn->c_cm_lock);
478 goto out; 479 goto out;
479 } 480 }
480 481
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index a9d951b4fbae..b5dd6ac39be8 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -452,6 +452,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
452 err = rds_iw_setup_qp(conn); 452 err = rds_iw_setup_qp(conn);
453 if (err) { 453 if (err) {
454 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err); 454 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err);
455 mutex_unlock(&conn->c_cm_lock);
455 goto out; 456 goto out;
456 } 457 }
457 458
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index f0f85b0123f7..9f1729bd60de 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -64,8 +64,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
64 return; 64 return;
65 } 65 }
66 66
67 peer->if_mtu = dst_mtu(&rt->u.dst); 67 peer->if_mtu = dst_mtu(&rt->dst);
68 dst_release(&rt->u.dst); 68 dst_release(&rt->dst);
69 69
70 _leave(" [if_mtu %u]", peer->if_mtu); 70 _leave(" [if_mtu %u]", peer->if_mtu);
71} 71}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 972378f47f3c..23b25f89e7e0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,6 +26,11 @@
26#include <net/act_api.h> 26#include <net/act_api.h>
27#include <net/netlink.h> 27#include <net/netlink.h>
28 28
29static void tcf_common_free_rcu(struct rcu_head *head)
30{
31 kfree(container_of(head, struct tcf_common, tcfc_rcu));
32}
33
29void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) 34void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
30{ 35{
31 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); 36 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
@@ -38,7 +43,11 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
38 write_unlock_bh(hinfo->lock); 43 write_unlock_bh(hinfo->lock);
39 gen_kill_estimator(&p->tcfc_bstats, 44 gen_kill_estimator(&p->tcfc_bstats,
40 &p->tcfc_rate_est); 45 &p->tcfc_rate_est);
41 kfree(p); 46 /*
47 * gen_estimator est_timer() might access p->tcfc_lock
48 * or bstats, wait a RCU grace period before freeing p
49 */
50 call_rcu(&p->tcfc_rcu, tcf_common_free_rcu);
42 return; 51 return;
43 } 52 }
44 } 53 }
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c0b6863e3b87..a16b0175f890 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -160,6 +160,8 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
160 160
161 spin_lock(&m->tcf_lock); 161 spin_lock(&m->tcf_lock);
162 m->tcf_tm.lastuse = jiffies; 162 m->tcf_tm.lastuse = jiffies;
163 m->tcf_bstats.bytes += qdisc_pkt_len(skb);
164 m->tcf_bstats.packets++;
163 165
164 dev = m->tcfm_dev; 166 dev = m->tcfm_dev;
165 if (!(dev->flags & IFF_UP)) { 167 if (!(dev->flags & IFF_UP)) {
@@ -169,13 +171,11 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
169 goto out; 171 goto out;
170 } 172 }
171 173
172 skb2 = skb_act_clone(skb, GFP_ATOMIC); 174 at = G_TC_AT(skb->tc_verd);
175 skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action);
173 if (skb2 == NULL) 176 if (skb2 == NULL)
174 goto out; 177 goto out;
175 178
176 m->tcf_bstats.bytes += qdisc_pkt_len(skb2);
177 m->tcf_bstats.packets++;
178 at = G_TC_AT(skb->tc_verd);
179 if (!(at & AT_EGRESS)) { 179 if (!(at & AT_EGRESS)) {
180 if (m->tcfm_ok_push) 180 if (m->tcfm_ok_push)
181 skb_push(skb2, skb2->dev->hard_header_len); 181 skb_push(skb2, skb2->dev->hard_header_len);
@@ -185,16 +185,14 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
185 if (m->tcfm_eaction != TCA_EGRESS_MIRROR) 185 if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
186 skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); 186 skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
187 187
188 skb2->dev = dev;
189 skb2->skb_iif = skb->dev->ifindex; 188 skb2->skb_iif = skb->dev->ifindex;
189 skb2->dev = dev;
190 dev_queue_xmit(skb2); 190 dev_queue_xmit(skb2);
191 err = 0; 191 err = 0;
192 192
193out: 193out:
194 if (err) { 194 if (err) {
195 m->tcf_qstats.overlimits++; 195 m->tcf_qstats.overlimits++;
196 m->tcf_bstats.bytes += qdisc_pkt_len(skb);
197 m->tcf_bstats.packets++;
198 /* should we be asking for packet to be dropped? 196 /* should we be asking for packet to be dropped?
199 * may make sense for redirect case only 197 * may make sense for redirect case only
200 */ 198 */
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index d885ba311564..24e614c495f2 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -159,6 +159,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
159 iph->daddr = new_addr; 159 iph->daddr = new_addr;
160 160
161 csum_replace4(&iph->check, addr, new_addr); 161 csum_replace4(&iph->check, addr, new_addr);
162 } else if ((iph->frag_off & htons(IP_OFFSET)) ||
163 iph->protocol != IPPROTO_ICMP) {
164 goto out;
162 } 165 }
163 166
164 ihl = iph->ihl * 4; 167 ihl = iph->ihl * 4;
@@ -202,7 +205,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
202 { 205 {
203 struct icmphdr *icmph; 206 struct icmphdr *icmph;
204 207
205 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) 208 if (!pskb_may_pull(skb, ihl + sizeof(*icmph)))
206 goto drop; 209 goto drop;
207 210
208 icmph = (void *)(skb_network_header(skb) + ihl); 211 icmph = (void *)(skb_network_header(skb) + ihl);
@@ -212,6 +215,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
212 (icmph->type != ICMP_PARAMETERPROB)) 215 (icmph->type != ICMP_PARAMETERPROB))
213 break; 216 break;
214 217
218 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph)))
219 goto drop;
220
215 iph = (void *)(icmph + 1); 221 iph = (void *)(icmph + 1);
216 if (egress) 222 if (egress)
217 addr = iph->daddr; 223 addr = iph->daddr;
@@ -247,6 +253,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
247 break; 253 break;
248 } 254 }
249 255
256out:
250 return action; 257 return action;
251 258
252drop: 259drop:
@@ -261,40 +268,29 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
261{ 268{
262 unsigned char *b = skb_tail_pointer(skb); 269 unsigned char *b = skb_tail_pointer(skb);
263 struct tcf_nat *p = a->priv; 270 struct tcf_nat *p = a->priv;
264 struct tc_nat *opt; 271 struct tc_nat opt;
265 struct tcf_t t; 272 struct tcf_t t;
266 int s;
267 273
268 s = sizeof(*opt); 274 opt.old_addr = p->old_addr;
275 opt.new_addr = p->new_addr;
276 opt.mask = p->mask;
277 opt.flags = p->flags;
269 278
270 /* netlink spinlocks held above us - must use ATOMIC */ 279 opt.index = p->tcf_index;
271 opt = kzalloc(s, GFP_ATOMIC); 280 opt.action = p->tcf_action;
272 if (unlikely(!opt)) 281 opt.refcnt = p->tcf_refcnt - ref;
273 return -ENOBUFS; 282 opt.bindcnt = p->tcf_bindcnt - bind;
274 283
275 opt->old_addr = p->old_addr; 284 NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt);
276 opt->new_addr = p->new_addr;
277 opt->mask = p->mask;
278 opt->flags = p->flags;
279
280 opt->index = p->tcf_index;
281 opt->action = p->tcf_action;
282 opt->refcnt = p->tcf_refcnt - ref;
283 opt->bindcnt = p->tcf_bindcnt - bind;
284
285 NLA_PUT(skb, TCA_NAT_PARMS, s, opt);
286 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 285 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
287 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 286 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
288 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 287 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
289 NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t); 288 NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t);
290 289
291 kfree(opt);
292
293 return skb->len; 290 return skb->len;
294 291
295nla_put_failure: 292nla_put_failure:
296 nlmsg_trim(skb, b); 293 nlmsg_trim(skb, b);
297 kfree(opt);
298 return -1; 294 return -1;
299} 295}
300 296
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index fdbd0b7bd840..a0593c9640db 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -125,16 +125,15 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
125{ 125{
126 struct tcf_pedit *p = a->priv; 126 struct tcf_pedit *p = a->priv;
127 int i, munged = 0; 127 int i, munged = 0;
128 u8 *pptr; 128 unsigned int off;
129 129
130 if (!(skb->tc_verd & TC_OK2MUNGE)) { 130 if (skb_cloned(skb)) {
131 /* should we set skb->cloned? */
132 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { 131 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
133 return p->tcf_action; 132 return p->tcf_action;
134 } 133 }
135 } 134 }
136 135
137 pptr = skb_network_header(skb); 136 off = skb_network_offset(skb);
138 137
139 spin_lock(&p->tcf_lock); 138 spin_lock(&p->tcf_lock);
140 139
@@ -144,17 +143,17 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
144 struct tc_pedit_key *tkey = p->tcfp_keys; 143 struct tc_pedit_key *tkey = p->tcfp_keys;
145 144
146 for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { 145 for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
147 u32 *ptr; 146 u32 *ptr, _data;
148 int offset = tkey->off; 147 int offset = tkey->off;
149 148
150 if (tkey->offmask) { 149 if (tkey->offmask) {
151 if (skb->len > tkey->at) { 150 char *d, _d;
152 char *j = pptr + tkey->at; 151
153 offset += ((*j & tkey->offmask) >> 152 d = skb_header_pointer(skb, off + tkey->at, 1,
154 tkey->shift); 153 &_d);
155 } else { 154 if (!d)
156 goto bad; 155 goto bad;
157 } 156 offset += (*d & tkey->offmask) >> tkey->shift;
158 } 157 }
159 158
160 if (offset % 4) { 159 if (offset % 4) {
@@ -169,9 +168,13 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
169 goto bad; 168 goto bad;
170 } 169 }
171 170
172 ptr = (u32 *)(pptr+offset); 171 ptr = skb_header_pointer(skb, off + offset, 4, &_data);
172 if (!ptr)
173 goto bad;
173 /* just do it, baby */ 174 /* just do it, baby */
174 *ptr = ((*ptr & tkey->mask) ^ tkey->val); 175 *ptr = ((*ptr & tkey->mask) ^ tkey->val);
176 if (ptr == &_data)
177 skb_store_bits(skb, off + offset, ptr, 4);
175 munged++; 178 munged++;
176 } 179 }
177 180
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 654f73dff7c1..537a48732e9e 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -97,6 +97,11 @@ nla_put_failure:
97 goto done; 97 goto done;
98} 98}
99 99
100static void tcf_police_free_rcu(struct rcu_head *head)
101{
102 kfree(container_of(head, struct tcf_police, tcf_rcu));
103}
104
100static void tcf_police_destroy(struct tcf_police *p) 105static void tcf_police_destroy(struct tcf_police *p)
101{ 106{
102 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); 107 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
@@ -113,7 +118,11 @@ static void tcf_police_destroy(struct tcf_police *p)
113 qdisc_put_rtab(p->tcfp_R_tab); 118 qdisc_put_rtab(p->tcfp_R_tab);
114 if (p->tcfp_P_tab) 119 if (p->tcfp_P_tab)
115 qdisc_put_rtab(p->tcfp_P_tab); 120 qdisc_put_rtab(p->tcfp_P_tab);
116 kfree(p); 121 /*
122 * gen_estimator est_timer() might access p->tcf_lock
123 * or bstats, wait a RCU grace period before freeing p
124 */
125 call_rcu(&p->tcf_rcu, tcf_police_free_rcu);
117 return; 126 return;
118 } 127 }
119 } 128 }
@@ -397,6 +406,7 @@ static void __exit
397police_cleanup_module(void) 406police_cleanup_module(void)
398{ 407{
399 tcf_unregister_action(&act_police_ops); 408 tcf_unregister_action(&act_police_ops);
409 rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
400} 410}
401 411
402module_init(police_init_module); 412module_init(police_init_module);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 1b4bc691d7d1..4a1d640b0cf1 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -73,10 +73,10 @@ static int tcf_simp_release(struct tcf_defact *d, int bind)
73 73
74static int alloc_defdata(struct tcf_defact *d, char *defdata) 74static int alloc_defdata(struct tcf_defact *d, char *defdata)
75{ 75{
76 d->tcfd_defdata = kstrndup(defdata, SIMP_MAX_DATA, GFP_KERNEL); 76 d->tcfd_defdata = kzalloc(SIMP_MAX_DATA, GFP_KERNEL);
77 if (unlikely(!d->tcfd_defdata)) 77 if (unlikely(!d->tcfd_defdata))
78 return -ENOMEM; 78 return -ENOMEM;
79 79 strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
80 return 0; 80 return 0;
81} 81}
82 82
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 221180384fd7..78ef2c5e130b 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -16,14 +16,11 @@
16#include <linux/errno.h> 16#include <linux/errno.h>
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/cgroup.h> 18#include <linux/cgroup.h>
19#include <linux/rcupdate.h>
19#include <net/rtnetlink.h> 20#include <net/rtnetlink.h>
20#include <net/pkt_cls.h> 21#include <net/pkt_cls.h>
21 22#include <net/sock.h>
22struct cgroup_cls_state 23#include <net/cls_cgroup.h>
23{
24 struct cgroup_subsys_state css;
25 u32 classid;
26};
27 24
28static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, 25static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
29 struct cgroup *cgrp); 26 struct cgroup *cgrp);
@@ -112,6 +109,10 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
112 struct cls_cgroup_head *head = tp->root; 109 struct cls_cgroup_head *head = tp->root;
113 u32 classid; 110 u32 classid;
114 111
112 rcu_read_lock();
113 classid = task_cls_state(current)->classid;
114 rcu_read_unlock();
115
115 /* 116 /*
116 * Due to the nature of the classifier it is required to ignore all 117 * Due to the nature of the classifier it is required to ignore all
117 * packets originating from softirq context as accessing `current' 118 * packets originating from softirq context as accessing `current'
@@ -122,12 +123,12 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
122 * calls by looking at the number of nested bh disable calls because 123 * calls by looking at the number of nested bh disable calls because
123 * softirqs always disables bh. 124 * softirqs always disables bh.
124 */ 125 */
125 if (softirq_count() != SOFTIRQ_OFFSET) 126 if (softirq_count() != SOFTIRQ_OFFSET) {
126 return -1; 127 /* If there is an sk_classid we'll use that. */
127 128 if (!skb->sk)
128 rcu_read_lock(); 129 return -1;
129 classid = task_cls_state(current)->classid; 130 classid = skb->sk->sk_classid;
130 rcu_read_unlock(); 131 }
131 132
132 if (!classid) 133 if (!classid)
133 return -1; 134 return -1;
@@ -289,18 +290,35 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
289 290
290static int __init init_cgroup_cls(void) 291static int __init init_cgroup_cls(void)
291{ 292{
292 int ret = register_tcf_proto_ops(&cls_cgroup_ops); 293 int ret;
293 if (ret) 294
294 return ret;
295 ret = cgroup_load_subsys(&net_cls_subsys); 295 ret = cgroup_load_subsys(&net_cls_subsys);
296 if (ret) 296 if (ret)
297 unregister_tcf_proto_ops(&cls_cgroup_ops); 297 goto out;
298
299#ifndef CONFIG_NET_CLS_CGROUP
300 /* We can't use rcu_assign_pointer because this is an int. */
301 smp_wmb();
302 net_cls_subsys_id = net_cls_subsys.subsys_id;
303#endif
304
305 ret = register_tcf_proto_ops(&cls_cgroup_ops);
306 if (ret)
307 cgroup_unload_subsys(&net_cls_subsys);
308
309out:
298 return ret; 310 return ret;
299} 311}
300 312
301static void __exit exit_cgroup_cls(void) 313static void __exit exit_cgroup_cls(void)
302{ 314{
303 unregister_tcf_proto_ops(&cls_cgroup_ops); 315 unregister_tcf_proto_ops(&cls_cgroup_ops);
316
317#ifndef CONFIG_NET_CLS_CGROUP
318 net_cls_subsys_id = -1;
319 synchronize_rcu();
320#endif
321
304 cgroup_unload_subsys(&net_cls_subsys); 322 cgroup_unload_subsys(&net_cls_subsys);
305} 323}
306 324
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 96275422c619..4f522143811e 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -98,11 +98,11 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
98{ 98{
99 struct { 99 struct {
100 struct tc_u_knode *knode; 100 struct tc_u_knode *knode;
101 u8 *ptr; 101 unsigned int off;
102 } stack[TC_U32_MAXDEPTH]; 102 } stack[TC_U32_MAXDEPTH];
103 103
104 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; 104 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
105 u8 *ptr = skb_network_header(skb); 105 unsigned int off = skb_network_offset(skb);
106 struct tc_u_knode *n; 106 struct tc_u_knode *n;
107 int sdepth = 0; 107 int sdepth = 0;
108 int off2 = 0; 108 int off2 = 0;
@@ -134,8 +134,14 @@ next_knode:
134#endif 134#endif
135 135
136 for (i = n->sel.nkeys; i>0; i--, key++) { 136 for (i = n->sel.nkeys; i>0; i--, key++) {
137 137 unsigned int toff;
138 if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) { 138 __be32 *data, _data;
139
140 toff = off + key->off + (off2 & key->offmask);
141 data = skb_header_pointer(skb, toff, 4, &_data);
142 if (!data)
143 goto out;
144 if ((*data ^ key->val) & key->mask) {
139 n = n->next; 145 n = n->next;
140 goto next_knode; 146 goto next_knode;
141 } 147 }
@@ -174,29 +180,45 @@ check_terminal:
174 if (sdepth >= TC_U32_MAXDEPTH) 180 if (sdepth >= TC_U32_MAXDEPTH)
175 goto deadloop; 181 goto deadloop;
176 stack[sdepth].knode = n; 182 stack[sdepth].knode = n;
177 stack[sdepth].ptr = ptr; 183 stack[sdepth].off = off;
178 sdepth++; 184 sdepth++;
179 185
180 ht = n->ht_down; 186 ht = n->ht_down;
181 sel = 0; 187 sel = 0;
182 if (ht->divisor) 188 if (ht->divisor) {
183 sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift); 189 __be32 *data, _data;
184 190
191 data = skb_header_pointer(skb, off + n->sel.hoff, 4,
192 &_data);
193 if (!data)
194 goto out;
195 sel = ht->divisor & u32_hash_fold(*data, &n->sel,
196 n->fshift);
197 }
185 if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) 198 if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
186 goto next_ht; 199 goto next_ht;
187 200
188 if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { 201 if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
189 off2 = n->sel.off + 3; 202 off2 = n->sel.off + 3;
190 if (n->sel.flags&TC_U32_VAROFFSET) 203 if (n->sel.flags & TC_U32_VAROFFSET) {
191 off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift; 204 __be16 *data, _data;
205
206 data = skb_header_pointer(skb,
207 off + n->sel.offoff,
208 2, &_data);
209 if (!data)
210 goto out;
211 off2 += ntohs(n->sel.offmask & *data) >>
212 n->sel.offshift;
213 }
192 off2 &= ~3; 214 off2 &= ~3;
193 } 215 }
194 if (n->sel.flags&TC_U32_EAT) { 216 if (n->sel.flags&TC_U32_EAT) {
195 ptr += off2; 217 off += off2;
196 off2 = 0; 218 off2 = 0;
197 } 219 }
198 220
199 if (ptr < skb_tail_pointer(skb)) 221 if (off < skb->len)
200 goto next_ht; 222 goto next_ht;
201 } 223 }
202 224
@@ -204,9 +226,10 @@ check_terminal:
204 if (sdepth--) { 226 if (sdepth--) {
205 n = stack[sdepth].knode; 227 n = stack[sdepth].knode;
206 ht = n->ht_up; 228 ht = n->ht_up;
207 ptr = stack[sdepth].ptr; 229 off = stack[sdepth].off;
208 goto check_terminal; 230 goto check_terminal;
209 } 231 }
232out:
210 return -1; 233 return -1;
211 234
212deadloop: 235deadloop:
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fe35c1f338c2..b9e8c3b7d406 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1195,6 +1195,11 @@ nla_put_failure:
1195 return -1; 1195 return -1;
1196} 1196}
1197 1197
1198static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1199{
1200 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1201}
1202
1198static int qdisc_notify(struct net *net, struct sk_buff *oskb, 1203static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1199 struct nlmsghdr *n, u32 clid, 1204 struct nlmsghdr *n, u32 clid,
1200 struct Qdisc *old, struct Qdisc *new) 1205 struct Qdisc *old, struct Qdisc *new)
@@ -1206,11 +1211,11 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1206 if (!skb) 1211 if (!skb)
1207 return -ENOBUFS; 1212 return -ENOBUFS;
1208 1213
1209 if (old && old->handle) { 1214 if (old && !tc_qdisc_dump_ignore(old)) {
1210 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) 1215 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1211 goto err_out; 1216 goto err_out;
1212 } 1217 }
1213 if (new) { 1218 if (new && !tc_qdisc_dump_ignore(new)) {
1214 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) 1219 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1215 goto err_out; 1220 goto err_out;
1216 } 1221 }
@@ -1223,11 +1228,6 @@ err_out:
1223 return -EINVAL; 1228 return -EINVAL;
1224} 1229}
1225 1230
1226static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1227{
1228 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1229}
1230
1231static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, 1231static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1232 struct netlink_callback *cb, 1232 struct netlink_callback *cb,
1233 int *q_idx_p, int s_q_idx) 1233 int *q_idx_p, int s_q_idx)
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index fcbb86a486a2..e114f23d5eae 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -52,7 +52,7 @@ struct atm_flow_data {
52 int ref; /* reference count */ 52 int ref; /* reference count */
53 struct gnet_stats_basic_packed bstats; 53 struct gnet_stats_basic_packed bstats;
54 struct gnet_stats_queue qstats; 54 struct gnet_stats_queue qstats;
55 struct atm_flow_data *next; 55 struct list_head list;
56 struct atm_flow_data *excess; /* flow for excess traffic; 56 struct atm_flow_data *excess; /* flow for excess traffic;
57 NULL to set CLP instead */ 57 NULL to set CLP instead */
58 int hdr_len; 58 int hdr_len;
@@ -61,34 +61,23 @@ struct atm_flow_data {
61 61
62struct atm_qdisc_data { 62struct atm_qdisc_data {
63 struct atm_flow_data link; /* unclassified skbs go here */ 63 struct atm_flow_data link; /* unclassified skbs go here */
64 struct atm_flow_data *flows; /* NB: "link" is also on this 64 struct list_head flows; /* NB: "link" is also on this
65 list */ 65 list */
66 struct tasklet_struct task; /* dequeue tasklet */ 66 struct tasklet_struct task; /* dequeue tasklet */
67}; 67};
68 68
69/* ------------------------- Class/flow operations ------------------------- */ 69/* ------------------------- Class/flow operations ------------------------- */
70 70
71static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow)
72{
73 struct atm_flow_data *walk;
74
75 pr_debug("find_flow(qdisc %p,flow %p)\n", qdisc, flow);
76 for (walk = qdisc->flows; walk; walk = walk->next)
77 if (walk == flow)
78 return 1;
79 pr_debug("find_flow: not found\n");
80 return 0;
81}
82
83static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) 71static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
84{ 72{
85 struct atm_qdisc_data *p = qdisc_priv(sch); 73 struct atm_qdisc_data *p = qdisc_priv(sch);
86 struct atm_flow_data *flow; 74 struct atm_flow_data *flow;
87 75
88 for (flow = p->flows; flow; flow = flow->next) 76 list_for_each_entry(flow, &p->flows, list) {
89 if (flow->classid == classid) 77 if (flow->classid == classid)
90 break; 78 return flow;
91 return flow; 79 }
80 return NULL;
92} 81}
93 82
94static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, 83static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
@@ -99,7 +88,7 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
99 88
100 pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", 89 pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
101 sch, p, flow, new, old); 90 sch, p, flow, new, old);
102 if (!find_flow(p, flow)) 91 if (list_empty(&flow->list))
103 return -EINVAL; 92 return -EINVAL;
104 if (!new) 93 if (!new)
105 new = &noop_qdisc; 94 new = &noop_qdisc;
@@ -146,20 +135,12 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
146{ 135{
147 struct atm_qdisc_data *p = qdisc_priv(sch); 136 struct atm_qdisc_data *p = qdisc_priv(sch);
148 struct atm_flow_data *flow = (struct atm_flow_data *)cl; 137 struct atm_flow_data *flow = (struct atm_flow_data *)cl;
149 struct atm_flow_data **prev;
150 138
151 pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); 139 pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
152 if (--flow->ref) 140 if (--flow->ref)
153 return; 141 return;
154 pr_debug("atm_tc_put: destroying\n"); 142 pr_debug("atm_tc_put: destroying\n");
155 for (prev = &p->flows; *prev; prev = &(*prev)->next) 143 list_del_init(&flow->list);
156 if (*prev == flow)
157 break;
158 if (!*prev) {
159 printk(KERN_CRIT "atm_tc_put: class %p not found\n", flow);
160 return;
161 }
162 *prev = flow->next;
163 pr_debug("atm_tc_put: qdisc %p\n", flow->q); 144 pr_debug("atm_tc_put: qdisc %p\n", flow->q);
164 qdisc_destroy(flow->q); 145 qdisc_destroy(flow->q);
165 tcf_destroy_chain(&flow->filter_list); 146 tcf_destroy_chain(&flow->filter_list);
@@ -274,7 +255,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
274 error = -EINVAL; 255 error = -EINVAL;
275 goto err_out; 256 goto err_out;
276 } 257 }
277 if (find_flow(p, flow)) { 258 if (!list_empty(&flow->list)) {
278 error = -EEXIST; 259 error = -EEXIST;
279 goto err_out; 260 goto err_out;
280 } 261 }
@@ -313,8 +294,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
313 flow->classid = classid; 294 flow->classid = classid;
314 flow->ref = 1; 295 flow->ref = 1;
315 flow->excess = excess; 296 flow->excess = excess;
316 flow->next = p->link.next; 297 list_add(&flow->list, &p->link.list);
317 p->link.next = flow;
318 flow->hdr_len = hdr_len; 298 flow->hdr_len = hdr_len;
319 if (hdr) 299 if (hdr)
320 memcpy(flow->hdr, hdr, hdr_len); 300 memcpy(flow->hdr, hdr, hdr_len);
@@ -335,7 +315,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
335 struct atm_flow_data *flow = (struct atm_flow_data *)arg; 315 struct atm_flow_data *flow = (struct atm_flow_data *)arg;
336 316
337 pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); 317 pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
338 if (!find_flow(qdisc_priv(sch), flow)) 318 if (list_empty(&flow->list))
339 return -EINVAL; 319 return -EINVAL;
340 if (flow->filter_list || flow == &p->link) 320 if (flow->filter_list || flow == &p->link)
341 return -EBUSY; 321 return -EBUSY;
@@ -361,12 +341,12 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
361 pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); 341 pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
362 if (walker->stop) 342 if (walker->stop)
363 return; 343 return;
364 for (flow = p->flows; flow; flow = flow->next) { 344 list_for_each_entry(flow, &p->flows, list) {
365 if (walker->count >= walker->skip) 345 if (walker->count >= walker->skip &&
366 if (walker->fn(sch, (unsigned long)flow, walker) < 0) { 346 walker->fn(sch, (unsigned long)flow, walker) < 0) {
367 walker->stop = 1; 347 walker->stop = 1;
368 break; 348 break;
369 } 349 }
370 walker->count++; 350 walker->count++;
371 } 351 }
372} 352}
@@ -385,16 +365,17 @@ static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
385static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) 365static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
386{ 366{
387 struct atm_qdisc_data *p = qdisc_priv(sch); 367 struct atm_qdisc_data *p = qdisc_priv(sch);
388 struct atm_flow_data *flow = NULL; /* @@@ */ 368 struct atm_flow_data *flow;
389 struct tcf_result res; 369 struct tcf_result res;
390 int result; 370 int result;
391 int ret = NET_XMIT_POLICED; 371 int ret = NET_XMIT_POLICED;
392 372
393 pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); 373 pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
394 result = TC_POLICE_OK; /* be nice to gcc */ 374 result = TC_POLICE_OK; /* be nice to gcc */
375 flow = NULL;
395 if (TC_H_MAJ(skb->priority) != sch->handle || 376 if (TC_H_MAJ(skb->priority) != sch->handle ||
396 !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) 377 !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
397 for (flow = p->flows; flow; flow = flow->next) 378 list_for_each_entry(flow, &p->flows, list) {
398 if (flow->filter_list) { 379 if (flow->filter_list) {
399 result = tc_classify_compat(skb, 380 result = tc_classify_compat(skb,
400 flow->filter_list, 381 flow->filter_list,
@@ -404,8 +385,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
404 flow = (struct atm_flow_data *)res.class; 385 flow = (struct atm_flow_data *)res.class;
405 if (!flow) 386 if (!flow)
406 flow = lookup_flow(sch, res.classid); 387 flow = lookup_flow(sch, res.classid);
407 break; 388 goto done;
408 } 389 }
390 }
391 flow = NULL;
392 done:
393 ;
394 }
409 if (!flow) 395 if (!flow)
410 flow = &p->link; 396 flow = &p->link;
411 else { 397 else {
@@ -477,7 +463,9 @@ static void sch_atm_dequeue(unsigned long data)
477 struct sk_buff *skb; 463 struct sk_buff *skb;
478 464
479 pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); 465 pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
480 for (flow = p->link.next; flow; flow = flow->next) 466 list_for_each_entry(flow, &p->flows, list) {
467 if (flow == &p->link)
468 continue;
481 /* 469 /*
482 * If traffic is properly shaped, this won't generate nasty 470 * If traffic is properly shaped, this won't generate nasty
483 * little bursts. Otherwise, it may ... (but that's okay) 471 * little bursts. Otherwise, it may ... (but that's okay)
@@ -512,6 +500,7 @@ static void sch_atm_dequeue(unsigned long data)
512 /* atm.atm_options are already set by atm_tc_enqueue */ 500 /* atm.atm_options are already set by atm_tc_enqueue */
513 flow->vcc->send(flow->vcc, skb); 501 flow->vcc->send(flow->vcc, skb);
514 } 502 }
503 }
515} 504}
516 505
517static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) 506static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
@@ -543,9 +532,10 @@ static unsigned int atm_tc_drop(struct Qdisc *sch)
543 unsigned int len; 532 unsigned int len;
544 533
545 pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p); 534 pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p);
546 for (flow = p->flows; flow; flow = flow->next) 535 list_for_each_entry(flow, &p->flows, list) {
547 if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q))) 536 if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
548 return len; 537 return len;
538 }
549 return 0; 539 return 0;
550} 540}
551 541
@@ -554,7 +544,9 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
554 struct atm_qdisc_data *p = qdisc_priv(sch); 544 struct atm_qdisc_data *p = qdisc_priv(sch);
555 545
556 pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); 546 pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
557 p->flows = &p->link; 547 INIT_LIST_HEAD(&p->flows);
548 INIT_LIST_HEAD(&p->link.list);
549 list_add(&p->link.list, &p->flows);
558 p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 550 p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
559 &pfifo_qdisc_ops, sch->handle); 551 &pfifo_qdisc_ops, sch->handle);
560 if (!p->link.q) 552 if (!p->link.q)
@@ -565,7 +557,6 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
565 p->link.sock = NULL; 557 p->link.sock = NULL;
566 p->link.classid = sch->handle; 558 p->link.classid = sch->handle;
567 p->link.ref = 1; 559 p->link.ref = 1;
568 p->link.next = NULL;
569 tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); 560 tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
570 return 0; 561 return 0;
571} 562}
@@ -576,7 +567,7 @@ static void atm_tc_reset(struct Qdisc *sch)
576 struct atm_flow_data *flow; 567 struct atm_flow_data *flow;
577 568
578 pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); 569 pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
579 for (flow = p->flows; flow; flow = flow->next) 570 list_for_each_entry(flow, &p->flows, list)
580 qdisc_reset(flow->q); 571 qdisc_reset(flow->q);
581 sch->q.qlen = 0; 572 sch->q.qlen = 0;
582} 573}
@@ -584,24 +575,17 @@ static void atm_tc_reset(struct Qdisc *sch)
584static void atm_tc_destroy(struct Qdisc *sch) 575static void atm_tc_destroy(struct Qdisc *sch)
585{ 576{
586 struct atm_qdisc_data *p = qdisc_priv(sch); 577 struct atm_qdisc_data *p = qdisc_priv(sch);
587 struct atm_flow_data *flow; 578 struct atm_flow_data *flow, *tmp;
588 579
589 pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); 580 pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
590 for (flow = p->flows; flow; flow = flow->next) 581 list_for_each_entry(flow, &p->flows, list)
591 tcf_destroy_chain(&flow->filter_list); 582 tcf_destroy_chain(&flow->filter_list);
592 583
593 /* races ? */ 584 list_for_each_entry_safe(flow, tmp, &p->flows, list) {
594 while ((flow = p->flows)) {
595 if (flow->ref > 1) 585 if (flow->ref > 1)
596 printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, 586 printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
597 flow->ref); 587 flow->ref);
598 atm_tc_put(sch, (unsigned long)flow); 588 atm_tc_put(sch, (unsigned long)flow);
599 if (p->flows == flow) {
600 printk(KERN_ERR "atm_destroy: putting flow %p didn't "
601 "kill it\n", flow);
602 p->flows = flow->next; /* brute force */
603 break;
604 }
605 } 589 }
606 tasklet_kill(&p->task); 590 tasklet_kill(&p->task);
607} 591}
@@ -615,7 +599,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
615 599
616 pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", 600 pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
617 sch, p, flow, skb, tcm); 601 sch, p, flow, skb, tcm);
618 if (!find_flow(p, flow)) 602 if (list_empty(&flow->list))
619 return -EINVAL; 603 return -EINVAL;
620 tcm->tcm_handle = flow->classid; 604 tcm->tcm_handle = flow->classid;
621 tcm->tcm_info = flow->q->handle; 605 tcm->tcm_info = flow->q->handle;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a63029ef3edd..2aeb3a4386a1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -96,7 +96,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
96 * Another cpu is holding lock, requeue & delay xmits for 96 * Another cpu is holding lock, requeue & delay xmits for
97 * some time. 97 * some time.
98 */ 98 */
99 __get_cpu_var(softnet_data).cpu_collision++; 99 __this_cpu_inc(softnet_data.cpu_collision);
100 ret = dev_requeue_skb(skb, q); 100 ret = dev_requeue_skb(skb, q);
101 } 101 }
102 102
@@ -205,7 +205,7 @@ void __qdisc_run(struct Qdisc *q)
205 } 205 }
206 } 206 }
207 207
208 clear_bit(__QDISC_STATE_RUNNING, &q->state); 208 qdisc_run_end(q);
209} 209}
210 210
211unsigned long dev_trans_start(struct net_device *dev) 211unsigned long dev_trans_start(struct net_device *dev)
@@ -327,6 +327,24 @@ void netif_carrier_off(struct net_device *dev)
327} 327}
328EXPORT_SYMBOL(netif_carrier_off); 328EXPORT_SYMBOL(netif_carrier_off);
329 329
330/**
331 * netif_notify_peers - notify network peers about existence of @dev
332 * @dev: network device
333 *
334 * Generate traffic such that interested network peers are aware of
335 * @dev, such as by generating a gratuitous ARP. This may be used when
336 * a device wants to inform the rest of the network about some sort of
337 * reconfiguration such as a failover event or virtual machine
338 * migration.
339 */
340void netif_notify_peers(struct net_device *dev)
341{
342 rtnl_lock();
343 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
344 rtnl_unlock();
345}
346EXPORT_SYMBOL(netif_notify_peers);
347
330/* "NOOP" scheduler: the best scheduler, recommended for all interfaces 348/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
331 under all circumstances. It is difficult to invent anything faster or 349 under all circumstances. It is difficult to invent anything faster or
332 cheaper. 350 cheaper.
@@ -543,6 +561,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
543 561
544 INIT_LIST_HEAD(&sch->list); 562 INIT_LIST_HEAD(&sch->list);
545 skb_queue_head_init(&sch->q); 563 skb_queue_head_init(&sch->q);
564 spin_lock_init(&sch->busylock);
546 sch->ops = ops; 565 sch->ops = ops;
547 sch->enqueue = ops->enqueue; 566 sch->enqueue = ops->enqueue;
548 sch->dequeue = ops->dequeue; 567 sch->dequeue = ops->dequeue;
@@ -779,7 +798,7 @@ static bool some_qdisc_is_busy(struct net_device *dev)
779 798
780 spin_lock_bh(root_lock); 799 spin_lock_bh(root_lock);
781 800
782 val = (test_bit(__QDISC_STATE_RUNNING, &q->state) || 801 val = (qdisc_is_running(q) ||
783 test_bit(__QDISC_STATE_SCHED, &q->state)); 802 test_bit(__QDISC_STATE_SCHED, &q->state));
784 803
785 spin_unlock_bh(root_lock); 804 spin_unlock_bh(root_lock);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 0b52b8de562c..4be8d04b262d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1550,7 +1550,6 @@ static const struct Qdisc_class_ops htb_class_ops = {
1550}; 1550};
1551 1551
1552static struct Qdisc_ops htb_qdisc_ops __read_mostly = { 1552static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1553 .next = NULL,
1554 .cl_ops = &htb_class_ops, 1553 .cl_ops = &htb_class_ops,
1555 .id = "htb", 1554 .id = "htb",
1556 .priv_size = sizeof(struct htb_sched), 1555 .priv_size = sizeof(struct htb_sched),
@@ -1561,7 +1560,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1561 .init = htb_init, 1560 .init = htb_init,
1562 .reset = htb_reset, 1561 .reset = htb_reset,
1563 .destroy = htb_destroy, 1562 .destroy = htb_destroy,
1564 .change = NULL /* htb_change */,
1565 .dump = htb_dump, 1563 .dump = htb_dump,
1566 .owner = THIS_MODULE, 1564 .owner = THIS_MODULE,
1567}; 1565};
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 3415b6ce1c0a..807643bdcbac 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -449,6 +449,7 @@ static __init void teql_master_setup(struct net_device *dev)
449 dev->tx_queue_len = 100; 449 dev->tx_queue_len = 100;
450 dev->flags = IFF_NOARP; 450 dev->flags = IFF_NOARP;
451 dev->hard_header_len = LL_MAX_HEADER; 451 dev->hard_header_len = LL_MAX_HEADER;
452 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
452} 453}
453 454
454static LIST_HEAD(master_dev_list); 455static LIST_HEAD(master_dev_list);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 182749867c72..c0e162aeb0bd 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -490,7 +490,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
490 __func__, &fl.fl4_dst, &fl.fl4_src); 490 __func__, &fl.fl4_dst, &fl.fl4_src);
491 491
492 if (!ip_route_output_key(&init_net, &rt, &fl)) { 492 if (!ip_route_output_key(&init_net, &rt, &fl)) {
493 dst = &rt->u.dst; 493 dst = &rt->dst;
494 } 494 }
495 495
496 /* If there is no association or if a source address is passed, no 496 /* If there is no association or if a source address is passed, no
@@ -534,7 +534,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
534 fl.fl4_src = laddr->a.v4.sin_addr.s_addr; 534 fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
535 fl.fl_ip_sport = laddr->a.v4.sin_port; 535 fl.fl_ip_sport = laddr->a.v4.sin_port;
536 if (!ip_route_output_key(&init_net, &rt, &fl)) { 536 if (!ip_route_output_key(&init_net, &rt, &fl)) {
537 dst = &rt->u.dst; 537 dst = &rt->dst;
538 goto out_unlock; 538 goto out_unlock;
539 } 539 }
540 } 540 }
@@ -1002,7 +1002,8 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family)
1002static inline int init_sctp_mibs(void) 1002static inline int init_sctp_mibs(void)
1003{ 1003{
1004 return snmp_mib_init((void __percpu **)sctp_statistics, 1004 return snmp_mib_init((void __percpu **)sctp_statistics,
1005 sizeof(struct sctp_mib)); 1005 sizeof(struct sctp_mib),
1006 __alignof__(struct sctp_mib));
1006} 1007}
1007 1008
1008static inline void cleanup_sctp_mibs(void) 1009static inline void cleanup_sctp_mibs(void)
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index bd2a50b482ac..246f92924658 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1817,7 +1817,7 @@ malformed:
1817struct __sctp_missing { 1817struct __sctp_missing {
1818 __be32 num_missing; 1818 __be32 num_missing;
1819 __be16 type; 1819 __be16 type;
1820} __attribute__((packed)); 1820} __packed;
1821 1821
1822/* 1822/*
1823 * Report a missing mandatory parameter. 1823 * Report a missing mandatory parameter.
diff --git a/net/socket.c b/net/socket.c
index f9f7d0872cac..2270b941bcc7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -94,6 +94,7 @@
94 94
95#include <net/compat.h> 95#include <net/compat.h>
96#include <net/wext.h> 96#include <net/wext.h>
97#include <net/cls_cgroup.h>
97 98
98#include <net/sock.h> 99#include <net/sock.h>
99#include <linux/netfilter.h> 100#include <linux/netfilter.h>
@@ -123,7 +124,7 @@ static int sock_fasync(int fd, struct file *filp, int on);
123static ssize_t sock_sendpage(struct file *file, struct page *page, 124static ssize_t sock_sendpage(struct file *file, struct page *page,
124 int offset, size_t size, loff_t *ppos, int more); 125 int offset, size_t size, loff_t *ppos, int more);
125static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
126 struct pipe_inode_info *pipe, size_t len, 127 struct pipe_inode_info *pipe, size_t len,
127 unsigned int flags); 128 unsigned int flags);
128 129
129/* 130/*
@@ -161,7 +162,7 @@ static const struct net_proto_family *net_families[NPROTO] __read_mostly;
161 * Statistics counters of the socket lists 162 * Statistics counters of the socket lists
162 */ 163 */
163 164
164static DEFINE_PER_CPU(int, sockets_in_use) = 0; 165static DEFINE_PER_CPU(int, sockets_in_use);
165 166
166/* 167/*
167 * Support routines. 168 * Support routines.
@@ -169,15 +170,6 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0;
169 * divide and look after the messy bits. 170 * divide and look after the messy bits.
170 */ 171 */
171 172
172#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
173 16 for IP, 16 for IPX,
174 24 for IPv6,
175 about 80 for AX.25
176 must be at least one bigger than
177 the AF_UNIX size (see net/unix/af_unix.c
178 :unix_mkname()).
179 */
180
181/** 173/**
182 * move_addr_to_kernel - copy a socket address into kernel space 174 * move_addr_to_kernel - copy a socket address into kernel space
183 * @uaddr: Address in user space 175 * @uaddr: Address in user space
@@ -308,9 +300,9 @@ static int init_inodecache(void)
308} 300}
309 301
310static const struct super_operations sockfs_ops = { 302static const struct super_operations sockfs_ops = {
311 .alloc_inode = sock_alloc_inode, 303 .alloc_inode = sock_alloc_inode,
312 .destroy_inode =sock_destroy_inode, 304 .destroy_inode = sock_destroy_inode,
313 .statfs = simple_statfs, 305 .statfs = simple_statfs,
314}; 306};
315 307
316static int sockfs_get_sb(struct file_system_type *fs_type, 308static int sockfs_get_sb(struct file_system_type *fs_type,
@@ -410,6 +402,7 @@ int sock_map_fd(struct socket *sock, int flags)
410 402
411 return fd; 403 return fd;
412} 404}
405EXPORT_SYMBOL(sock_map_fd);
413 406
414static struct socket *sock_from_file(struct file *file, int *err) 407static struct socket *sock_from_file(struct file *file, int *err)
415{ 408{
@@ -421,7 +414,7 @@ static struct socket *sock_from_file(struct file *file, int *err)
421} 414}
422 415
423/** 416/**
424 * sockfd_lookup - Go from a file number to its socket slot 417 * sockfd_lookup - Go from a file number to its socket slot
425 * @fd: file handle 418 * @fd: file handle
426 * @err: pointer to an error code return 419 * @err: pointer to an error code return
427 * 420 *
@@ -449,6 +442,7 @@ struct socket *sockfd_lookup(int fd, int *err)
449 fput(file); 442 fput(file);
450 return sock; 443 return sock;
451} 444}
445EXPORT_SYMBOL(sockfd_lookup);
452 446
453static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) 447static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
454{ 448{
@@ -539,6 +533,7 @@ void sock_release(struct socket *sock)
539 } 533 }
540 sock->file = NULL; 534 sock->file = NULL;
541} 535}
536EXPORT_SYMBOL(sock_release);
542 537
543int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, 538int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
544 union skb_shared_tx *shtx) 539 union skb_shared_tx *shtx)
@@ -558,6 +553,8 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
558 struct sock_iocb *si = kiocb_to_siocb(iocb); 553 struct sock_iocb *si = kiocb_to_siocb(iocb);
559 int err; 554 int err;
560 555
556 sock_update_classid(sock->sk);
557
561 si->sock = sock; 558 si->sock = sock;
562 si->scm = NULL; 559 si->scm = NULL;
563 si->msg = msg; 560 si->msg = msg;
@@ -583,6 +580,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
583 ret = wait_on_sync_kiocb(&iocb); 580 ret = wait_on_sync_kiocb(&iocb);
584 return ret; 581 return ret;
585} 582}
583EXPORT_SYMBOL(sock_sendmsg);
586 584
587int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 585int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
588 struct kvec *vec, size_t num, size_t size) 586 struct kvec *vec, size_t num, size_t size)
@@ -601,6 +599,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
601 set_fs(oldfs); 599 set_fs(oldfs);
602 return result; 600 return result;
603} 601}
602EXPORT_SYMBOL(kernel_sendmsg);
604 603
605static int ktime2ts(ktime_t kt, struct timespec *ts) 604static int ktime2ts(ktime_t kt, struct timespec *ts)
606{ 605{
@@ -661,7 +660,6 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
661 put_cmsg(msg, SOL_SOCKET, 660 put_cmsg(msg, SOL_SOCKET,
662 SCM_TIMESTAMPING, sizeof(ts), &ts); 661 SCM_TIMESTAMPING, sizeof(ts), &ts);
663} 662}
664
665EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 663EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
666 664
667inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 665inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
@@ -684,6 +682,8 @@ static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
684{ 682{
685 struct sock_iocb *si = kiocb_to_siocb(iocb); 683 struct sock_iocb *si = kiocb_to_siocb(iocb);
686 684
685 sock_update_classid(sock->sk);
686
687 si->sock = sock; 687 si->sock = sock;
688 si->scm = NULL; 688 si->scm = NULL;
689 si->msg = msg; 689 si->msg = msg;
@@ -715,6 +715,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
715 ret = wait_on_sync_kiocb(&iocb); 715 ret = wait_on_sync_kiocb(&iocb);
716 return ret; 716 return ret;
717} 717}
718EXPORT_SYMBOL(sock_recvmsg);
718 719
719static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, 720static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
720 size_t size, int flags) 721 size_t size, int flags)
@@ -747,6 +748,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
747 set_fs(oldfs); 748 set_fs(oldfs);
748 return result; 749 return result;
749} 750}
751EXPORT_SYMBOL(kernel_recvmsg);
750 752
751static void sock_aio_dtor(struct kiocb *iocb) 753static void sock_aio_dtor(struct kiocb *iocb)
752{ 754{
@@ -769,7 +771,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
769} 771}
770 772
771static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 773static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
772 struct pipe_inode_info *pipe, size_t len, 774 struct pipe_inode_info *pipe, size_t len,
773 unsigned int flags) 775 unsigned int flags)
774{ 776{
775 struct socket *sock = file->private_data; 777 struct socket *sock = file->private_data;
@@ -777,6 +779,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
777 if (unlikely(!sock->ops->splice_read)) 779 if (unlikely(!sock->ops->splice_read))
778 return -EINVAL; 780 return -EINVAL;
779 781
782 sock_update_classid(sock->sk);
783
780 return sock->ops->splice_read(sock, ppos, pipe, len, flags); 784 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
781} 785}
782 786
@@ -880,7 +884,7 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
880 */ 884 */
881 885
882static DEFINE_MUTEX(br_ioctl_mutex); 886static DEFINE_MUTEX(br_ioctl_mutex);
883static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; 887static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
884 888
885void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) 889void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
886{ 890{
@@ -888,7 +892,6 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
888 br_ioctl_hook = hook; 892 br_ioctl_hook = hook;
889 mutex_unlock(&br_ioctl_mutex); 893 mutex_unlock(&br_ioctl_mutex);
890} 894}
891
892EXPORT_SYMBOL(brioctl_set); 895EXPORT_SYMBOL(brioctl_set);
893 896
894static DEFINE_MUTEX(vlan_ioctl_mutex); 897static DEFINE_MUTEX(vlan_ioctl_mutex);
@@ -900,7 +903,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
900 vlan_ioctl_hook = hook; 903 vlan_ioctl_hook = hook;
901 mutex_unlock(&vlan_ioctl_mutex); 904 mutex_unlock(&vlan_ioctl_mutex);
902} 905}
903
904EXPORT_SYMBOL(vlan_ioctl_set); 906EXPORT_SYMBOL(vlan_ioctl_set);
905 907
906static DEFINE_MUTEX(dlci_ioctl_mutex); 908static DEFINE_MUTEX(dlci_ioctl_mutex);
@@ -912,7 +914,6 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
912 dlci_ioctl_hook = hook; 914 dlci_ioctl_hook = hook;
913 mutex_unlock(&dlci_ioctl_mutex); 915 mutex_unlock(&dlci_ioctl_mutex);
914} 916}
915
916EXPORT_SYMBOL(dlci_ioctl_set); 917EXPORT_SYMBOL(dlci_ioctl_set);
917 918
918static long sock_do_ioctl(struct net *net, struct socket *sock, 919static long sock_do_ioctl(struct net *net, struct socket *sock,
@@ -1040,6 +1041,7 @@ out_release:
1040 sock = NULL; 1041 sock = NULL;
1041 goto out; 1042 goto out;
1042} 1043}
1044EXPORT_SYMBOL(sock_create_lite);
1043 1045
1044/* No kernel lock held - perfect */ 1046/* No kernel lock held - perfect */
1045static unsigned int sock_poll(struct file *file, poll_table *wait) 1047static unsigned int sock_poll(struct file *file, poll_table *wait)
@@ -1140,6 +1142,7 @@ call_kill:
1140 rcu_read_unlock(); 1142 rcu_read_unlock();
1141 return 0; 1143 return 0;
1142} 1144}
1145EXPORT_SYMBOL(sock_wake_async);
1143 1146
1144static int __sock_create(struct net *net, int family, int type, int protocol, 1147static int __sock_create(struct net *net, int family, int type, int protocol,
1145 struct socket **res, int kern) 1148 struct socket **res, int kern)
@@ -1258,11 +1261,13 @@ int sock_create(int family, int type, int protocol, struct socket **res)
1258{ 1261{
1259 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); 1262 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1260} 1263}
1264EXPORT_SYMBOL(sock_create);
1261 1265
1262int sock_create_kern(int family, int type, int protocol, struct socket **res) 1266int sock_create_kern(int family, int type, int protocol, struct socket **res)
1263{ 1267{
1264 return __sock_create(&init_net, family, type, protocol, res, 1); 1268 return __sock_create(&init_net, family, type, protocol, res, 1);
1265} 1269}
1270EXPORT_SYMBOL(sock_create_kern);
1266 1271
1267SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) 1272SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1268{ 1273{
@@ -1467,7 +1472,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1467 goto out; 1472 goto out;
1468 1473
1469 err = -ENFILE; 1474 err = -ENFILE;
1470 if (!(newsock = sock_alloc())) 1475 newsock = sock_alloc();
1476 if (!newsock)
1471 goto out_put; 1477 goto out_put;
1472 1478
1473 newsock->type = sock->type; 1479 newsock->type = sock->type;
@@ -1854,8 +1860,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1854 if (MSG_CMSG_COMPAT & flags) { 1860 if (MSG_CMSG_COMPAT & flags) {
1855 if (get_compat_msghdr(&msg_sys, msg_compat)) 1861 if (get_compat_msghdr(&msg_sys, msg_compat))
1856 return -EFAULT; 1862 return -EFAULT;
1857 } 1863 } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1858 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1859 return -EFAULT; 1864 return -EFAULT;
1860 1865
1861 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1866 sock = sockfd_lookup_light(fd, &err, &fput_needed);
@@ -1957,8 +1962,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
1957 if (MSG_CMSG_COMPAT & flags) { 1962 if (MSG_CMSG_COMPAT & flags) {
1958 if (get_compat_msghdr(msg_sys, msg_compat)) 1963 if (get_compat_msghdr(msg_sys, msg_compat))
1959 return -EFAULT; 1964 return -EFAULT;
1960 } 1965 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1961 else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1962 return -EFAULT; 1966 return -EFAULT;
1963 1967
1964 err = -EMSGSIZE; 1968 err = -EMSGSIZE;
@@ -2184,10 +2188,10 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2184/* Argument list sizes for sys_socketcall */ 2188/* Argument list sizes for sys_socketcall */
2185#define AL(x) ((x) * sizeof(unsigned long)) 2189#define AL(x) ((x) * sizeof(unsigned long))
2186static const unsigned char nargs[20] = { 2190static const unsigned char nargs[20] = {
2187 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 2191 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2188 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 2192 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2189 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 2193 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
2190 AL(4),AL(5) 2194 AL(4), AL(5)
2191}; 2195};
2192 2196
2193#undef AL 2197#undef AL
@@ -2333,6 +2337,7 @@ int sock_register(const struct net_proto_family *ops)
2333 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); 2337 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
2334 return err; 2338 return err;
2335} 2339}
2340EXPORT_SYMBOL(sock_register);
2336 2341
2337/** 2342/**
2338 * sock_unregister - remove a protocol handler 2343 * sock_unregister - remove a protocol handler
@@ -2359,6 +2364,7 @@ void sock_unregister(int family)
2359 2364
2360 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); 2365 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
2361} 2366}
2367EXPORT_SYMBOL(sock_unregister);
2362 2368
2363static int __init sock_init(void) 2369static int __init sock_init(void)
2364{ 2370{
@@ -2388,6 +2394,10 @@ static int __init sock_init(void)
2388 netfilter_init(); 2394 netfilter_init();
2389#endif 2395#endif
2390 2396
2397#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2398 skb_timestamping_init();
2399#endif
2400
2391 return 0; 2401 return 0;
2392} 2402}
2393 2403
@@ -2483,13 +2493,13 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
2483 ifc.ifc_req = NULL; 2493 ifc.ifc_req = NULL;
2484 uifc = compat_alloc_user_space(sizeof(struct ifconf)); 2494 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2485 } else { 2495 } else {
2486 size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * 2496 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2487 sizeof (struct ifreq); 2497 sizeof(struct ifreq);
2488 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); 2498 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2489 ifc.ifc_len = len; 2499 ifc.ifc_len = len;
2490 ifr = ifc.ifc_req = (void __user *)(uifc + 1); 2500 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2491 ifr32 = compat_ptr(ifc32.ifcbuf); 2501 ifr32 = compat_ptr(ifc32.ifcbuf);
2492 for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { 2502 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
2493 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) 2503 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
2494 return -EFAULT; 2504 return -EFAULT;
2495 ifr++; 2505 ifr++;
@@ -2509,9 +2519,9 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
2509 ifr = ifc.ifc_req; 2519 ifr = ifc.ifc_req;
2510 ifr32 = compat_ptr(ifc32.ifcbuf); 2520 ifr32 = compat_ptr(ifc32.ifcbuf);
2511 for (i = 0, j = 0; 2521 for (i = 0, j = 0;
2512 i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; 2522 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2513 i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { 2523 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2514 if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) 2524 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
2515 return -EFAULT; 2525 return -EFAULT;
2516 ifr32++; 2526 ifr32++;
2517 ifr++; 2527 ifr++;
@@ -2560,7 +2570,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
2560 compat_uptr_t uptr32; 2570 compat_uptr_t uptr32;
2561 struct ifreq __user *uifr; 2571 struct ifreq __user *uifr;
2562 2572
2563 uifr = compat_alloc_user_space(sizeof (*uifr)); 2573 uifr = compat_alloc_user_space(sizeof(*uifr));
2564 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) 2574 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2565 return -EFAULT; 2575 return -EFAULT;
2566 2576
@@ -2594,9 +2604,9 @@ static int bond_ioctl(struct net *net, unsigned int cmd,
2594 return -EFAULT; 2604 return -EFAULT;
2595 2605
2596 old_fs = get_fs(); 2606 old_fs = get_fs();
2597 set_fs (KERNEL_DS); 2607 set_fs(KERNEL_DS);
2598 err = dev_ioctl(net, cmd, &kifr); 2608 err = dev_ioctl(net, cmd, &kifr);
2599 set_fs (old_fs); 2609 set_fs(old_fs);
2600 2610
2601 return err; 2611 return err;
2602 case SIOCBONDSLAVEINFOQUERY: 2612 case SIOCBONDSLAVEINFOQUERY:
@@ -2703,9 +2713,9 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2703 return -EFAULT; 2713 return -EFAULT;
2704 2714
2705 old_fs = get_fs(); 2715 old_fs = get_fs();
2706 set_fs (KERNEL_DS); 2716 set_fs(KERNEL_DS);
2707 err = dev_ioctl(net, cmd, (void __user *)&ifr); 2717 err = dev_ioctl(net, cmd, (void __user *)&ifr);
2708 set_fs (old_fs); 2718 set_fs(old_fs);
2709 2719
2710 if (cmd == SIOCGIFMAP && !err) { 2720 if (cmd == SIOCGIFMAP && !err) {
2711 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); 2721 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
@@ -2727,7 +2737,7 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif
2727 compat_uptr_t uptr32; 2737 compat_uptr_t uptr32;
2728 struct ifreq __user *uifr; 2738 struct ifreq __user *uifr;
2729 2739
2730 uifr = compat_alloc_user_space(sizeof (*uifr)); 2740 uifr = compat_alloc_user_space(sizeof(*uifr));
2731 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) 2741 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2732 return -EFAULT; 2742 return -EFAULT;
2733 2743
@@ -2743,20 +2753,20 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif
2743} 2753}
2744 2754
2745struct rtentry32 { 2755struct rtentry32 {
2746 u32 rt_pad1; 2756 u32 rt_pad1;
2747 struct sockaddr rt_dst; /* target address */ 2757 struct sockaddr rt_dst; /* target address */
2748 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ 2758 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2749 struct sockaddr rt_genmask; /* target network mask (IP) */ 2759 struct sockaddr rt_genmask; /* target network mask (IP) */
2750 unsigned short rt_flags; 2760 unsigned short rt_flags;
2751 short rt_pad2; 2761 short rt_pad2;
2752 u32 rt_pad3; 2762 u32 rt_pad3;
2753 unsigned char rt_tos; 2763 unsigned char rt_tos;
2754 unsigned char rt_class; 2764 unsigned char rt_class;
2755 short rt_pad4; 2765 short rt_pad4;
2756 short rt_metric; /* +1 for binary compatibility! */ 2766 short rt_metric; /* +1 for binary compatibility! */
2757 /* char * */ u32 rt_dev; /* forcing the device at add */ 2767 /* char * */ u32 rt_dev; /* forcing the device at add */
2758 u32 rt_mtu; /* per route MTU/Window */ 2768 u32 rt_mtu; /* per route MTU/Window */
2759 u32 rt_window; /* Window clamping */ 2769 u32 rt_window; /* Window clamping */
2760 unsigned short rt_irtt; /* Initial RTT */ 2770 unsigned short rt_irtt; /* Initial RTT */
2761}; 2771};
2762 2772
@@ -2786,29 +2796,29 @@ static int routing_ioctl(struct net *net, struct socket *sock,
2786 2796
2787 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ 2797 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2788 struct in6_rtmsg32 __user *ur6 = argp; 2798 struct in6_rtmsg32 __user *ur6 = argp;
2789 ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), 2799 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
2790 3 * sizeof(struct in6_addr)); 2800 3 * sizeof(struct in6_addr));
2791 ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); 2801 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2792 ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); 2802 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2793 ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); 2803 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2794 ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); 2804 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
2795 ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); 2805 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
2796 ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); 2806 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
2797 ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); 2807 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
2798 2808
2799 r = (void *) &r6; 2809 r = (void *) &r6;
2800 } else { /* ipv4 */ 2810 } else { /* ipv4 */
2801 struct rtentry32 __user *ur4 = argp; 2811 struct rtentry32 __user *ur4 = argp;
2802 ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), 2812 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
2803 3 * sizeof(struct sockaddr)); 2813 3 * sizeof(struct sockaddr));
2804 ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); 2814 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
2805 ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); 2815 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
2806 ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); 2816 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
2807 ret |= __get_user (r4.rt_window, &(ur4->rt_window)); 2817 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
2808 ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); 2818 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
2809 ret |= __get_user (rtdev, &(ur4->rt_dev)); 2819 ret |= __get_user(rtdev, &(ur4->rt_dev));
2810 if (rtdev) { 2820 if (rtdev) {
2811 ret |= copy_from_user (devname, compat_ptr(rtdev), 15); 2821 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
2812 r4.rt_dev = devname; devname[15] = 0; 2822 r4.rt_dev = devname; devname[15] = 0;
2813 } else 2823 } else
2814 r4.rt_dev = NULL; 2824 r4.rt_dev = NULL;
@@ -2821,9 +2831,9 @@ static int routing_ioctl(struct net *net, struct socket *sock,
2821 goto out; 2831 goto out;
2822 } 2832 }
2823 2833
2824 set_fs (KERNEL_DS); 2834 set_fs(KERNEL_DS);
2825 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); 2835 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
2826 set_fs (old_fs); 2836 set_fs(old_fs);
2827 2837
2828out: 2838out:
2829 return ret; 2839 return ret;
@@ -2986,11 +2996,13 @@ int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2986{ 2996{
2987 return sock->ops->bind(sock, addr, addrlen); 2997 return sock->ops->bind(sock, addr, addrlen);
2988} 2998}
2999EXPORT_SYMBOL(kernel_bind);
2989 3000
2990int kernel_listen(struct socket *sock, int backlog) 3001int kernel_listen(struct socket *sock, int backlog)
2991{ 3002{
2992 return sock->ops->listen(sock, backlog); 3003 return sock->ops->listen(sock, backlog);
2993} 3004}
3005EXPORT_SYMBOL(kernel_listen);
2994 3006
2995int kernel_accept(struct socket *sock, struct socket **newsock, int flags) 3007int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2996{ 3008{
@@ -3015,24 +3027,28 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3015done: 3027done:
3016 return err; 3028 return err;
3017} 3029}
3030EXPORT_SYMBOL(kernel_accept);
3018 3031
3019int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, 3032int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
3020 int flags) 3033 int flags)
3021{ 3034{
3022 return sock->ops->connect(sock, addr, addrlen, flags); 3035 return sock->ops->connect(sock, addr, addrlen, flags);
3023} 3036}
3037EXPORT_SYMBOL(kernel_connect);
3024 3038
3025int kernel_getsockname(struct socket *sock, struct sockaddr *addr, 3039int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3026 int *addrlen) 3040 int *addrlen)
3027{ 3041{
3028 return sock->ops->getname(sock, addr, addrlen, 0); 3042 return sock->ops->getname(sock, addr, addrlen, 0);
3029} 3043}
3044EXPORT_SYMBOL(kernel_getsockname);
3030 3045
3031int kernel_getpeername(struct socket *sock, struct sockaddr *addr, 3046int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3032 int *addrlen) 3047 int *addrlen)
3033{ 3048{
3034 return sock->ops->getname(sock, addr, addrlen, 1); 3049 return sock->ops->getname(sock, addr, addrlen, 1);
3035} 3050}
3051EXPORT_SYMBOL(kernel_getpeername);
3036 3052
3037int kernel_getsockopt(struct socket *sock, int level, int optname, 3053int kernel_getsockopt(struct socket *sock, int level, int optname,
3038 char *optval, int *optlen) 3054 char *optval, int *optlen)
@@ -3049,6 +3065,7 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
3049 set_fs(oldfs); 3065 set_fs(oldfs);
3050 return err; 3066 return err;
3051} 3067}
3068EXPORT_SYMBOL(kernel_getsockopt);
3052 3069
3053int kernel_setsockopt(struct socket *sock, int level, int optname, 3070int kernel_setsockopt(struct socket *sock, int level, int optname,
3054 char *optval, unsigned int optlen) 3071 char *optval, unsigned int optlen)
@@ -3065,15 +3082,19 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
3065 set_fs(oldfs); 3082 set_fs(oldfs);
3066 return err; 3083 return err;
3067} 3084}
3085EXPORT_SYMBOL(kernel_setsockopt);
3068 3086
3069int kernel_sendpage(struct socket *sock, struct page *page, int offset, 3087int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3070 size_t size, int flags) 3088 size_t size, int flags)
3071{ 3089{
3090 sock_update_classid(sock->sk);
3091
3072 if (sock->ops->sendpage) 3092 if (sock->ops->sendpage)
3073 return sock->ops->sendpage(sock, page, offset, size, flags); 3093 return sock->ops->sendpage(sock, page, offset, size, flags);
3074 3094
3075 return sock_no_sendpage(sock, page, offset, size, flags); 3095 return sock_no_sendpage(sock, page, offset, size, flags);
3076} 3096}
3097EXPORT_SYMBOL(kernel_sendpage);
3077 3098
3078int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) 3099int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3079{ 3100{
@@ -3086,33 +3107,10 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3086 3107
3087 return err; 3108 return err;
3088} 3109}
3110EXPORT_SYMBOL(kernel_sock_ioctl);
3089 3111
3090int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) 3112int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3091{ 3113{
3092 return sock->ops->shutdown(sock, how); 3114 return sock->ops->shutdown(sock, how);
3093} 3115}
3094
3095EXPORT_SYMBOL(sock_create);
3096EXPORT_SYMBOL(sock_create_kern);
3097EXPORT_SYMBOL(sock_create_lite);
3098EXPORT_SYMBOL(sock_map_fd);
3099EXPORT_SYMBOL(sock_recvmsg);
3100EXPORT_SYMBOL(sock_register);
3101EXPORT_SYMBOL(sock_release);
3102EXPORT_SYMBOL(sock_sendmsg);
3103EXPORT_SYMBOL(sock_unregister);
3104EXPORT_SYMBOL(sock_wake_async);
3105EXPORT_SYMBOL(sockfd_lookup);
3106EXPORT_SYMBOL(kernel_sendmsg);
3107EXPORT_SYMBOL(kernel_recvmsg);
3108EXPORT_SYMBOL(kernel_bind);
3109EXPORT_SYMBOL(kernel_listen);
3110EXPORT_SYMBOL(kernel_accept);
3111EXPORT_SYMBOL(kernel_connect);
3112EXPORT_SYMBOL(kernel_getsockname);
3113EXPORT_SYMBOL(kernel_getpeername);
3114EXPORT_SYMBOL(kernel_getsockopt);
3115EXPORT_SYMBOL(kernel_setsockopt);
3116EXPORT_SYMBOL(kernel_sendpage);
3117EXPORT_SYMBOL(kernel_sock_ioctl);
3118EXPORT_SYMBOL(kernel_sock_shutdown); 3116EXPORT_SYMBOL(kernel_sock_shutdown);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index c2173ebdb33c..58de76c8540c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -34,6 +34,7 @@
34#include <linux/sunrpc/cache.h> 34#include <linux/sunrpc/cache.h>
35#include <linux/sunrpc/stats.h> 35#include <linux/sunrpc/stats.h>
36#include <linux/sunrpc/rpc_pipe_fs.h> 36#include <linux/sunrpc/rpc_pipe_fs.h>
37#include <linux/smp_lock.h>
37 38
38#define RPCDBG_FACILITY RPCDBG_CACHE 39#define RPCDBG_FACILITY RPCDBG_CACHE
39 40
@@ -1545,12 +1546,18 @@ static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait)
1545 return cache_poll(filp, wait, cd); 1546 return cache_poll(filp, wait, cd);
1546} 1547}
1547 1548
1548static int cache_ioctl_pipefs(struct inode *inode, struct file *filp, 1549static long cache_ioctl_pipefs(struct file *filp,
1549 unsigned int cmd, unsigned long arg) 1550 unsigned int cmd, unsigned long arg)
1550{ 1551{
1552 struct inode *inode = filp->f_dentry->d_inode;
1551 struct cache_detail *cd = RPC_I(inode)->private; 1553 struct cache_detail *cd = RPC_I(inode)->private;
1554 long ret;
1552 1555
1553 return cache_ioctl(inode, filp, cmd, arg, cd); 1556 lock_kernel();
1557 ret = cache_ioctl(inode, filp, cmd, arg, cd);
1558 unlock_kernel();
1559
1560 return ret;
1554} 1561}
1555 1562
1556static int cache_open_pipefs(struct inode *inode, struct file *filp) 1563static int cache_open_pipefs(struct inode *inode, struct file *filp)
@@ -1573,7 +1580,7 @@ const struct file_operations cache_file_operations_pipefs = {
1573 .read = cache_read_pipefs, 1580 .read = cache_read_pipefs,
1574 .write = cache_write_pipefs, 1581 .write = cache_write_pipefs,
1575 .poll = cache_poll_pipefs, 1582 .poll = cache_poll_pipefs,
1576 .ioctl = cache_ioctl_pipefs, /* for FIONREAD */ 1583 .unlocked_ioctl = cache_ioctl_pipefs, /* for FIONREAD */
1577 .open = cache_open_pipefs, 1584 .open = cache_open_pipefs,
1578 .release = cache_release_pipefs, 1585 .release = cache_release_pipefs,
1579}; 1586};
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 20e30c6f8355..95ccbcf45d3e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -27,6 +27,7 @@
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/sunrpc/rpc_pipe_fs.h> 28#include <linux/sunrpc/rpc_pipe_fs.h>
29#include <linux/sunrpc/cache.h> 29#include <linux/sunrpc/cache.h>
30#include <linux/smp_lock.h>
30 31
31static struct vfsmount *rpc_mount __read_mostly; 32static struct vfsmount *rpc_mount __read_mostly;
32static int rpc_mount_count; 33static int rpc_mount_count;
@@ -309,8 +310,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
309} 310}
310 311
311static int 312static int
312rpc_pipe_ioctl(struct inode *ino, struct file *filp, 313rpc_pipe_ioctl_unlocked(struct file *filp, unsigned int cmd, unsigned long arg)
313 unsigned int cmd, unsigned long arg)
314{ 314{
315 struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); 315 struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
316 int len; 316 int len;
@@ -331,13 +331,25 @@ rpc_pipe_ioctl(struct inode *ino, struct file *filp,
331 } 331 }
332} 332}
333 333
334static long
335rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
336{
337 long ret;
338
339 lock_kernel();
340 ret = rpc_pipe_ioctl_unlocked(filp, cmd, arg);
341 unlock_kernel();
342
343 return ret;
344}
345
334static const struct file_operations rpc_pipe_fops = { 346static const struct file_operations rpc_pipe_fops = {
335 .owner = THIS_MODULE, 347 .owner = THIS_MODULE,
336 .llseek = no_llseek, 348 .llseek = no_llseek,
337 .read = rpc_pipe_read, 349 .read = rpc_pipe_read,
338 .write = rpc_pipe_write, 350 .write = rpc_pipe_write,
339 .poll = rpc_pipe_poll, 351 .poll = rpc_pipe_poll,
340 .ioctl = rpc_pipe_ioctl, 352 .unlocked_ioctl = rpc_pipe_ioctl,
341 .open = rpc_pipe_open, 353 .open = rpc_pipe_open,
342 .release = rpc_pipe_release, 354 .release = rpc_pipe_release,
343}; 355};
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 121105355f60..dac219a56ae1 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -783,7 +783,7 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
783 port = ntohl(*p); 783 port = ntohl(*p);
784 dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, 784 dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid,
785 task->tk_msg.rpc_proc->p_name, port); 785 task->tk_msg.rpc_proc->p_name, port);
786 if (unlikely(port > USHORT_MAX)) 786 if (unlikely(port > USHRT_MAX))
787 return -EIO; 787 return -EIO;
788 788
789 rpcb->r_port = port; 789 rpcb->r_port = port;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3fc325399ee4..dcd0132396ba 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -166,7 +166,6 @@ EXPORT_SYMBOL_GPL(xprt_unregister_transport);
166int xprt_load_transport(const char *transport_name) 166int xprt_load_transport(const char *transport_name)
167{ 167{
168 struct xprt_class *t; 168 struct xprt_class *t;
169 char module_name[sizeof t->name + 5];
170 int result; 169 int result;
171 170
172 result = 0; 171 result = 0;
@@ -178,9 +177,7 @@ int xprt_load_transport(const char *transport_name)
178 } 177 }
179 } 178 }
180 spin_unlock(&xprt_list_lock); 179 spin_unlock(&xprt_list_lock);
181 strcpy(module_name, "xprt"); 180 result = request_module("xprt%s", transport_name);
182 strncat(module_name, transport_name, sizeof t->name);
183 result = request_module(module_name);
184out: 181out:
185 return result; 182 return result;
186} 183}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index b7cd8cccbe72..2a9675136c68 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2293,6 +2293,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2293 struct sockaddr *addr = args->dstaddr; 2293 struct sockaddr *addr = args->dstaddr;
2294 struct rpc_xprt *xprt; 2294 struct rpc_xprt *xprt;
2295 struct sock_xprt *transport; 2295 struct sock_xprt *transport;
2296 struct rpc_xprt *ret;
2296 2297
2297 xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); 2298 xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
2298 if (IS_ERR(xprt)) 2299 if (IS_ERR(xprt))
@@ -2330,8 +2331,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2330 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); 2331 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
2331 break; 2332 break;
2332 default: 2333 default:
2333 kfree(xprt); 2334 ret = ERR_PTR(-EAFNOSUPPORT);
2334 return ERR_PTR(-EAFNOSUPPORT); 2335 goto out_err;
2335 } 2336 }
2336 2337
2337 if (xprt_bound(xprt)) 2338 if (xprt_bound(xprt))
@@ -2346,10 +2347,11 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2346 2347
2347 if (try_module_get(THIS_MODULE)) 2348 if (try_module_get(THIS_MODULE))
2348 return xprt; 2349 return xprt;
2349 2350 ret = ERR_PTR(-EINVAL);
2351out_err:
2350 kfree(xprt->slot); 2352 kfree(xprt->slot);
2351 kfree(xprt); 2353 kfree(xprt);
2352 return ERR_PTR(-EINVAL); 2354 return ret;
2353} 2355}
2354 2356
2355static const struct rpc_timeout xs_tcp_default_timeout = { 2357static const struct rpc_timeout xs_tcp_default_timeout = {
@@ -2368,6 +2370,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2368 struct sockaddr *addr = args->dstaddr; 2370 struct sockaddr *addr = args->dstaddr;
2369 struct rpc_xprt *xprt; 2371 struct rpc_xprt *xprt;
2370 struct sock_xprt *transport; 2372 struct sock_xprt *transport;
2373 struct rpc_xprt *ret;
2371 2374
2372 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2375 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
2373 if (IS_ERR(xprt)) 2376 if (IS_ERR(xprt))
@@ -2403,8 +2406,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2403 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); 2406 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
2404 break; 2407 break;
2405 default: 2408 default:
2406 kfree(xprt); 2409 ret = ERR_PTR(-EAFNOSUPPORT);
2407 return ERR_PTR(-EAFNOSUPPORT); 2410 goto out_err;
2408 } 2411 }
2409 2412
2410 if (xprt_bound(xprt)) 2413 if (xprt_bound(xprt))
@@ -2420,10 +2423,11 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2420 2423
2421 if (try_module_get(THIS_MODULE)) 2424 if (try_module_get(THIS_MODULE))
2422 return xprt; 2425 return xprt;
2423 2426 ret = ERR_PTR(-EINVAL);
2427out_err:
2424 kfree(xprt->slot); 2428 kfree(xprt->slot);
2425 kfree(xprt); 2429 kfree(xprt);
2426 return ERR_PTR(-EINVAL); 2430 return ret;
2427} 2431}
2428 2432
2429/** 2433/**
@@ -2437,6 +2441,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2437 struct rpc_xprt *xprt; 2441 struct rpc_xprt *xprt;
2438 struct sock_xprt *transport; 2442 struct sock_xprt *transport;
2439 struct svc_sock *bc_sock; 2443 struct svc_sock *bc_sock;
2444 struct rpc_xprt *ret;
2440 2445
2441 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2446 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
2442 if (IS_ERR(xprt)) 2447 if (IS_ERR(xprt))
@@ -2476,8 +2481,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2476 RPCBIND_NETID_TCP6); 2481 RPCBIND_NETID_TCP6);
2477 break; 2482 break;
2478 default: 2483 default:
2479 kfree(xprt); 2484 ret = ERR_PTR(-EAFNOSUPPORT);
2480 return ERR_PTR(-EAFNOSUPPORT); 2485 goto out_err;
2481 } 2486 }
2482 2487
2483 if (xprt_bound(xprt)) 2488 if (xprt_bound(xprt))
@@ -2499,9 +2504,11 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2499 2504
2500 if (try_module_get(THIS_MODULE)) 2505 if (try_module_get(THIS_MODULE))
2501 return xprt; 2506 return xprt;
2507 ret = ERR_PTR(-EINVAL);
2508out_err:
2502 kfree(xprt->slot); 2509 kfree(xprt->slot);
2503 kfree(xprt); 2510 kfree(xprt);
2504 return ERR_PTR(-EINVAL); 2511 return ret;
2505} 2512}
2506 2513
2507static struct xprt_class xs_udp_transport = { 2514static struct xprt_class xs_udp_transport = {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index fef2cc5e9d2b..4414a18c63b4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -282,7 +282,7 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
282 return s; 282 return s;
283} 283}
284 284
285static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) 285static struct sock *unix_find_socket_byinode(struct inode *i)
286{ 286{
287 struct sock *s; 287 struct sock *s;
288 struct hlist_node *node; 288 struct hlist_node *node;
@@ -292,9 +292,6 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
292 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { 292 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 struct dentry *dentry = unix_sk(s)->dentry; 293 struct dentry *dentry = unix_sk(s)->dentry;
294 294
295 if (!net_eq(sock_net(s), net))
296 continue;
297
298 if (dentry && dentry->d_inode == i) { 295 if (dentry && dentry->d_inode == i) {
299 sock_hold(s); 296 sock_hold(s);
300 goto found; 297 goto found;
@@ -450,11 +447,31 @@ static int unix_release_sock(struct sock *sk, int embrion)
450 return 0; 447 return 0;
451} 448}
452 449
450static void init_peercred(struct sock *sk)
451{
452 put_pid(sk->sk_peer_pid);
453 if (sk->sk_peer_cred)
454 put_cred(sk->sk_peer_cred);
455 sk->sk_peer_pid = get_pid(task_tgid(current));
456 sk->sk_peer_cred = get_current_cred();
457}
458
459static void copy_peercred(struct sock *sk, struct sock *peersk)
460{
461 put_pid(sk->sk_peer_pid);
462 if (sk->sk_peer_cred)
463 put_cred(sk->sk_peer_cred);
464 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
465 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
466}
467
453static int unix_listen(struct socket *sock, int backlog) 468static int unix_listen(struct socket *sock, int backlog)
454{ 469{
455 int err; 470 int err;
456 struct sock *sk = sock->sk; 471 struct sock *sk = sock->sk;
457 struct unix_sock *u = unix_sk(sk); 472 struct unix_sock *u = unix_sk(sk);
473 struct pid *old_pid = NULL;
474 const struct cred *old_cred = NULL;
458 475
459 err = -EOPNOTSUPP; 476 err = -EOPNOTSUPP;
460 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 477 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@@ -470,12 +487,14 @@ static int unix_listen(struct socket *sock, int backlog)
470 sk->sk_max_ack_backlog = backlog; 487 sk->sk_max_ack_backlog = backlog;
471 sk->sk_state = TCP_LISTEN; 488 sk->sk_state = TCP_LISTEN;
472 /* set credentials so connect can copy them */ 489 /* set credentials so connect can copy them */
473 sk->sk_peercred.pid = task_tgid_vnr(current); 490 init_peercred(sk);
474 current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
475 err = 0; 491 err = 0;
476 492
477out_unlock: 493out_unlock:
478 unix_state_unlock(sk); 494 unix_state_unlock(sk);
495 put_pid(old_pid);
496 if (old_cred)
497 put_cred(old_cred);
479out: 498out:
480 return err; 499 return err;
481} 500}
@@ -736,7 +755,7 @@ static struct sock *unix_find_other(struct net *net,
736 err = -ECONNREFUSED; 755 err = -ECONNREFUSED;
737 if (!S_ISSOCK(inode->i_mode)) 756 if (!S_ISSOCK(inode->i_mode))
738 goto put_fail; 757 goto put_fail;
739 u = unix_find_socket_byinode(net, inode); 758 u = unix_find_socket_byinode(inode);
740 if (!u) 759 if (!u)
741 goto put_fail; 760 goto put_fail;
742 761
@@ -1140,8 +1159,7 @@ restart:
1140 unix_peer(newsk) = sk; 1159 unix_peer(newsk) = sk;
1141 newsk->sk_state = TCP_ESTABLISHED; 1160 newsk->sk_state = TCP_ESTABLISHED;
1142 newsk->sk_type = sk->sk_type; 1161 newsk->sk_type = sk->sk_type;
1143 newsk->sk_peercred.pid = task_tgid_vnr(current); 1162 init_peercred(newsk);
1144 current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
1145 newu = unix_sk(newsk); 1163 newu = unix_sk(newsk);
1146 newsk->sk_wq = &newu->peer_wq; 1164 newsk->sk_wq = &newu->peer_wq;
1147 otheru = unix_sk(other); 1165 otheru = unix_sk(other);
@@ -1157,7 +1175,7 @@ restart:
1157 } 1175 }
1158 1176
1159 /* Set credentials */ 1177 /* Set credentials */
1160 sk->sk_peercred = other->sk_peercred; 1178 copy_peercred(sk, other);
1161 1179
1162 sock->state = SS_CONNECTED; 1180 sock->state = SS_CONNECTED;
1163 sk->sk_state = TCP_ESTABLISHED; 1181 sk->sk_state = TCP_ESTABLISHED;
@@ -1199,10 +1217,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
1199 sock_hold(skb); 1217 sock_hold(skb);
1200 unix_peer(ska) = skb; 1218 unix_peer(ska) = skb;
1201 unix_peer(skb) = ska; 1219 unix_peer(skb) = ska;
1202 ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current); 1220 init_peercred(ska);
1203 current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid); 1221 init_peercred(skb);
1204 ska->sk_peercred.uid = skb->sk_peercred.uid;
1205 ska->sk_peercred.gid = skb->sk_peercred.gid;
1206 1222
1207 if (ska->sk_type != SOCK_DGRAM) { 1223 if (ska->sk_type != SOCK_DGRAM) {
1208 ska->sk_state = TCP_ESTABLISHED; 1224 ska->sk_state = TCP_ESTABLISHED;
@@ -1297,18 +1313,20 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1297 int i; 1313 int i;
1298 1314
1299 scm->fp = UNIXCB(skb).fp; 1315 scm->fp = UNIXCB(skb).fp;
1300 skb->destructor = sock_wfree;
1301 UNIXCB(skb).fp = NULL; 1316 UNIXCB(skb).fp = NULL;
1302 1317
1303 for (i = scm->fp->count-1; i >= 0; i--) 1318 for (i = scm->fp->count-1; i >= 0; i--)
1304 unix_notinflight(scm->fp->fp[i]); 1319 unix_notinflight(scm->fp->fp[i]);
1305} 1320}
1306 1321
1307static void unix_destruct_fds(struct sk_buff *skb) 1322static void unix_destruct_scm(struct sk_buff *skb)
1308{ 1323{
1309 struct scm_cookie scm; 1324 struct scm_cookie scm;
1310 memset(&scm, 0, sizeof(scm)); 1325 memset(&scm, 0, sizeof(scm));
1311 unix_detach_fds(&scm, skb); 1326 scm.pid = UNIXCB(skb).pid;
1327 scm.cred = UNIXCB(skb).cred;
1328 if (UNIXCB(skb).fp)
1329 unix_detach_fds(&scm, skb);
1312 1330
1313 /* Alas, it calls VFS */ 1331 /* Alas, it calls VFS */
1314 /* So fscking what? fput() had been SMP-safe since the last Summer */ 1332 /* So fscking what? fput() had been SMP-safe since the last Summer */
@@ -1331,10 +1349,22 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1331 1349
1332 for (i = scm->fp->count-1; i >= 0; i--) 1350 for (i = scm->fp->count-1; i >= 0; i--)
1333 unix_inflight(scm->fp->fp[i]); 1351 unix_inflight(scm->fp->fp[i]);
1334 skb->destructor = unix_destruct_fds;
1335 return 0; 1352 return 0;
1336} 1353}
1337 1354
1355static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1356{
1357 int err = 0;
1358 UNIXCB(skb).pid = get_pid(scm->pid);
1359 UNIXCB(skb).cred = get_cred(scm->cred);
1360 UNIXCB(skb).fp = NULL;
1361 if (scm->fp && send_fds)
1362 err = unix_attach_fds(scm, skb);
1363
1364 skb->destructor = unix_destruct_scm;
1365 return err;
1366}
1367
1338/* 1368/*
1339 * Send AF_UNIX data. 1369 * Send AF_UNIX data.
1340 */ 1370 */
@@ -1391,12 +1421,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1391 if (skb == NULL) 1421 if (skb == NULL)
1392 goto out; 1422 goto out;
1393 1423
1394 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1424 err = unix_scm_to_skb(siocb->scm, skb, true);
1395 if (siocb->scm->fp) { 1425 if (err)
1396 err = unix_attach_fds(siocb->scm, skb); 1426 goto out_free;
1397 if (err)
1398 goto out_free;
1399 }
1400 unix_get_secdata(siocb->scm, skb); 1427 unix_get_secdata(siocb->scm, skb);
1401 1428
1402 skb_reset_transport_header(skb); 1429 skb_reset_transport_header(skb);
@@ -1566,16 +1593,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1566 */ 1593 */
1567 size = min_t(int, size, skb_tailroom(skb)); 1594 size = min_t(int, size, skb_tailroom(skb));
1568 1595
1569 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); 1596
1570 /* Only send the fds in the first buffer */ 1597 /* Only send the fds in the first buffer */
1571 if (siocb->scm->fp && !fds_sent) { 1598 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1572 err = unix_attach_fds(siocb->scm, skb); 1599 if (err) {
1573 if (err) { 1600 kfree_skb(skb);
1574 kfree_skb(skb); 1601 goto out_err;
1575 goto out_err;
1576 }
1577 fds_sent = true;
1578 } 1602 }
1603 fds_sent = true;
1579 1604
1580 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 1605 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1581 if (err) { 1606 if (err) {
@@ -1692,7 +1717,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1692 siocb->scm = &tmp_scm; 1717 siocb->scm = &tmp_scm;
1693 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1718 memset(&tmp_scm, 0, sizeof(tmp_scm));
1694 } 1719 }
1695 siocb->scm->creds = *UNIXCREDS(skb); 1720 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1696 unix_set_secdata(siocb->scm, skb); 1721 unix_set_secdata(siocb->scm, skb);
1697 1722
1698 if (!(flags & MSG_PEEK)) { 1723 if (!(flags & MSG_PEEK)) {
@@ -1841,14 +1866,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1841 1866
1842 if (check_creds) { 1867 if (check_creds) {
1843 /* Never glue messages from different writers */ 1868 /* Never glue messages from different writers */
1844 if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, 1869 if ((UNIXCB(skb).pid != siocb->scm->pid) ||
1845 sizeof(siocb->scm->creds)) != 0) { 1870 (UNIXCB(skb).cred != siocb->scm->cred)) {
1846 skb_queue_head(&sk->sk_receive_queue, skb); 1871 skb_queue_head(&sk->sk_receive_queue, skb);
1847 break; 1872 break;
1848 } 1873 }
1849 } else { 1874 } else {
1850 /* Copy credentials */ 1875 /* Copy credentials */
1851 siocb->scm->creds = *UNIXCREDS(skb); 1876 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1852 check_creds = 1; 1877 check_creds = 1;
1853 } 1878 }
1854 1879
@@ -1881,7 +1906,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1881 break; 1906 break;
1882 } 1907 }
1883 1908
1884 kfree_skb(skb); 1909 consume_skb(skb);
1885 1910
1886 if (siocb->scm->fp) 1911 if (siocb->scm->fp)
1887 break; 1912 break;
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 258daa80ad92..2bf23406637a 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -48,7 +48,7 @@
48#include <linux/kernel.h> 48#include <linux/kernel.h>
49#include <linux/module.h> /* support for loadable modules */ 49#include <linux/module.h> /* support for loadable modules */
50#include <linux/slab.h> /* kmalloc(), kfree() */ 50#include <linux/slab.h> /* kmalloc(), kfree() */
51#include <linux/smp_lock.h> 51#include <linux/mutex.h>
52#include <linux/mm.h> 52#include <linux/mm.h>
53#include <linux/string.h> /* inline mem*, str* functions */ 53#include <linux/string.h> /* inline mem*, str* functions */
54 54
@@ -71,6 +71,7 @@
71 * WAN device IOCTL handlers 71 * WAN device IOCTL handlers
72 */ 72 */
73 73
74static DEFINE_MUTEX(wanrouter_mutex);
74static int wanrouter_device_setup(struct wan_device *wandev, 75static int wanrouter_device_setup(struct wan_device *wandev,
75 wandev_conf_t __user *u_conf); 76 wandev_conf_t __user *u_conf);
76static int wanrouter_device_stat(struct wan_device *wandev, 77static int wanrouter_device_stat(struct wan_device *wandev,
@@ -376,7 +377,7 @@ long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
376 if (wandev->magic != ROUTER_MAGIC) 377 if (wandev->magic != ROUTER_MAGIC)
377 return -EINVAL; 378 return -EINVAL;
378 379
379 lock_kernel(); 380 mutex_lock(&wanrouter_mutex);
380 switch (cmd) { 381 switch (cmd) {
381 case ROUTER_SETUP: 382 case ROUTER_SETUP:
382 err = wanrouter_device_setup(wandev, data); 383 err = wanrouter_device_setup(wandev, data);
@@ -408,7 +409,7 @@ long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
408 err = wandev->ioctl(wandev, cmd, arg); 409 err = wandev->ioctl(wandev, cmd, arg);
409 else err = -EINVAL; 410 else err = -EINVAL;
410 } 411 }
411 unlock_kernel(); 412 mutex_unlock(&wanrouter_mutex);
412 return err; 413 return err;
413} 414}
414 415
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index c44d96b3a437..11f25c7a7a05 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -27,7 +27,7 @@
27#include <linux/module.h> 27#include <linux/module.h>
28#include <linux/wanrouter.h> /* WAN router API definitions */ 28#include <linux/wanrouter.h> /* WAN router API definitions */
29#include <linux/seq_file.h> 29#include <linux/seq_file.h>
30#include <linux/smp_lock.h> 30#include <linux/mutex.h>
31 31
32#include <net/net_namespace.h> 32#include <net/net_namespace.h>
33#include <asm/io.h> 33#include <asm/io.h>
@@ -66,6 +66,7 @@
66 * /proc/net/router 66 * /proc/net/router
67 */ 67 */
68 68
69static DEFINE_MUTEX(config_mutex);
69static struct proc_dir_entry *proc_router; 70static struct proc_dir_entry *proc_router;
70 71
71/* Strings */ 72/* Strings */
@@ -85,7 +86,7 @@ static void *r_start(struct seq_file *m, loff_t *pos)
85 struct wan_device *wandev; 86 struct wan_device *wandev;
86 loff_t l = *pos; 87 loff_t l = *pos;
87 88
88 lock_kernel(); 89 mutex_lock(&config_mutex);
89 if (!l--) 90 if (!l--)
90 return SEQ_START_TOKEN; 91 return SEQ_START_TOKEN;
91 for (wandev = wanrouter_router_devlist; l-- && wandev; 92 for (wandev = wanrouter_router_devlist; l-- && wandev;
@@ -104,7 +105,7 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
104static void r_stop(struct seq_file *m, void *v) 105static void r_stop(struct seq_file *m, void *v)
105 __releases(kernel_lock) 106 __releases(kernel_lock)
106{ 107{
107 unlock_kernel(); 108 mutex_unlock(&config_mutex);
108} 109}
109 110
110static int config_show(struct seq_file *m, void *v) 111static int config_show(struct seq_file *m, void *v)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index cea595e2ed4d..fbfac588297c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1138,7 +1138,7 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev,
1138 enum nl80211_iftype iftype) 1138 enum nl80211_iftype iftype)
1139{ 1139{
1140 if (!use_4addr) { 1140 if (!use_4addr) {
1141 if (netdev && netdev->br_port) 1141 if (netdev && (netdev->priv_flags & IFF_BRIDGE_PORT))
1142 return -EBUSY; 1142 return -EBUSY;
1143 return 0; 1143 return 0;
1144 } 1144 }
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 3416373a9c0c..0c8a1e8b7690 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -770,8 +770,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
770 return -EOPNOTSUPP; 770 return -EOPNOTSUPP;
771 771
772 /* if it's part of a bridge, reject changing type to station/ibss */ 772 /* if it's part of a bridge, reject changing type to station/ibss */
773 if (dev->br_port && (ntype == NL80211_IFTYPE_ADHOC || 773 if ((dev->priv_flags & IFF_BRIDGE_PORT) &&
774 ntype == NL80211_IFTYPE_STATION)) 774 (ntype == NL80211_IFTYPE_ADHOC || ntype == NL80211_IFTYPE_STATION))
775 return -EBUSY; 775 return -EBUSY;
776 776
777 if (ntype != otype) { 777 if (ntype != otype) {
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 6a329158bdfa..a3cca0a94346 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -95,13 +95,13 @@ resume:
95 goto error_nolock; 95 goto error_nolock;
96 } 96 }
97 97
98 dst = dst_pop(dst); 98 dst = skb_dst_pop(skb);
99 if (!dst) { 99 if (!dst) {
100 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); 100 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
101 err = -EHOSTUNREACH; 101 err = -EHOSTUNREACH;
102 goto error_nolock; 102 goto error_nolock;
103 } 103 }
104 skb_dst_set(skb, dst); 104 skb_dst_set_noref(skb, dst);
105 x = dst->xfrm; 105 x = dst->xfrm;
106 } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); 106 } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
107 107
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d965a2bad8d3..2b3ed7ad4933 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1594,8 +1594,8 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1594 1594
1595 /* Try to instantiate a bundle */ 1595 /* Try to instantiate a bundle */
1596 err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); 1596 err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
1597 if (err < 0) { 1597 if (err <= 0) {
1598 if (err != -EAGAIN) 1598 if (err != 0 && err != -EAGAIN)
1599 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); 1599 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1600 return ERR_PTR(err); 1600 return ERR_PTR(err);
1601 } 1601 }
@@ -1678,6 +1678,13 @@ xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
1678 goto make_dummy_bundle; 1678 goto make_dummy_bundle;
1679 dst_hold(&xdst->u.dst); 1679 dst_hold(&xdst->u.dst);
1680 return oldflo; 1680 return oldflo;
1681 } else if (new_xdst == NULL) {
1682 num_xfrms = 0;
1683 if (oldflo == NULL)
1684 goto make_dummy_bundle;
1685 xdst->num_xfrms = 0;
1686 dst_hold(&xdst->u.dst);
1687 return oldflo;
1681 } 1688 }
1682 1689
1683 /* Kill the previous bundle */ 1690 /* Kill the previous bundle */
@@ -1760,6 +1767,10 @@ restart:
1760 xfrm_pols_put(pols, num_pols); 1767 xfrm_pols_put(pols, num_pols);
1761 err = PTR_ERR(xdst); 1768 err = PTR_ERR(xdst);
1762 goto dropdst; 1769 goto dropdst;
1770 } else if (xdst == NULL) {
1771 num_xfrms = 0;
1772 drop_pols = num_pols;
1773 goto no_transform;
1763 } 1774 }
1764 1775
1765 spin_lock_bh(&xfrm_policy_sk_bundle_lock); 1776 spin_lock_bh(&xfrm_policy_sk_bundle_lock);
@@ -2153,6 +2164,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2153 return 0; 2164 return 0;
2154 } 2165 }
2155 2166
2167 skb_dst_force(skb);
2156 dst = skb_dst(skb); 2168 dst = skb_dst(skb);
2157 2169
2158 res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; 2170 res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0;
@@ -2299,7 +2311,8 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2299 return 0; 2311 return 0;
2300 if (xdst->xfrm_genid != dst->xfrm->genid) 2312 if (xdst->xfrm_genid != dst->xfrm->genid)
2301 return 0; 2313 return 0;
2302 if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) 2314 if (xdst->num_pols > 0 &&
2315 xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2303 return 0; 2316 return 0;
2304 2317
2305 if (strict && fl && 2318 if (strict && fl &&
@@ -2479,7 +2492,8 @@ static int __net_init xfrm_statistics_init(struct net *net)
2479 int rv; 2492 int rv;
2480 2493
2481 if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, 2494 if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
2482 sizeof(struct linux_xfrm_mib)) < 0) 2495 sizeof(struct linux_xfrm_mib),
2496 __alignof__(struct linux_xfrm_mib)) < 0)
2483 return -ENOMEM; 2497 return -ENOMEM;
2484 rv = xfrm_proc_init(net); 2498 rv = xfrm_proc_init(net);
2485 if (rv < 0) 2499 if (rv < 0)