Diffstat (limited to 'net')
-rw-r--r--  net/8021q/vlan.c  13
-rw-r--r--  net/8021q/vlan.h  22
-rw-r--r--  net/8021q/vlan_core.c  4
-rw-r--r--  net/8021q/vlan_dev.c  197
-rw-r--r--  net/8021q/vlan_netlink.c  20
-rw-r--r--  net/8021q/vlanproc.c  5
-rw-r--r--  net/9p/protocol.c  33
-rw-r--r--  net/Kconfig  5
-rw-r--r--  net/atm/br2684.c  2
-rw-r--r--  net/atm/clip.c  3
-rw-r--r--  net/atm/lec.c  3
-rw-r--r--  net/bluetooth/Makefile  2
-rw-r--r--  net/bridge/br.c  4
-rw-r--r--  net/bridge/br_fdb.c  15
-rw-r--r--  net/bridge/br_forward.c  4
-rw-r--r--  net/bridge/br_if.c  7
-rw-r--r--  net/bridge/br_input.c  10
-rw-r--r--  net/bridge/br_multicast.c  78
-rw-r--r--  net/bridge/br_netfilter.c  22
-rw-r--r--  net/bridge/br_netlink.c  10
-rw-r--r--  net/bridge/br_notify.c  6
-rw-r--r--  net/bridge/br_private.h  21
-rw-r--r--  net/bridge/br_stp_bpdu.c  8
-rw-r--r--  net/bridge/netfilter/ebtable_broute.c  3
-rw-r--r--  net/bridge/netfilter/ebtables.c  11
-rw-r--r--  net/caif/Makefile  8
-rw-r--r--  net/can/Makefile  6
-rw-r--r--  net/ceph/Makefile  2
-rw-r--r--  net/core/datagram.c  2
-rw-r--r--  net/core/dev.c  205
-rw-r--r--  net/core/ethtool.c  4
-rw-r--r--  net/core/filter.c  444
-rw-r--r--  net/core/net-sysfs.c  429
-rw-r--r--  net/core/net-sysfs.h  4
-rw-r--r--  net/core/netpoll.c  3
-rw-r--r--  net/core/pktgen.c  39
-rw-r--r--  net/core/request_sock.c  1
-rw-r--r--  net/core/rtnetlink.c  166
-rw-r--r--  net/core/scm.c  10
-rw-r--r--  net/core/skbuff.c  34
-rw-r--r--  net/core/timestamping.c  4
-rw-r--r--  net/dccp/Makefile  4
-rw-r--r--  net/dccp/ackvec.c  616
-rw-r--r--  net/dccp/ackvec.h  151
-rw-r--r--  net/dccp/ccids/ccid2.c  143
-rw-r--r--  net/dccp/ccids/ccid2.h  2
-rw-r--r--  net/dccp/dccp.h  24
-rw-r--r--  net/dccp/input.c  33
-rw-r--r--  net/dccp/ipv4.c  13
-rw-r--r--  net/dccp/options.c  100
-rw-r--r--  net/dccp/output.c  22
-rw-r--r--  net/dccp/proto.c  71
-rw-r--r--  net/dccp/qpolicy.c  137
-rw-r--r--  net/decnet/af_decnet.c  2
-rw-r--r--  net/decnet/dn_dev.c  100
-rw-r--r--  net/decnet/dn_fib.c  6
-rw-r--r--  net/decnet/dn_neigh.c  2
-rw-r--r--  net/decnet/dn_route.c  94
-rw-r--r--  net/decnet/dn_rules.c  2
-rw-r--r--  net/dns_resolver/Makefile  2
-rw-r--r--  net/econet/Makefile  2
-rw-r--r--  net/ieee802154/af_ieee802154.c  6
-rw-r--r--  net/ipv4/af_inet.c  18
-rw-r--r--  net/ipv4/arp.c  31
-rw-r--r--  net/ipv4/devinet.c  91
-rw-r--r--  net/ipv4/fib_frontend.c  28
-rw-r--r--  net/ipv4/fib_semantics.c  8
-rw-r--r--  net/ipv4/icmp.c  32
-rw-r--r--  net/ipv4/igmp.c  282
-rw-r--r--  net/ipv4/inet_connection_sock.c  15
-rw-r--r--  net/ipv4/inetpeer.c  167
-rw-r--r--  net/ipv4/ip_fragment.c  2
-rw-r--r--  net/ipv4/ip_gre.c  48
-rw-r--r--  net/ipv4/ip_output.c  25
-rw-r--r--  net/ipv4/ipconfig.c  32
-rw-r--r--  net/ipv4/ipip.c  21
-rw-r--r--  net/ipv4/ipmr.c  20
-rw-r--r--  net/ipv4/netfilter.c  8
-rw-r--r--  net/ipv4/netfilter/Makefile  6
-rw-r--r--  net/ipv4/raw.c  7
-rw-r--r--  net/ipv4/route.c  102
-rw-r--r--  net/ipv4/syncookies.c  15
-rw-r--r--  net/ipv4/tcp.c  16
-rw-r--r--  net/ipv4/tcp_ipv4.c  72
-rw-r--r--  net/ipv4/tcp_minisocks.c  63
-rw-r--r--  net/ipv4/tcp_output.c  12
-rw-r--r--  net/ipv4/tcp_probe.c  4
-rw-r--r--  net/ipv4/udp.c  16
-rw-r--r--  net/ipv4/xfrm4_policy.c  47
-rw-r--r--  net/ipv6/addrconf.c  114
-rw-r--r--  net/ipv6/inet6_connection_sock.c  54
-rw-r--r--  net/ipv6/ip6_tunnel.c  2
-rw-r--r--  net/ipv6/ip6mr.c  4
-rw-r--r--  net/ipv6/mcast.c  77
-rw-r--r--  net/ipv6/ndisc.c  24
-rw-r--r--  net/ipv6/netfilter.c  6
-rw-r--r--  net/ipv6/netfilter/Makefile  4
-rw-r--r--  net/ipv6/reassembly.c  36
-rw-r--r--  net/ipv6/route.c  44
-rw-r--r--  net/ipv6/sit.c  14
-rw-r--r--  net/ipv6/tcp_ipv6.c  149
-rw-r--r--  net/ipv6/udp.c  4
-rw-r--r--  net/irda/ircomm/Makefile  4
-rw-r--r--  net/irda/irlan/Makefile  2
-rw-r--r--  net/irda/irnet/Makefile  2
-rw-r--r--  net/l2tp/l2tp_ip.c  12
-rw-r--r--  net/lapb/Makefile  2
-rw-r--r--  net/llc/af_llc.c  6
-rw-r--r--  net/mac80211/aes_ccm.c  3
-rw-r--r--  net/mac80211/aes_cmac.c  3
-rw-r--r--  net/mac80211/cfg.c  26
-rw-r--r--  net/mac80211/debugfs.c  60
-rw-r--r--  net/mac80211/debugfs.h  2
-rw-r--r--  net/mac80211/debugfs_key.c  19
-rw-r--r--  net/mac80211/debugfs_sta.c  26
-rw-r--r--  net/mac80211/driver-ops.h  37
-rw-r--r--  net/mac80211/driver-trace.h  71
-rw-r--r--  net/mac80211/ibss.c  2
-rw-r--r--  net/mac80211/ieee80211_i.h  10
-rw-r--r--  net/mac80211/key.c  9
-rw-r--r--  net/mac80211/mlme.c  143
-rw-r--r--  net/mac80211/rate.c  18
-rw-r--r--  net/mac80211/rc80211_minstrel_ht.c  19
-rw-r--r--  net/mac80211/rx.c  2
-rw-r--r--  net/mac80211/sta_info.c  17
-rw-r--r--  net/mac80211/sta_info.h  3
-rw-r--r--  net/mac80211/status.c  26
-rw-r--r--  net/mac80211/tx.c  16
-rw-r--r--  net/mac80211/util.c  40
-rw-r--r--  net/mac80211/wme.c  11
-rw-r--r--  net/netfilter/core.c  6
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c  6
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c  42
-rw-r--r--  net/netfilter/xt_TEE.c  12
-rw-r--r--  net/packet/af_packet.c  145
-rw-r--r--  net/phonet/Makefile  4
-rw-r--r--  net/rds/Makefile  8
-rw-r--r--  net/rfkill/core.c  14
-rw-r--r--  net/rxrpc/Makefile  4
-rw-r--r--  net/rxrpc/ar-peer.c  10
-rw-r--r--  net/sched/sch_generic.c  12
-rw-r--r--  net/sched/sch_teql.c  3
-rw-r--r--  net/sctp/socket.c  2
-rw-r--r--  net/socket.c  11
-rw-r--r--  net/sunrpc/auth_gss/Makefile  4
-rw-r--r--  net/tipc/addr.c  2
-rw-r--r--  net/tipc/bcast.c  8
-rw-r--r--  net/tipc/bearer.c  12
-rw-r--r--  net/tipc/bearer.h  71
-rw-r--r--  net/tipc/cluster.c  7
-rw-r--r--  net/tipc/config.c  16
-rw-r--r--  net/tipc/config.h  1
-rw-r--r--  net/tipc/core.c  41
-rw-r--r--  net/tipc/core.h  14
-rw-r--r--  net/tipc/discover.c  2
-rw-r--r--  net/tipc/discover.h  2
-rw-r--r--  net/tipc/eth_media.c  6
-rw-r--r--  net/tipc/link.c  14
-rw-r--r--  net/tipc/link.h  1
-rw-r--r--  net/tipc/msg.c  2
-rw-r--r--  net/tipc/msg.h  168
-rw-r--r--  net/tipc/name_distr.c  2
-rw-r--r--  net/tipc/name_table.c  5
-rw-r--r--  net/tipc/net.c  5
-rw-r--r--  net/tipc/node.c  7
-rw-r--r--  net/tipc/node_subscr.c  2
-rw-r--r--  net/tipc/port.c  115
-rw-r--r--  net/tipc/port.h  130
-rw-r--r--  net/tipc/socket.c  7
-rw-r--r--  net/tipc/subscr.c  8
-rw-r--r--  net/tipc/user_reg.c  50
-rw-r--r--  net/tipc/user_reg.h  3
-rw-r--r--  net/tipc/zone.c  3
-rw-r--r--  net/unix/af_unix.c  34
-rw-r--r--  net/wanrouter/Makefile  2
-rw-r--r--  net/wireless/core.c  8
-rw-r--r--  net/wireless/lib80211.c  8
-rw-r--r--  net/wireless/lib80211_crypt_tkip.c  16
-rw-r--r--  net/wireless/mlme.c  12
-rw-r--r--  net/wireless/nl80211.c  111
-rw-r--r--  net/wireless/nl80211.h  4
-rw-r--r--  net/wireless/reg.c  259
-rw-r--r--  net/wireless/util.c  11
-rw-r--r--  net/x25/af_x25.c  95
-rw-r--r--  net/x25/x25_link.c  8
186 files changed, 4486 insertions, 2791 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 52077ca2207..6e64f7c6a2e 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -272,13 +272,11 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 		snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
 	}
 
-	new_dev = alloc_netdev_mq(sizeof(struct vlan_dev_info), name,
-				  vlan_setup, real_dev->num_tx_queues);
+	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup);
 
 	if (new_dev == NULL)
 		return -ENOBUFS;
 
-	netif_copy_real_num_queues(new_dev, real_dev);
 	dev_net_set(new_dev, net);
 	/* need 4 bytes for extra VLAN header info,
 	 * hope the underlying device can handle it.
@@ -334,12 +332,15 @@ static void vlan_transfer_features(struct net_device *dev,
 	vlandev->features &= ~dev->vlan_features;
 	vlandev->features |= dev->features & dev->vlan_features;
 	vlandev->gso_max_size = dev->gso_max_size;
+
+	if (dev->features & NETIF_F_HW_VLAN_TX)
+		vlandev->hard_header_len = dev->hard_header_len;
+	else
+		vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
+
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
 #endif
-	vlandev->real_num_tx_queues = dev->real_num_tx_queues;
-	BUG_ON(vlandev->real_num_tx_queues > vlandev->num_tx_queues);
-
 	if (old_features != vlandev->features)
 		netdev_features_change(vlandev);
 }
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index db01b3181fd..5687c9b95f3 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -19,19 +19,25 @@ struct vlan_priority_tci_mapping {
 
 
 /**
- * struct vlan_rx_stats - VLAN percpu rx stats
+ * struct vlan_pcpu_stats - VLAN percpu rx/tx stats
  * @rx_packets: number of received packets
  * @rx_bytes: number of received bytes
  * @rx_multicast: number of received multicast packets
+ * @tx_packets: number of transmitted packets
+ * @tx_bytes: number of transmitted bytes
  * @syncp: synchronization point for 64bit counters
- * @rx_errors: number of errors
+ * @rx_errors: number of rx errors
+ * @tx_dropped: number of tx drops
  */
-struct vlan_rx_stats {
+struct vlan_pcpu_stats {
 	u64			rx_packets;
 	u64			rx_bytes;
 	u64			rx_multicast;
+	u64			tx_packets;
+	u64			tx_bytes;
 	struct u64_stats_sync	syncp;
-	unsigned long		rx_errors;
+	u32			rx_errors;
+	u32			tx_dropped;
 };
 
 /**
@@ -45,9 +51,7 @@ struct vlan_rx_stats {
  * @real_dev: underlying netdevice
  * @real_dev_addr: address of underlying netdevice
  * @dent: proc dir entry
- * @cnt_inc_headroom_on_tx: statistic - number of skb expansions on TX
- * @cnt_encap_on_xmit: statistic - number of skb encapsulations on TX
- * @vlan_rx_stats: ptr to percpu rx stats
+ * @vlan_pcpu_stats: ptr to percpu rx stats
  */
 struct vlan_dev_info {
 	unsigned int			nr_ingress_mappings;
@@ -62,9 +66,7 @@ struct vlan_dev_info {
 	unsigned char			real_dev_addr[ETH_ALEN];
 
 	struct proc_dir_entry		*dent;
-	unsigned long			cnt_inc_headroom_on_tx;
-	unsigned long			cnt_encap_on_xmit;
-	struct vlan_rx_stats __percpu	*vlan_rx_stats;
+	struct vlan_pcpu_stats __percpu	*vlan_pcpu_stats;
 };
 
 static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
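
struct vlan_pcpu_stats folds the tx counters into what used to be an rx-only structure. The u64 counters share one u64_stats_sync, so a 32-bit reader retries until it observes a consistent snapshot, while rx_errors and tx_dropped are plain u32s that need no sequence protection. The reader side, sketched for one CPU's instance p (this is exactly what vlan_dev_get_stats64() does further down):

	unsigned int start;
	u64 tx_packets, tx_bytes;

	do {
		start = u64_stats_fetch_begin_bh(&p->syncp);
		tx_packets = p->tx_packets;	/* 64-bit: needs syncp */
		tx_bytes   = p->tx_bytes;
	} while (u64_stats_fetch_retry_bh(&p->syncp, start));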
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 69b2f79800a..ce8e3ab3e7a 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -9,7 +9,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
 	struct sk_buff *skb = *skbp;
 	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
 	struct net_device *vlan_dev;
-	struct vlan_rx_stats *rx_stats;
+	struct vlan_pcpu_stats *rx_stats;
 
 	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
 	if (!vlan_dev) {
@@ -26,7 +26,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
 	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
-	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_rx_stats);
+	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 14e3d1fa07a..be737539f34 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -141,7 +141,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev)
 {
 	struct vlan_hdr *vhdr;
-	struct vlan_rx_stats *rx_stats;
+	struct vlan_pcpu_stats *rx_stats;
 	struct net_device *vlan_dev;
 	u16 vlan_id;
 	u16 vlan_tci;
@@ -177,7 +177,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	} else {
 		skb->dev = vlan_dev;
 
-		rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats);
+		rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats);
 
 		u64_stats_update_begin(&rx_stats->syncp);
 		rx_stats->rx_packets++;
@@ -274,9 +274,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	u16 vlan_tci = 0;
 	int rc;
 
-	if (WARN_ON(skb_headroom(skb) < dev->hard_header_len))
-		return -ENOSPC;
-
 	if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
 
@@ -313,8 +310,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev)
 {
-	int i = skb_get_queue_mapping(skb);
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
 	unsigned int len;
 	int ret;
@@ -326,71 +321,31 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 */
 	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
 	    vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
-		unsigned int orig_headroom = skb_headroom(skb);
 		u16 vlan_tci;
-
-		vlan_dev_info(dev)->cnt_encap_on_xmit++;
-
 		vlan_tci = vlan_dev_info(dev)->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-		skb = __vlan_put_tag(skb, vlan_tci);
-		if (!skb) {
-			txq->tx_dropped++;
-			return NETDEV_TX_OK;
-		}
-
-		if (orig_headroom < VLAN_HLEN)
-			vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
+		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
 	}
 
-
 	skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
 	len = skb->len;
 	ret = dev_queue_xmit(skb);
 
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
-		txq->tx_packets++;
-		txq->tx_bytes += len;
-	} else
-		txq->tx_dropped++;
+		struct vlan_pcpu_stats *stats;
 
-	return ret;
-}
-
-static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
-						    struct net_device *dev)
-{
-	int i = skb_get_queue_mapping(skb);
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-	u16 vlan_tci;
-	unsigned int len;
-	int ret;
-
-	vlan_tci = vlan_dev_info(dev)->vlan_id;
-	vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-	skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
-
-	skb->dev = vlan_dev_info(dev)->real_dev;
-	len = skb->len;
-	ret = dev_queue_xmit(skb);
-
-	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
-		txq->tx_packets++;
-		txq->tx_bytes += len;
-	} else
-		txq->tx_dropped++;
+		stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats);
+		u64_stats_update_begin(&stats->syncp);
+		stats->tx_packets++;
+		stats->tx_bytes += len;
+		u64_stats_update_end(&stats->syncp);
+	} else {
+		this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
+	}
 
 	return ret;
 }
 
-static u16 vlan_dev_select_queue(struct net_device *dev, struct sk_buff *skb)
-{
-	struct net_device *rdev = vlan_dev_info(dev)->real_dev;
-	const struct net_device_ops *ops = rdev->netdev_ops;
-
-	return ops->ndo_select_queue(rdev, skb);
-}
-
 static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 {
 	/* TODO: gotta make sure the underlying layer can handle it,
@@ -719,8 +674,7 @@ static const struct header_ops vlan_header_ops = {
 	.parse	 = eth_header_parse,
 };
 
-static const struct net_device_ops vlan_netdev_ops, vlan_netdev_accel_ops,
-				   vlan_netdev_ops_sq, vlan_netdev_accel_ops_sq;
+static const struct net_device_ops vlan_netdev_ops;
 
 static int vlan_dev_init(struct net_device *dev)
 {
@@ -738,6 +692,7 @@ static int vlan_dev_init(struct net_device *dev)
 		  (1<<__LINK_STATE_PRESENT);
 
 	dev->features |= real_dev->features & real_dev->vlan_features;
+	dev->features |= NETIF_F_LLTX;
 	dev->gso_max_size = real_dev->gso_max_size;
 
 	/* ipv6 shared card related stuff */
@@ -755,26 +710,20 @@ static int vlan_dev_init(struct net_device *dev)
 	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
 		dev->header_ops      = real_dev->header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
-		if (real_dev->netdev_ops->ndo_select_queue)
-			dev->netdev_ops = &vlan_netdev_accel_ops_sq;
-		else
-			dev->netdev_ops = &vlan_netdev_accel_ops;
 	} else {
 		dev->header_ops      = &vlan_header_ops;
 		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
-		if (real_dev->netdev_ops->ndo_select_queue)
-			dev->netdev_ops = &vlan_netdev_ops_sq;
-		else
-			dev->netdev_ops = &vlan_netdev_ops;
 	}
 
+	dev->netdev_ops = &vlan_netdev_ops;
+
 	if (is_vlan_dev(real_dev))
 		subclass = 1;
 
 	vlan_dev_set_lockdep_class(dev, subclass);
 
-	vlan_dev_info(dev)->vlan_rx_stats = alloc_percpu(struct vlan_rx_stats);
-	if (!vlan_dev_info(dev)->vlan_rx_stats)
+	vlan_dev_info(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
+	if (!vlan_dev_info(dev)->vlan_pcpu_stats)
 		return -ENOMEM;
 
 	return 0;
@@ -786,8 +735,8 @@ static void vlan_dev_uninit(struct net_device *dev)
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	int i;
 
-	free_percpu(vlan->vlan_rx_stats);
-	vlan->vlan_rx_stats = NULL;
+	free_percpu(vlan->vlan_pcpu_stats);
+	vlan->vlan_pcpu_stats = NULL;
 	for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
 		while ((pm = vlan->egress_priority_map[i]) != NULL) {
 			vlan->egress_priority_map[i] = pm->next;
@@ -825,33 +774,37 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev)
 
 static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
-	dev_txq_stats_fold(dev, stats);
 
-	if (vlan_dev_info(dev)->vlan_rx_stats) {
-		struct vlan_rx_stats *p, accum = {0};
+	if (vlan_dev_info(dev)->vlan_pcpu_stats) {
+		struct vlan_pcpu_stats *p;
+		u32 rx_errors = 0, tx_dropped = 0;
 		int i;
 
 		for_each_possible_cpu(i) {
-			u64 rxpackets, rxbytes, rxmulticast;
+			u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
 			unsigned int start;
 
-			p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i);
+			p = per_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats, i);
 			do {
 				start = u64_stats_fetch_begin_bh(&p->syncp);
 				rxpackets	= p->rx_packets;
 				rxbytes		= p->rx_bytes;
 				rxmulticast	= p->rx_multicast;
+				txpackets	= p->tx_packets;
+				txbytes		= p->tx_bytes;
 			} while (u64_stats_fetch_retry_bh(&p->syncp, start));
-			accum.rx_packets += rxpackets;
-			accum.rx_bytes   += rxbytes;
-			accum.rx_multicast += rxmulticast;
-			/* rx_errors is ulong, not protected by syncp */
-			accum.rx_errors  += p->rx_errors;
+
+			stats->rx_packets += rxpackets;
+			stats->rx_bytes += rxbytes;
+			stats->multicast += rxmulticast;
+			stats->tx_packets += txpackets;
+			stats->tx_bytes += txbytes;
+			/* rx_errors & tx_dropped are u32 */
+			rx_errors += p->rx_errors;
+			tx_dropped += p->tx_dropped;
 		}
-		stats->rx_packets = accum.rx_packets;
-		stats->rx_bytes   = accum.rx_bytes;
-		stats->rx_errors  = accum.rx_errors;
-		stats->multicast  = accum.rx_multicast;
+		stats->rx_errors  = rx_errors;
+		stats->tx_dropped = tx_dropped;
 	}
 	return stats;
 }
@@ -908,80 +861,6 @@ static const struct net_device_ops vlan_netdev_ops = {
 #endif
 };
 
-static const struct net_device_ops vlan_netdev_accel_ops = {
-	.ndo_change_mtu		= vlan_dev_change_mtu,
-	.ndo_init		= vlan_dev_init,
-	.ndo_uninit		= vlan_dev_uninit,
-	.ndo_open		= vlan_dev_open,
-	.ndo_stop		= vlan_dev_stop,
-	.ndo_start_xmit		= vlan_dev_hwaccel_hard_start_xmit,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= vlan_dev_set_mac_address,
-	.ndo_set_rx_mode	= vlan_dev_set_rx_mode,
-	.ndo_set_multicast_list	= vlan_dev_set_rx_mode,
-	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
-	.ndo_do_ioctl		= vlan_dev_ioctl,
-	.ndo_neigh_setup	= vlan_dev_neigh_setup,
-	.ndo_get_stats64	= vlan_dev_get_stats64,
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
-	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
-	.ndo_fcoe_enable	= vlan_dev_fcoe_enable,
-	.ndo_fcoe_disable	= vlan_dev_fcoe_disable,
-	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
-#endif
-};
-
-static const struct net_device_ops vlan_netdev_ops_sq = {
-	.ndo_select_queue	= vlan_dev_select_queue,
-	.ndo_change_mtu		= vlan_dev_change_mtu,
-	.ndo_init		= vlan_dev_init,
-	.ndo_uninit		= vlan_dev_uninit,
-	.ndo_open		= vlan_dev_open,
-	.ndo_stop		= vlan_dev_stop,
-	.ndo_start_xmit		= vlan_dev_hard_start_xmit,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= vlan_dev_set_mac_address,
-	.ndo_set_rx_mode	= vlan_dev_set_rx_mode,
-	.ndo_set_multicast_list	= vlan_dev_set_rx_mode,
-	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
-	.ndo_do_ioctl		= vlan_dev_ioctl,
-	.ndo_neigh_setup	= vlan_dev_neigh_setup,
-	.ndo_get_stats64	= vlan_dev_get_stats64,
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
-	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
-	.ndo_fcoe_enable	= vlan_dev_fcoe_enable,
-	.ndo_fcoe_disable	= vlan_dev_fcoe_disable,
-	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
-#endif
-};
-
-static const struct net_device_ops vlan_netdev_accel_ops_sq = {
-	.ndo_select_queue	= vlan_dev_select_queue,
-	.ndo_change_mtu		= vlan_dev_change_mtu,
-	.ndo_init		= vlan_dev_init,
-	.ndo_uninit		= vlan_dev_uninit,
-	.ndo_open		= vlan_dev_open,
-	.ndo_stop		= vlan_dev_stop,
-	.ndo_start_xmit		= vlan_dev_hwaccel_hard_start_xmit,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= vlan_dev_set_mac_address,
-	.ndo_set_rx_mode	= vlan_dev_set_rx_mode,
-	.ndo_set_multicast_list	= vlan_dev_set_rx_mode,
-	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
-	.ndo_do_ioctl		= vlan_dev_ioctl,
-	.ndo_neigh_setup	= vlan_dev_neigh_setup,
-	.ndo_get_stats64	= vlan_dev_get_stats64,
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
-	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
-	.ndo_fcoe_enable	= vlan_dev_fcoe_enable,
-	.ndo_fcoe_disable	= vlan_dev_fcoe_disable,
-	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
-#endif
-};
-
 void vlan_setup(struct net_device *dev)
 {
 	ether_setup(dev);
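
With NETIF_F_LLTX set, dev_queue_xmit() takes no TX lock for the VLAN device, so the per-cpu area is the only serialization the counters need: each CPU is the sole writer of its own vlan_pcpu_stats. The per-cpu lifecycle used in this file follows the standard shape; fragments sketched with a hypothetical priv:

	/* ndo_init */
	priv->stats = alloc_percpu(struct vlan_pcpu_stats);
	if (!priv->stats)
		return -ENOMEM;

	/* hot path: this CPU's instance, no lock */
	this_cpu_inc(priv->stats->tx_dropped);

	/* stats fold: walk every possible CPU's instance */
	for_each_possible_cpu(i)
		sum += per_cpu_ptr(priv->stats, i)->tx_dropped;

	/* ndo_uninit */
	free_percpu(priv->stats);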
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index ddc105734af..be9a5c19a77 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -101,25 +101,6 @@ static int vlan_changelink(struct net_device *dev,
 	return 0;
 }
 
-static int vlan_get_tx_queues(struct net *net,
-			      struct nlattr *tb[],
-			      unsigned int *num_tx_queues,
-			      unsigned int *real_num_tx_queues)
-{
-	struct net_device *real_dev;
-
-	if (!tb[IFLA_LINK])
-		return -EINVAL;
-
-	real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));
-	if (!real_dev)
-		return -ENODEV;
-
-	*num_tx_queues      = real_dev->num_tx_queues;
-	*real_num_tx_queues = real_dev->real_num_tx_queues;
-	return 0;
-}
-
 static int vlan_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
@@ -237,7 +218,6 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = {
 	.maxtype	= IFLA_VLAN_MAX,
 	.policy		= vlan_policy,
 	.priv_size	= sizeof(struct vlan_dev_info),
-	.get_tx_queues	= vlan_get_tx_queues,
 	.setup		= vlan_setup,
 	.validate	= vlan_validate,
 	.newlink	= vlan_newlink,
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 80e280f5668..d1314cf18ad 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -280,7 +280,6 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
 	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
-	static const char fmt[] = "%30s %12lu\n";
 	static const char fmt64[] = "%30s %12llu\n";
 	int i;
 
@@ -299,10 +298,6 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	seq_puts(seq, "\n");
 	seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
 	seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
-	seq_printf(seq, fmt, "total headroom inc",
-		   dev_info->cnt_inc_headroom_on_tx);
-	seq_printf(seq, fmt, "total encap on xmit",
-		   dev_info->cnt_encap_on_xmit);
 	seq_printf(seq, "Device: %s", dev_info->real_dev->name);
 	/* now show all PRIORITY mappings relating to this VLAN */
 	seq_printf(seq, "\nINGRESS priority mappings: "
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 45c15f49140..798beac7f10 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -27,31 +27,16 @@
 
 #include <linux/module.h>
 #include <linux/errno.h>
+#include <linux/kernel.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/stddef.h>
 #include <linux/types.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include "protocol.h"
 
-#ifndef MIN
-#define MIN(a, b) (((a) < (b)) ? (a) : (b))
-#endif
-
-#ifndef MAX
-#define MAX(a, b) (((a) > (b)) ? (a) : (b))
-#endif
-
-#ifndef offset_of
-#define offset_of(type, memb) \
-	((unsigned long)(&((type *)0)->memb))
-#endif
-#ifndef container_of
-#define container_of(obj, type, memb) \
-	((type *)(((char *)obj) - offset_of(type, memb)))
-#endif
-
 static int
 p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
 
@@ -104,7 +89,7 @@ EXPORT_SYMBOL(p9stat_free);
 
 static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
 {
-	size_t len = MIN(pdu->size - pdu->offset, size);
+	size_t len = min(pdu->size - pdu->offset, size);
 	memcpy(data, &pdu->sdata[pdu->offset], len);
 	pdu->offset += len;
 	return size - len;
@@ -112,7 +97,7 @@ static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
 
 static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
 {
-	size_t len = MIN(pdu->capacity - pdu->size, size);
+	size_t len = min(pdu->capacity - pdu->size, size);
 	memcpy(&pdu->sdata[pdu->size], data, len);
 	pdu->size += len;
 	return size - len;
@@ -121,7 +106,7 @@ static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
 static size_t
 pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
 {
-	size_t len = MIN(pdu->capacity - pdu->size, size);
+	size_t len = min(pdu->capacity - pdu->size, size);
 	if (copy_from_user(&pdu->sdata[pdu->size], udata, len))
 		len = 0;
 
@@ -201,7 +186,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				if (errcode)
 					break;
 
-				size = MAX(len, 0);
+				size = max_t(int16_t, len, 0);
 
 				*sptr = kmalloc(size + 1, GFP_KERNEL);
 				if (*sptr == NULL) {
@@ -256,8 +241,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				p9pdu_readf(pdu, proto_version, "d", count);
 				if (!errcode) {
 					*count =
-					    MIN(*count,
-						pdu->size - pdu->offset);
+					    min_t(int32_t, *count,
+						  pdu->size - pdu->offset);
 					*data = &pdu->sdata[pdu->offset];
 				}
 			}
@@ -421,7 +406,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				const char *sptr = va_arg(ap, const char *);
 				int16_t len = 0;
 				if (sptr)
-					len = MIN(strlen(sptr), USHRT_MAX);
+					len = min_t(int16_t, strlen(sptr), USHRT_MAX);
 
 				errcode = p9pdu_writef(pdu, proto_version,
 								"w", len);
diff --git a/net/Kconfig b/net/Kconfig
index 55fd82e9ffd..126c2af0fc1 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -220,6 +220,11 @@ config RPS
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
+config XPS
+	boolean
+	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+	default y
+
 menu "Network testing"
 
 config NET_PKTGEN
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ad2b232a205..fce2eae8d47 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -97,7 +97,7 @@ static LIST_HEAD(br2684_devs);
 
 static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev)
 {
-	return (struct br2684_dev *)netdev_priv(net_dev);
+	return netdev_priv(net_dev);
 }
 
 static inline struct net_device *list_entry_brdev(const struct list_head *le)
diff --git a/net/atm/clip.c b/net/atm/clip.c
index ff956d1115b..d257da50fcf 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -502,7 +502,8 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 	struct atmarp_entry *entry;
 	int error;
 	struct clip_vcc *clip_vcc;
-	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} };
+	struct flowi fl = { .fl4_dst = ip,
+			    .fl4_tos = 1 };
 	struct rtable *rt;
 
 	if (vcc->push != clip_push) {
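
The fl4_* names are flat aliases for the fields the removed initializer reached through the nl_u/ip4_u union, so IPv4 flow keys can be written as plain designated initializers. A sketch of the route-lookup idiom of this era (daddr, tos, and net assumed in scope):

	struct flowi fl = {
		.fl4_dst = daddr,
		.fl4_tos = RT_TOS(tos),
	};
	struct rtable *rt;

	if (ip_route_output_key(net, &rt, &fl))
		return -EHOSTUNREACH;
	/* ... use rt, then ip_rt_put(rt) */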
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 181d70c73d7..179e04bc99d 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -816,8 +816,7 @@ static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
 	if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg])
 		return -EINVAL;
 	vcc->proto_data = dev_lec[arg];
-	return lec_mcast_make((struct lec_priv *)netdev_priv(dev_lec[arg]),
-			      vcc);
+	return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc);
 }
 
 /* Initialize device. */
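
This and the br2684 change above rest on the same C rule: netdev_priv() returns void *, which converts implicitly to any object pointer, so the casts were noise. Typical shape, with my_priv and my_open hypothetical:

	static int my_open(struct net_device *dev)
	{
		struct my_priv *priv = netdev_priv(dev);	/* no cast needed */

		priv->dev = dev;
		return 0;
	}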
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index d1e433f7d67..7ca1f46a471 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -10,4 +10,4 @@ obj-$(CONFIG_BT_BNEP) += bnep/
 obj-$(CONFIG_BT_CMTP)	+= cmtp/
 obj-$(CONFIG_BT_HIDP)	+= hidp/
 
-bluetooth-objs := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o
+bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o
diff --git a/net/bridge/br.c b/net/bridge/br.c
index c8436fa3134..84bbb82599b 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -22,8 +22,6 @@
 
 #include "br_private.h"
 
-int (*br_should_route_hook)(struct sk_buff *skb);
-
 static const struct stp_proto br_stp_proto = {
 	.rcv	= br_stp_rcv,
 };
@@ -102,8 +100,6 @@ static void __exit br_deinit(void)
 	br_fdb_fini();
 }
 
-EXPORT_SYMBOL(br_should_route_hook);
-
 module_init(br_init)
 module_exit(br_deinit)
 MODULE_LICENSE("GPL");
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 90512ccfd3e..2872393b293 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -238,15 +238,18 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
 int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
 {
 	struct net_bridge_fdb_entry *fdb;
+	struct net_bridge_port *port;
 	int ret;
 
-	if (!br_port_exists(dev))
-		return 0;
-
 	rcu_read_lock();
-	fdb = __br_fdb_get(br_port_get_rcu(dev)->br, addr);
-	ret = fdb && fdb->dst->dev != dev &&
-		fdb->dst->state == BR_STATE_FORWARDING;
+	port = br_port_get_rcu(dev);
+	if (!port)
+		ret = 0;
+	else {
+		fdb = __br_fdb_get(port->br, addr);
+		ret = fdb && fdb->dst->dev != dev &&
+			fdb->dst->state == BR_STATE_FORWARDING;
+	}
 	rcu_read_unlock();
 
 	return ret;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index cbfe87f0f34..2bd11ec6d16 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -223,7 +223,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 	struct net_bridge_port_group *p;
 	struct hlist_node *rp;
 
-	rp = rcu_dereference(br->router_list.first);
+	rp = rcu_dereference(hlist_first_rcu(&br->router_list));
 	p = mdst ? rcu_dereference(mdst->ports) : NULL;
 	while (p || rp) {
 		struct net_bridge_port *port, *lport, *rport;
@@ -242,7 +242,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 		if ((unsigned long)lport >= (unsigned long)port)
 			p = rcu_dereference(p->next);
 		if ((unsigned long)rport >= (unsigned long)port)
-			rp = rcu_dereference(rp->next);
+			rp = rcu_dereference(hlist_next_rcu(rp));
 	}
 
 	if (!prev)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 89ad25a7620..d9d1e2bac1d 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -475,11 +475,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 {
 	struct net_bridge_port *p;
 
-	if (!br_port_exists(dev))
-		return -EINVAL;
-
-	p = br_port_get(dev);
-	if (p->br != br)
+	p = br_port_get_rtnl(dev);
+	if (!p || p->br != br)
 		return -EINVAL;
 
 	del_nbp(p);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 25207a1f182..6f6d8e1b776 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -21,6 +21,10 @@
 /* Bridge group multicast address 802.1d (pg 51). */
 const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
 
+/* Hook for brouter */
+br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
+EXPORT_SYMBOL(br_should_route_hook);
+
 static int br_pass_frame_up(struct sk_buff *skb)
 {
 	struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
@@ -139,7 +143,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
 	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
-	int (*rhook)(struct sk_buff *skb);
+	br_should_route_hook_t *rhook;
 
 	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
 		return skb;
@@ -173,8 +177,8 @@ forward:
 	switch (p->state) {
 	case BR_STATE_FORWARDING:
 		rhook = rcu_dereference(br_should_route_hook);
-		if (rhook != NULL) {
-			if (rhook(skb))
+		if (rhook) {
+			if ((*rhook)(skb))
 				return skb;
 			dest = eth_hdr(skb)->h_dest;
 		}
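
br_should_route_hook is now a typed function pointer published via RCU: the hot path samples it with rcu_dereference() inside its existing rcu_read_lock() section, as br_handle_frame() shows above, and the provider (ebtables' broute table) installs and removes it around a grace period. Provider side, sketched with a hypothetical my_brouter:

	static int my_brouter(struct sk_buff *skb)
	{
		return 0;	/* 0: keep bridging; nonzero: hand back for routing */
	}

	/* attach */
	rcu_assign_pointer(br_should_route_hook, my_brouter);

	/* detach: unpublish, then wait out readers before unloading */
	rcu_assign_pointer(br_should_route_hook, NULL);
	synchronize_net();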
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index eb5b256ffc8..326e599f83f 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -33,6 +33,9 @@
 
 #include "br_private.h"
 
+#define mlock_dereference(X, br) \
+	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
+
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static inline int ipv6_is_local_multicast(const struct in6_addr *addr)
 {
@@ -135,7 +138,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(
 struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 					struct sk_buff *skb)
 {
-	struct net_bridge_mdb_htable *mdb = br->mdb;
+	struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb);
 	struct br_ip ip;
 
 	if (br->multicast_disabled)
@@ -235,7 +238,8 @@ static void br_multicast_group_expired(unsigned long data)
 	if (mp->ports)
 		goto out;
 
-	mdb = br->mdb;
+	mdb = mlock_dereference(br->mdb, br);
+
 	hlist_del_rcu(&mp->hlist[mdb->ver]);
 	mdb->size--;
 
@@ -249,16 +253,20 @@ out:
 static void br_multicast_del_pg(struct net_bridge *br,
 				struct net_bridge_port_group *pg)
 {
-	struct net_bridge_mdb_htable *mdb = br->mdb;
+	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
-	struct net_bridge_port_group **pp;
+	struct net_bridge_port_group __rcu **pp;
+
+	mdb = mlock_dereference(br->mdb, br);
 
 	mp = br_mdb_ip_get(mdb, &pg->addr);
 	if (WARN_ON(!mp))
 		return;
 
-	for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+	for (pp = &mp->ports;
+	     (p = mlock_dereference(*pp, br)) != NULL;
+	     pp = &p->next) {
 		if (p != pg)
 			continue;
 
@@ -294,10 +302,10 @@ out:
 	spin_unlock(&br->multicast_lock);
 }
 
-static int br_mdb_rehash(struct net_bridge_mdb_htable **mdbp, int max,
+static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max,
 			 int elasticity)
 {
-	struct net_bridge_mdb_htable *old = *mdbp;
+	struct net_bridge_mdb_htable *old = rcu_dereference_protected(*mdbp, 1);
 	struct net_bridge_mdb_htable *mdb;
 	int err;
 
@@ -569,7 +577,7 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
 	struct net_bridge *br, struct net_bridge_port *port,
 	struct br_ip *group, int hash)
 {
-	struct net_bridge_mdb_htable *mdb = br->mdb;
+	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
 	struct hlist_node *p;
 	unsigned count = 0;
@@ -577,6 +585,7 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
 	int elasticity;
 	int err;
 
+	mdb = rcu_dereference_protected(br->mdb, 1);
 	hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
 		count++;
 		if (unlikely(br_ip_equal(group, &mp->addr)))
@@ -642,10 +651,11 @@ static struct net_bridge_mdb_entry *br_multicast_new_group(
 	struct net_bridge *br, struct net_bridge_port *port,
 	struct br_ip *group)
 {
-	struct net_bridge_mdb_htable *mdb = br->mdb;
+	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
 	int hash;
 
+	mdb = rcu_dereference_protected(br->mdb, 1);
 	if (!mdb) {
 		if (br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0))
 			return NULL;
@@ -660,7 +670,7 @@ static struct net_bridge_mdb_entry *br_multicast_new_group(
 
 	case -EAGAIN:
 rehash:
-		mdb = br->mdb;
+		mdb = rcu_dereference_protected(br->mdb, 1);
 		hash = br_ip_hash(mdb, group);
 		break;
 
@@ -692,7 +702,7 @@ static int br_multicast_add_group(struct net_bridge *br,
 {
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
-	struct net_bridge_port_group **pp;
+	struct net_bridge_port_group __rcu **pp;
 	unsigned long now = jiffies;
 	int err;
 
@@ -712,7 +722,9 @@ static int br_multicast_add_group(struct net_bridge *br,
 		goto out;
 	}
 
-	for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+	for (pp = &mp->ports;
+	     (p = mlock_dereference(*pp, br)) != NULL;
+	     pp = &p->next) {
 		if (p->port == port)
 			goto found;
 		if ((unsigned long)p->port < (unsigned long)port)
@@ -1106,7 +1118,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	struct net_bridge_mdb_entry *mp;
 	struct igmpv3_query *ih3;
 	struct net_bridge_port_group *p;
-	struct net_bridge_port_group **pp;
+	struct net_bridge_port_group __rcu **pp;
 	unsigned long max_delay;
 	unsigned long now = jiffies;
 	__be32 group;
@@ -1145,7 +1157,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	if (!group)
 		goto out;
 
-	mp = br_mdb_ip4_get(br->mdb, group);
+	mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group);
 	if (!mp)
 		goto out;
 
@@ -1157,7 +1169,9 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	     try_to_del_timer_sync(&mp->timer) >= 0))
 		mod_timer(&mp->timer, now + max_delay);
 
-	for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+	for (pp = &mp->ports;
+	     (p = mlock_dereference(*pp, br)) != NULL;
+	     pp = &p->next) {
 		if (timer_pending(&p->timer) ?
 		    time_after(p->timer.expires, now + max_delay) :
 		    try_to_del_timer_sync(&p->timer) >= 0)
@@ -1178,7 +1192,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb);
 	struct net_bridge_mdb_entry *mp;
 	struct mld2_query *mld2q;
-	struct net_bridge_port_group *p, **pp;
+	struct net_bridge_port_group *p;
+	struct net_bridge_port_group __rcu **pp;
 	unsigned long max_delay;
 	unsigned long now = jiffies;
 	struct in6_addr *group = NULL;
@@ -1214,7 +1229,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	if (!group)
 		goto out;
 
-	mp = br_mdb_ip6_get(br->mdb, group);
+	mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group);
 	if (!mp)
 		goto out;
 
@@ -1225,7 +1240,9 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	    try_to_del_timer_sync(&mp->timer) >= 0))
 		mod_timer(&mp->timer, now + max_delay);
 
-	for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+	for (pp = &mp->ports;
+	     (p = mlock_dereference(*pp, br)) != NULL;
+	     pp = &p->next) {
 		if (timer_pending(&p->timer) ?
 		    time_after(p->timer.expires, now + max_delay) :
 		    try_to_del_timer_sync(&p->timer) >= 0)
@@ -1254,7 +1271,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
 	    timer_pending(&br->multicast_querier_timer))
 		goto out;
 
-	mdb = br->mdb;
+	mdb = mlock_dereference(br->mdb, br);
 	mp = br_mdb_ip_get(mdb, group);
 	if (!mp)
 		goto out;
@@ -1277,7 +1294,9 @@ static void br_multicast_leave_group(struct net_bridge *br,
 		goto out;
 	}
 
-	for (p = mp->ports; p; p = p->next) {
+	for (p = mlock_dereference(mp->ports, br);
+	     p != NULL;
+	     p = mlock_dereference(p->next, br)) {
 		if (p->port != port)
 			continue;
 
@@ -1625,7 +1644,7 @@ void br_multicast_stop(struct net_bridge *br)
 	del_timer_sync(&br->multicast_query_timer);
 
 	spin_lock_bh(&br->multicast_lock);
-	mdb = br->mdb;
+	mdb = mlock_dereference(br->mdb, br);
 	if (!mdb)
 		goto out;
 
@@ -1729,6 +1748,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
 {
 	struct net_bridge_port *port;
 	int err = 0;
+	struct net_bridge_mdb_htable *mdb;
 
 	spin_lock(&br->multicast_lock);
 	if (br->multicast_disabled == !val)
@@ -1741,15 +1761,16 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
 	if (!netif_running(br->dev))
 		goto unlock;
 
-	if (br->mdb) {
-		if (br->mdb->old) {
+	mdb = mlock_dereference(br->mdb, br);
+	if (mdb) {
+		if (mdb->old) {
 			err = -EEXIST;
 rollback:
 			br->multicast_disabled = !!val;
 			goto unlock;
 		}
 
-		err = br_mdb_rehash(&br->mdb, br->mdb->max,
+		err = br_mdb_rehash(&br->mdb, mdb->max,
 				    br->hash_elasticity);
 		if (err)
 			goto rollback;
@@ -1774,6 +1795,7 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
 {
 	int err = -ENOENT;
 	u32 old;
+	struct net_bridge_mdb_htable *mdb;
 
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev))
@@ -1782,7 +1804,9 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
 	err = -EINVAL;
 	if (!is_power_of_2(val))
 		goto unlock;
-	if (br->mdb && val < br->mdb->size)
+
+	mdb = mlock_dereference(br->mdb, br);
+	if (mdb && val < mdb->size)
 		goto unlock;
 
 	err = 0;
@@ -1790,8 +1814,8 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
 	old = br->hash_max;
 	br->hash_max = val;
 
-	if (br->mdb) {
-		if (br->mdb->old) {
+	if (mdb) {
+		if (mdb->old) {
 			err = -EEXIST;
 rollback:
 			br->hash_max = old;
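
mlock_dereference() is rcu_dereference_protected() plus a lockdep_is_held() condition: it lets update-side code read the __rcu pointers (br->mdb, mp->ports, p->next) without an RCU read section or read barrier, while sparse stays quiet and lockdep complains if multicast_lock is not actually held. The general pattern, sketched for a hypothetical gp guarded by my_lock:

	static DEFINE_SPINLOCK(my_lock);
	static struct foo __rcu *gp;	/* writers hold my_lock */

	/* update side: checked by lockdep, no barrier issued */
	spin_lock(&my_lock);
	old = rcu_dereference_protected(gp, lockdep_is_held(&my_lock));
	rcu_assign_pointer(gp, new_p);
	spin_unlock(&my_lock);

	/* read side: unchanged */
	rcu_read_lock();
	p = rcu_dereference(gp);
	if (p)
		use(p);
	rcu_read_unlock();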
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 865fd7634b6..6e139209391 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -131,17 +131,18 @@ void br_netfilter_rtable_init(struct net_bridge *br)
 
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 {
-	if (!br_port_exists(dev))
-		return NULL;
-	return &br_port_get_rcu(dev)->br->fake_rtable;
+	struct net_bridge_port *port;
+
+	port = br_port_get_rcu(dev);
+	return port ? &port->br->fake_rtable : NULL;
 }
 
 static inline struct net_device *bridge_parent(const struct net_device *dev)
 {
-	if (!br_port_exists(dev))
-		return NULL;
+	struct net_bridge_port *port;
 
-	return br_port_get_rcu(dev)->br->dev;
+	port = br_port_get_rcu(dev);
+	return port ? port->br->dev : NULL;
 }
 
 static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
@@ -412,13 +413,8 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 	if (dnat_took_place(skb)) {
 		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
 			struct flowi fl = {
-				.nl_u = {
-					.ip4_u = {
-						 .daddr = iph->daddr,
-						 .saddr = 0,
-						 .tos = RT_TOS(iph->tos) },
-				},
-				.proto = 0,
+				.fl4_dst = iph->daddr,
+				.fl4_tos = RT_TOS(iph->tos),
 			};
 			struct in_device *in_dev = __in_dev_get_rcu(dev);
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4a6a378c84e..f8bf4c7f842 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -119,11 +119,13 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 
 	idx = 0;
 	for_each_netdev(net, dev) {
+		struct net_bridge_port *port = br_port_get_rtnl(dev);
+
 		/* not a bridge port */
-		if (!br_port_exists(dev) || idx < cb->args[0])
+		if (!port || idx < cb->args[0])
 			goto skip;
 
-		if (br_fill_ifinfo(skb, br_port_get(dev),
+		if (br_fill_ifinfo(skb, port,
 				   NETLINK_CB(cb->skb).pid,
 				   cb->nlh->nlmsg_seq, RTM_NEWLINK,
 				   NLM_F_MULTI) < 0)
@@ -169,9 +171,9 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
169 if (!dev) 171 if (!dev)
170 return -ENODEV; 172 return -ENODEV;
171 173
172 if (!br_port_exists(dev)) 174 p = br_port_get_rtnl(dev);
175 if (!p)
173 return -EINVAL; 176 return -EINVAL;
174 p = br_port_get(dev);
175 177
176 /* if kernel STP is running, don't allow changes */ 178 /* if kernel STP is running, don't allow changes */
177 if (p->br->stp_enabled == BR_KERNEL_STP) 179 if (p->br->stp_enabled == BR_KERNEL_STP)
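
br_rtm_setlink() is an rtnetlink doit handler and runs with RTNL held, so the new br_port_get_rtnl() accessor is the right flavour here; the core helper it builds on is, from include/linux/rtnetlink.h:

        #define rtnl_dereference(p)                                     \
                rcu_dereference_protected(p, lockdep_rtnl_is_held())

This also retires the separate br_port_exists()/br_port_get() pair on this path, so the pointer is fetched exactly once.
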
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 404d4e14c6a..7d337c9b608 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -32,15 +32,15 @@ struct notifier_block br_device_notifier = {
32static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) 32static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
33{ 33{
34 struct net_device *dev = ptr; 34 struct net_device *dev = ptr;
35 struct net_bridge_port *p = br_port_get(dev); 35 struct net_bridge_port *p;
36 struct net_bridge *br; 36 struct net_bridge *br;
37 int err; 37 int err;
38 38
39 /* not a port of a bridge */ 39 /* not a port of a bridge */
40 if (!br_port_exists(dev)) 40 p = br_port_get_rtnl(dev);
41 if (!p)
41 return NOTIFY_DONE; 42 return NOTIFY_DONE;
42 43
43 p = br_port_get(dev);
44 br = p->br; 44 br = p->br;
45 45
46 switch (event) { 46 switch (event) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 75c90edaf7d..84aac7734bf 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -72,7 +72,7 @@ struct net_bridge_fdb_entry
72 72
73struct net_bridge_port_group { 73struct net_bridge_port_group {
74 struct net_bridge_port *port; 74 struct net_bridge_port *port;
75 struct net_bridge_port_group *next; 75 struct net_bridge_port_group __rcu *next;
76 struct hlist_node mglist; 76 struct hlist_node mglist;
77 struct rcu_head rcu; 77 struct rcu_head rcu;
78 struct timer_list timer; 78 struct timer_list timer;
@@ -86,7 +86,7 @@ struct net_bridge_mdb_entry
86 struct hlist_node hlist[2]; 86 struct hlist_node hlist[2];
87 struct hlist_node mglist; 87 struct hlist_node mglist;
88 struct net_bridge *br; 88 struct net_bridge *br;
89 struct net_bridge_port_group *ports; 89 struct net_bridge_port_group __rcu *ports;
90 struct rcu_head rcu; 90 struct rcu_head rcu;
91 struct timer_list timer; 91 struct timer_list timer;
92 struct timer_list query_timer; 92 struct timer_list query_timer;
@@ -151,11 +151,20 @@ struct net_bridge_port
151#endif 151#endif
152}; 152};
153 153
154#define br_port_get_rcu(dev) \
155 ((struct net_bridge_port *) rcu_dereference(dev->rx_handler_data))
156#define br_port_get(dev) ((struct net_bridge_port *) dev->rx_handler_data)
157#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) 154#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
158 155
156static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
157{
158 struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data);
159 return br_port_exists(dev) ? port : NULL;
160}
161
162static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev)
163{
164 return br_port_exists(dev) ?
165 rtnl_dereference(dev->rx_handler_data) : NULL;
166}
167
159struct br_cpu_netstats { 168struct br_cpu_netstats {
160 u64 rx_packets; 169 u64 rx_packets;
161 u64 rx_bytes; 170 u64 rx_bytes;
@@ -227,7 +236,7 @@ struct net_bridge
227 unsigned long multicast_startup_query_interval; 236 unsigned long multicast_startup_query_interval;
228 237
229 spinlock_t multicast_lock; 238 spinlock_t multicast_lock;
230 struct net_bridge_mdb_htable *mdb; 239 struct net_bridge_mdb_htable __rcu *mdb;
231 struct hlist_head router_list; 240 struct hlist_head router_list;
232 struct hlist_head mglist; 241 struct hlist_head mglist;
233 242
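
The __rcu markings on next, ports and mdb change no generated code; they let sparse (make C=1, with CONFIG_SPARSE_RCU_POINTER) complain about any access that bypasses the RCU accessors. A toy example of what the checker wants to see (struct conf and the identifiers are made up):

        struct conf __rcu *active;
        struct conf *c;

        /* updater, under the relevant lock */
        rcu_assign_pointer(active, next_conf); /* ordered publish */

        /* reader */
        rcu_read_lock();
        c = rcu_dereference(active);           /* annotated load */
        if (c)
                use(c);
        rcu_read_unlock();
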
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 35cf27087b5..3d9a55d3822 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -141,10 +141,6 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
141 struct net_bridge *br; 141 struct net_bridge *br;
142 const unsigned char *buf; 142 const unsigned char *buf;
143 143
144 if (!br_port_exists(dev))
145 goto err;
146 p = br_port_get_rcu(dev);
147
148 if (!pskb_may_pull(skb, 4)) 144 if (!pskb_may_pull(skb, 4))
149 goto err; 145 goto err;
150 146
@@ -153,6 +149,10 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
153 if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) 149 if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0)
154 goto err; 150 goto err;
155 151
152 p = br_port_get_rcu(dev);
153 if (!p)
154 goto err;
155
156 br = p->br; 156 br = p->br;
157 spin_lock(&br->lock); 157 spin_lock(&br->lock);
158 158
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index ae3f106c390..1bcaf36ad61 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -87,7 +87,8 @@ static int __init ebtable_broute_init(void)
87 if (ret < 0) 87 if (ret < 0)
88 return ret; 88 return ret;
89 /* see br_input.c */ 89 /* see br_input.c */
90 rcu_assign_pointer(br_should_route_hook, ebt_broute); 90 rcu_assign_pointer(br_should_route_hook,
91 (br_should_route_hook_t *)ebt_broute);
91 return 0; 92 return 0;
92} 93}
93 94
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index a1dcf83f0d5..cbc9f395ab1 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -128,6 +128,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
128 const struct net_device *in, const struct net_device *out) 128 const struct net_device *in, const struct net_device *out)
129{ 129{
130 const struct ethhdr *h = eth_hdr(skb); 130 const struct ethhdr *h = eth_hdr(skb);
131 const struct net_bridge_port *p;
131 __be16 ethproto; 132 __be16 ethproto;
132 int verdict, i; 133 int verdict, i;
133 134
@@ -148,13 +149,11 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
148 if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) 149 if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
149 return 1; 150 return 1;
150 /* rcu_read_lock()ed by nf_hook_slow */ 151 /* rcu_read_lock()ed by nf_hook_slow */
151 if (in && br_port_exists(in) && 152 if (in && (p = br_port_get_rcu(in)) != NULL &&
152 FWINV2(ebt_dev_check(e->logical_in, br_port_get_rcu(in)->br->dev), 153 FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN))
153 EBT_ILOGICALIN))
154 return 1; 154 return 1;
155 if (out && br_port_exists(out) && 155 if (out && (p = br_port_get_rcu(out)) != NULL &&
156 FWINV2(ebt_dev_check(e->logical_out, br_port_get_rcu(out)->br->dev), 156 FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT))
157 EBT_ILOGICALOUT))
158 return 1; 157 return 1;
159 158
160 if (e->bitmask & EBT_SOURCEMAC) { 159 if (e->bitmask & EBT_SOURCEMAC) {
diff --git a/net/caif/Makefile b/net/caif/Makefile
index f87481fb0e6..9d38e406e4a 100644
--- a/net/caif/Makefile
+++ b/net/caif/Makefile
@@ -1,8 +1,6 @@
1ifeq ($(CONFIG_CAIF_DEBUG),y) 1ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG
2EXTRA_CFLAGS += -DDEBUG
3endif
4 2
5caif-objs := caif_dev.o \ 3caif-y := caif_dev.o \
6 cfcnfg.o cfmuxl.o cfctrl.o \ 4 cfcnfg.o cfmuxl.o cfctrl.o \
7 cffrml.o cfveil.o cfdbgl.o\ 5 cffrml.o cfveil.o cfdbgl.o\
8 cfserl.o cfdgml.o \ 6 cfserl.o cfdgml.o \
@@ -13,4 +11,4 @@ obj-$(CONFIG_CAIF) += caif.o
13obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o 11obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o
14obj-$(CONFIG_CAIF) += caif_socket.o 12obj-$(CONFIG_CAIF) += caif_socket.o
15 13
16export-objs := caif.o 14export-y := caif.o
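
The Makefile hunks here and in net/can/ and net/ceph/ below are one mechanical kbuild modernisation: ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG collapses the old ifeq/EXTRA_CFLAGS block, since the assignment only lands in ccflags-y when the option is y, and the <module>-y spelling replaces the deprecated <module>-objs form for listing a composite object's parts.
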
diff --git a/net/can/Makefile b/net/can/Makefile
index 9cd3c4b3abd..2d3894b3274 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -3,10 +3,10 @@
3# 3#
4 4
5obj-$(CONFIG_CAN) += can.o 5obj-$(CONFIG_CAN) += can.o
6can-objs := af_can.o proc.o 6can-y := af_can.o proc.o
7 7
8obj-$(CONFIG_CAN_RAW) += can-raw.o 8obj-$(CONFIG_CAN_RAW) += can-raw.o
9can-raw-objs := raw.o 9can-raw-y := raw.o
10 10
11obj-$(CONFIG_CAN_BCM) += can-bcm.o 11obj-$(CONFIG_CAN_BCM) += can-bcm.o
12can-bcm-objs := bcm.o 12can-bcm-y := bcm.o
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 5f19415ec9c..e87ef435e11 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -3,7 +3,7 @@
3# 3#
4obj-$(CONFIG_CEPH_LIB) += libceph.o 4obj-$(CONFIG_CEPH_LIB) += libceph.o
5 5
6libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \ 6libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
7 mon_client.o \ 7 mon_client.o \
8 osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ 8 osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
9 debugfs.o \ 9 debugfs.o \
diff --git a/net/core/datagram.c b/net/core/datagram.c
index cd1e039c875..18ac112ea7a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -177,7 +177,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
177 * interrupt level will suddenly eat the receive_queue. 177 * interrupt level will suddenly eat the receive_queue.
178 * 178 *
179 * Look at current nfs client by the way... 179 * Look at current nfs client by the way...
180 * However, this function was corrent in any case. 8) 180 * However, this function was correct in any case. 8)
181 */ 181 */
182 unsigned long cpu_flags; 182 unsigned long cpu_flags;
183 183
diff --git a/net/core/dev.c b/net/core/dev.c
index 0dd54a69dac..d28b3a023bb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -743,34 +743,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
743EXPORT_SYMBOL(dev_get_by_index); 743EXPORT_SYMBOL(dev_get_by_index);
744 744
745/** 745/**
746 * dev_getbyhwaddr - find a device by its hardware address 746 * dev_getbyhwaddr_rcu - find a device by its hardware address
747 * @net: the applicable net namespace 747 * @net: the applicable net namespace
748 * @type: media type of device 748 * @type: media type of device
749 * @ha: hardware address 749 * @ha: hardware address
750 * 750 *
751 * Search for an interface by MAC address. Returns NULL if the device 751 * Search for an interface by MAC address. Returns NULL if the device
752 * is not found or a pointer to the device. The caller must hold the 752 * is not found or a pointer to the device. The caller must hold RCU
753 * rtnl semaphore. The returned device has not had its ref count increased 753 * The returned device has not had its ref count increased
754 * and the caller must therefore be careful about locking 754 * and the caller must therefore be careful about locking
755 * 755 *
756 * BUGS:
757 * If the API was consistent this would be __dev_get_by_hwaddr
758 */ 756 */
759 757
760struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) 758struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
759 const char *ha)
761{ 760{
762 struct net_device *dev; 761 struct net_device *dev;
763 762
764 ASSERT_RTNL(); 763 for_each_netdev_rcu(net, dev)
765
766 for_each_netdev(net, dev)
767 if (dev->type == type && 764 if (dev->type == type &&
768 !memcmp(dev->dev_addr, ha, dev->addr_len)) 765 !memcmp(dev->dev_addr, ha, dev->addr_len))
769 return dev; 766 return dev;
770 767
771 return NULL; 768 return NULL;
772} 769}
773EXPORT_SYMBOL(dev_getbyhwaddr); 770EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
774 771
775struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) 772struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
776{ 773{
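
With the RTNL requirement dropped, a caller supplies its own read-side section and pins the device before leaving it. A sketch of the expected calling pattern (mac is a placeholder buffer of dev->addr_len bytes):

        struct net_device *dev;

        rcu_read_lock();
        dev = dev_getbyhwaddr_rcu(net, ARPHRD_ETHER, mac);
        if (dev)
                dev_hold(dev);          /* keep it past rcu_read_unlock() */
        rcu_read_unlock();
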
@@ -1557,12 +1554,19 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1557 */ 1554 */
1558int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) 1555int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1559{ 1556{
1557 int rc;
1558
1560 if (txq < 1 || txq > dev->num_tx_queues) 1559 if (txq < 1 || txq > dev->num_tx_queues)
1561 return -EINVAL; 1560 return -EINVAL;
1562 1561
1563 if (dev->reg_state == NETREG_REGISTERED) { 1562 if (dev->reg_state == NETREG_REGISTERED) {
1564 ASSERT_RTNL(); 1563 ASSERT_RTNL();
1565 1564
1565 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
1566 txq);
1567 if (rc)
1568 return rc;
1569
1566 if (txq < dev->real_num_tx_queues) 1570 if (txq < dev->real_num_tx_queues)
1567 qdisc_reset_all_tx_gt(dev, txq); 1571 qdisc_reset_all_tx_gt(dev, txq);
1568 } 1572 }
@@ -1794,16 +1798,18 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1794 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); 1798 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1795 struct packet_type *ptype; 1799 struct packet_type *ptype;
1796 __be16 type = skb->protocol; 1800 __be16 type = skb->protocol;
1801 int vlan_depth = ETH_HLEN;
1797 int err; 1802 int err;
1798 1803
1799 if (type == htons(ETH_P_8021Q)) { 1804 while (type == htons(ETH_P_8021Q)) {
1800 struct vlan_ethhdr *veh; 1805 struct vlan_hdr *vh;
1801 1806
1802 if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN))) 1807 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
1803 return ERR_PTR(-EINVAL); 1808 return ERR_PTR(-EINVAL);
1804 1809
1805 veh = (struct vlan_ethhdr *)skb->data; 1810 vh = (struct vlan_hdr *)(skb->data + vlan_depth);
1806 type = veh->h_vlan_encapsulated_proto; 1811 type = vh->h_vlan_encapsulated_proto;
1812 vlan_depth += VLAN_HLEN;
1807 } 1813 }
1808 1814
1809 skb_reset_mac_header(skb); 1815 skb_reset_mac_header(skb);
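
For reference, the offsets the new loop walks: an untagged frame never enters the loop; with one 802.1Q tag the struct vlan_hdr sits at vlan_depth = ETH_HLEN (14 bytes); each further (QinQ) tag adds VLAN_HLEN (4), so a second header is read at offset 18. Every iteration first pulls vlan_depth + VLAN_HLEN bytes, keeping the read in bounds however deep the tag stack is.
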
@@ -1817,8 +1823,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1817 if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) 1823 if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1818 dev->ethtool_ops->get_drvinfo(dev, &info); 1824 dev->ethtool_ops->get_drvinfo(dev, &info);
1819 1825
1820 WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " 1826 WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
1821 "ip_summed=%d",
1822 info.driver, dev ? dev->features : 0L, 1827 info.driver, dev ? dev->features : 0L,
1823 skb->sk ? skb->sk->sk_route_caps : 0L, 1828 skb->sk ? skb->sk->sk_route_caps : 0L,
1824 skb->len, skb->data_len, skb->ip_summed); 1829 skb->len, skb->data_len, skb->ip_summed);
@@ -1967,6 +1972,23 @@ static inline void skb_orphan_try(struct sk_buff *skb)
1967 } 1972 }
1968} 1973}
1969 1974
1975int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
1976{
1977 __be16 protocol = skb->protocol;
1978
1979 if (protocol == htons(ETH_P_8021Q)) {
1980 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1981 protocol = veh->h_vlan_encapsulated_proto;
1982 } else if (!skb->vlan_tci)
1983 return dev->features;
1984
1985 if (protocol != htons(ETH_P_8021Q))
1986 return dev->features & dev->vlan_features;
1987 else
1988 return 0;
1989}
1990EXPORT_SYMBOL(netif_get_vlan_features);
1991
1970/* 1992/*
1971 * Returns true if either: 1993 * Returns true if either:
1972 * 1. skb has frag_list and the device doesn't support FRAGLIST, or 1994 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
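
netif_get_vlan_features() boils down to three answers: untagged traffic (not 802.1Q and no vlan_tci) gets the full dev->features; a single tag, whether in-band or carried in skb->vlan_tci, gets dev->features & dev->vlan_features; and anything that still reads as 802.1Q after one unwrap (stacked tags) gets 0, forcing the software fallback.
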
@@ -1977,15 +1999,20 @@ static inline void skb_orphan_try(struct sk_buff *skb)
1977static inline int skb_needs_linearize(struct sk_buff *skb, 1999static inline int skb_needs_linearize(struct sk_buff *skb,
1978 struct net_device *dev) 2000 struct net_device *dev)
1979{ 2001{
1980 int features = dev->features; 2002 if (skb_is_nonlinear(skb)) {
2003 int features = dev->features;
1981 2004
1982 if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb)) 2005 if (vlan_tx_tag_present(skb))
1983 features &= dev->vlan_features; 2006 features &= dev->vlan_features;
2007
2008 return (skb_has_frag_list(skb) &&
2009 !(features & NETIF_F_FRAGLIST)) ||
2010 (skb_shinfo(skb)->nr_frags &&
2011 (!(features & NETIF_F_SG) ||
2012 illegal_highdma(dev, skb)));
2013 }
1984 2014
1985 return skb_is_nonlinear(skb) && 2015 return 0;
1986 ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
1987 (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
1988 illegal_highdma(dev, skb))));
1989} 2016}
1990 2017
1991int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 2018int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -1995,9 +2022,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1995 int rc = NETDEV_TX_OK; 2022 int rc = NETDEV_TX_OK;
1996 2023
1997 if (likely(!skb->next)) { 2024 if (likely(!skb->next)) {
1998 if (!list_empty(&ptype_all))
1999 dev_queue_xmit_nit(skb, dev);
2000
2001 /* 2025 /*
2002 * If device doesnt need skb->dst, release it right now while 2026 * If device doesnt need skb->dst, release it right now while
2003 * its hot in this cpu cache 2027 * its hot in this cpu cache
@@ -2005,6 +2029,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2005 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 2029 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2006 skb_dst_drop(skb); 2030 skb_dst_drop(skb);
2007 2031
2032 if (!list_empty(&ptype_all))
2033 dev_queue_xmit_nit(skb, dev);
2034
2008 skb_orphan_try(skb); 2035 skb_orphan_try(skb);
2009 2036
2010 if (vlan_tx_tag_present(skb) && 2037 if (vlan_tx_tag_present(skb) &&
@@ -2119,26 +2146,70 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
2119 return queue_index; 2146 return queue_index;
2120} 2147}
2121 2148
2149static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
2150{
2151#ifdef CONFIG_XPS
2152 struct xps_dev_maps *dev_maps;
2153 struct xps_map *map;
2154 int queue_index = -1;
2155
2156 rcu_read_lock();
2157 dev_maps = rcu_dereference(dev->xps_maps);
2158 if (dev_maps) {
2159 map = rcu_dereference(
2160 dev_maps->cpu_map[raw_smp_processor_id()]);
2161 if (map) {
2162 if (map->len == 1)
2163 queue_index = map->queues[0];
2164 else {
2165 u32 hash;
2166 if (skb->sk && skb->sk->sk_hash)
2167 hash = skb->sk->sk_hash;
2168 else
2169 hash = (__force u16) skb->protocol ^
2170 skb->rxhash;
2171 hash = jhash_1word(hash, hashrnd);
2172 queue_index = map->queues[
2173 ((u64)hash * map->len) >> 32];
2174 }
2175 if (unlikely(queue_index >= dev->real_num_tx_queues))
2176 queue_index = -1;
2177 }
2178 }
2179 rcu_read_unlock();
2180
2181 return queue_index;
2182#else
2183 return -1;
2184#endif
2185}
2186
2122static struct netdev_queue *dev_pick_tx(struct net_device *dev, 2187static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2123 struct sk_buff *skb) 2188 struct sk_buff *skb)
2124{ 2189{
2125 int queue_index; 2190 int queue_index;
2126 const struct net_device_ops *ops = dev->netdev_ops; 2191 const struct net_device_ops *ops = dev->netdev_ops;
2127 2192
2128 if (ops->ndo_select_queue) { 2193 if (dev->real_num_tx_queues == 1)
2194 queue_index = 0;
2195 else if (ops->ndo_select_queue) {
2129 queue_index = ops->ndo_select_queue(dev, skb); 2196 queue_index = ops->ndo_select_queue(dev, skb);
2130 queue_index = dev_cap_txqueue(dev, queue_index); 2197 queue_index = dev_cap_txqueue(dev, queue_index);
2131 } else { 2198 } else {
2132 struct sock *sk = skb->sk; 2199 struct sock *sk = skb->sk;
2133 queue_index = sk_tx_queue_get(sk); 2200 queue_index = sk_tx_queue_get(sk);
2134 if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
2135 2201
2136 queue_index = 0; 2202 if (queue_index < 0 || skb->ooo_okay ||
2137 if (dev->real_num_tx_queues > 1) 2203 queue_index >= dev->real_num_tx_queues) {
2204 int old_index = queue_index;
2205
2206 queue_index = get_xps_queue(dev, skb);
2207 if (queue_index < 0)
2138 queue_index = skb_tx_hash(dev, skb); 2208 queue_index = skb_tx_hash(dev, skb);
2139 2209
2140 if (sk) { 2210 if (queue_index != old_index && sk) {
2141 struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1); 2211 struct dst_entry *dst =
2212 rcu_dereference_check(sk->sk_dst_cache, 1);
2142 2213
2143 if (dst && skb_dst(skb) == dst) 2214 if (dst && skb_dst(skb) == dst)
2144 sk_tx_queue_set(sk, queue_index); 2215 sk_tx_queue_set(sk, queue_index);
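
get_xps_queue() spreads the hash over map->len entries without a modulo: multiplying a 32-bit hash by len and keeping the top 32 bits of the product lands uniformly in [0, len). Stand-alone form of the trick (hash_to_index is a made-up name):

        static inline u32 hash_to_index(u32 hash, u32 len)
        {
                /* top half of the 64-bit product: always < len */
                return (u32)(((u64)hash * len) >> 32);
        }
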
@@ -4967,10 +5038,13 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
4967 } 5038 }
4968 5039
4969 if (features & NETIF_F_UFO) { 5040 if (features & NETIF_F_UFO) {
4970 if (!(features & NETIF_F_GEN_CSUM)) { 5041 /* maybe split UFO into V4 and V6? */
5042 if (!((features & NETIF_F_GEN_CSUM) ||
5043 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5044 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4971 if (name) 5045 if (name)
4972 printk(KERN_ERR "%s: Dropping NETIF_F_UFO " 5046 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4973 "since no NETIF_F_HW_CSUM feature.\n", 5047 "since no checksum offload features.\n",
4974 name); 5048 name);
4975 features &= ~NETIF_F_UFO; 5049 features &= ~NETIF_F_UFO;
4976 } 5050 }
@@ -5014,9 +5088,9 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5014} 5088}
5015EXPORT_SYMBOL(netif_stacked_transfer_operstate); 5089EXPORT_SYMBOL(netif_stacked_transfer_operstate);
5016 5090
5091#ifdef CONFIG_RPS
5017static int netif_alloc_rx_queues(struct net_device *dev) 5092static int netif_alloc_rx_queues(struct net_device *dev)
5018{ 5093{
5019#ifdef CONFIG_RPS
5020 unsigned int i, count = dev->num_rx_queues; 5094 unsigned int i, count = dev->num_rx_queues;
5021 struct netdev_rx_queue *rx; 5095 struct netdev_rx_queue *rx;
5022 5096
@@ -5029,15 +5103,22 @@ static int netif_alloc_rx_queues(struct net_device *dev)
5029 } 5103 }
5030 dev->_rx = rx; 5104 dev->_rx = rx;
5031 5105
5032 /*
5033 * Set a pointer to first element in the array which holds the
5034 * reference count.
5035 */
5036 for (i = 0; i < count; i++) 5106 for (i = 0; i < count; i++)
5037 rx[i].first = rx; 5107 rx[i].dev = dev;
5038#endif
5039 return 0; 5108 return 0;
5040} 5109}
5110#endif
5111
5112static void netdev_init_one_queue(struct net_device *dev,
5113 struct netdev_queue *queue, void *_unused)
5114{
5115 /* Initialize queue lock */
5116 spin_lock_init(&queue->_xmit_lock);
5117 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5118 queue->xmit_lock_owner = -1;
5119 netdev_queue_numa_node_write(queue, -1);
5120 queue->dev = dev;
5121}
5041 5122
5042static int netif_alloc_netdev_queues(struct net_device *dev) 5123static int netif_alloc_netdev_queues(struct net_device *dev)
5043{ 5124{
@@ -5053,25 +5134,11 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
5053 return -ENOMEM; 5134 return -ENOMEM;
5054 } 5135 }
5055 dev->_tx = tx; 5136 dev->_tx = tx;
5056 return 0;
5057}
5058 5137
5059static void netdev_init_one_queue(struct net_device *dev,
5060 struct netdev_queue *queue,
5061 void *_unused)
5062{
5063 queue->dev = dev;
5064
5065 /* Initialize queue lock */
5066 spin_lock_init(&queue->_xmit_lock);
5067 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5068 queue->xmit_lock_owner = -1;
5069}
5070
5071static void netdev_init_queues(struct net_device *dev)
5072{
5073 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); 5138 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5074 spin_lock_init(&dev->tx_global_lock); 5139 spin_lock_init(&dev->tx_global_lock);
5140
5141 return 0;
5075} 5142}
5076 5143
5077/** 5144/**
@@ -5110,16 +5177,6 @@ int register_netdevice(struct net_device *dev)
5110 5177
5111 dev->iflink = -1; 5178 dev->iflink = -1;
5112 5179
5113 ret = netif_alloc_rx_queues(dev);
5114 if (ret)
5115 goto out;
5116
5117 ret = netif_alloc_netdev_queues(dev);
5118 if (ret)
5119 goto out;
5120
5121 netdev_init_queues(dev);
5122
5123 /* Init, if this function is available */ 5180 /* Init, if this function is available */
5124 if (dev->netdev_ops->ndo_init) { 5181 if (dev->netdev_ops->ndo_init) {
5125 ret = dev->netdev_ops->ndo_init(dev); 5182 ret = dev->netdev_ops->ndo_init(dev);
@@ -5577,10 +5634,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5577 5634
5578 dev->num_tx_queues = queue_count; 5635 dev->num_tx_queues = queue_count;
5579 dev->real_num_tx_queues = queue_count; 5636 dev->real_num_tx_queues = queue_count;
5637 if (netif_alloc_netdev_queues(dev))
5638 goto free_pcpu;
5580 5639
5581#ifdef CONFIG_RPS 5640#ifdef CONFIG_RPS
5582 dev->num_rx_queues = queue_count; 5641 dev->num_rx_queues = queue_count;
5583 dev->real_num_rx_queues = queue_count; 5642 dev->real_num_rx_queues = queue_count;
5643 if (netif_alloc_rx_queues(dev))
5644 goto free_pcpu;
5584#endif 5645#endif
5585 5646
5586 dev->gso_max_size = GSO_MAX_SIZE; 5647 dev->gso_max_size = GSO_MAX_SIZE;
@@ -5597,6 +5658,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5597 5658
5598free_pcpu: 5659free_pcpu:
5599 free_percpu(dev->pcpu_refcnt); 5660 free_percpu(dev->pcpu_refcnt);
5661 kfree(dev->_tx);
5662#ifdef CONFIG_RPS
5663 kfree(dev->_rx);
5664#endif
5665
5600free_p: 5666free_p:
5601 kfree(p); 5667 kfree(p);
5602 return NULL; 5668 return NULL;
@@ -5618,6 +5684,9 @@ void free_netdev(struct net_device *dev)
5618 release_net(dev_net(dev)); 5684 release_net(dev_net(dev));
5619 5685
5620 kfree(dev->_tx); 5686 kfree(dev->_tx);
5687#ifdef CONFIG_RPS
5688 kfree(dev->_rx);
5689#endif
5621 5690
5622 kfree(rcu_dereference_raw(dev->ingress_queue)); 5691 kfree(rcu_dereference_raw(dev->ingress_queue));
5623 5692
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 956a9f4971c..d5bc2881888 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1171,7 +1171,9 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
1171 return -EFAULT; 1171 return -EFAULT;
1172 if (edata.data && !(dev->features & NETIF_F_SG)) 1172 if (edata.data && !(dev->features & NETIF_F_SG))
1173 return -EINVAL; 1173 return -EINVAL;
1174 if (edata.data && !(dev->features & NETIF_F_HW_CSUM)) 1174 if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) ||
1175 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
1176 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
1175 return -EINVAL; 1177 return -EINVAL;
1176 return dev->ethtool_ops->set_ufo(dev, edata.data); 1178 return dev->ethtool_ops->set_ufo(dev, edata.data);
1177} 1179}
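
ethtool_set_ufo() here and netdev_fix_features() above now apply the same relaxed rule: UFO is allowed when the device does generic checksumming or offers both the IPv4 and IPv6 checksum offloads. Restated as a predicate (ufo_csum_capable is a hypothetical helper, not part of the patch):

        static bool ufo_csum_capable(const struct net_device *dev)
        {
                unsigned long f = dev->features;

                return (f & NETIF_F_GEN_CSUM) ||
                       ((f & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) ==
                        (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM));
        }
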
diff --git a/net/core/filter.c b/net/core/filter.c
index ae21a0d3c4a..e193e29d467 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -37,9 +37,58 @@
37#include <asm/uaccess.h> 37#include <asm/uaccess.h>
38#include <asm/unaligned.h> 38#include <asm/unaligned.h>
39#include <linux/filter.h> 39#include <linux/filter.h>
40#include <linux/reciprocal_div.h>
41
42enum {
43 BPF_S_RET_K = 1,
44 BPF_S_RET_A,
45 BPF_S_ALU_ADD_K,
46 BPF_S_ALU_ADD_X,
47 BPF_S_ALU_SUB_K,
48 BPF_S_ALU_SUB_X,
49 BPF_S_ALU_MUL_K,
50 BPF_S_ALU_MUL_X,
51 BPF_S_ALU_DIV_X,
52 BPF_S_ALU_AND_K,
53 BPF_S_ALU_AND_X,
54 BPF_S_ALU_OR_K,
55 BPF_S_ALU_OR_X,
56 BPF_S_ALU_LSH_K,
57 BPF_S_ALU_LSH_X,
58 BPF_S_ALU_RSH_K,
59 BPF_S_ALU_RSH_X,
60 BPF_S_ALU_NEG,
61 BPF_S_LD_W_ABS,
62 BPF_S_LD_H_ABS,
63 BPF_S_LD_B_ABS,
64 BPF_S_LD_W_LEN,
65 BPF_S_LD_W_IND,
66 BPF_S_LD_H_IND,
67 BPF_S_LD_B_IND,
68 BPF_S_LD_IMM,
69 BPF_S_LDX_W_LEN,
70 BPF_S_LDX_B_MSH,
71 BPF_S_LDX_IMM,
72 BPF_S_MISC_TAX,
73 BPF_S_MISC_TXA,
74 BPF_S_ALU_DIV_K,
75 BPF_S_LD_MEM,
76 BPF_S_LDX_MEM,
77 BPF_S_ST,
78 BPF_S_STX,
79 BPF_S_JMP_JA,
80 BPF_S_JMP_JEQ_K,
81 BPF_S_JMP_JEQ_X,
82 BPF_S_JMP_JGE_K,
83 BPF_S_JMP_JGE_X,
84 BPF_S_JMP_JGT_K,
85 BPF_S_JMP_JGT_X,
86 BPF_S_JMP_JSET_K,
87 BPF_S_JMP_JSET_X,
88};
40 89
41/* No hurry in this branch */ 90/* No hurry in this branch */
42static void *__load_pointer(struct sk_buff *skb, int k) 91static void *__load_pointer(const struct sk_buff *skb, int k)
43{ 92{
44 u8 *ptr = NULL; 93 u8 *ptr = NULL;
45 94
@@ -53,7 +102,7 @@ static void *__load_pointer(struct sk_buff *skb, int k)
53 return NULL; 102 return NULL;
54} 103}
55 104
56static inline void *load_pointer(struct sk_buff *skb, int k, 105static inline void *load_pointer(const struct sk_buff *skb, int k,
57 unsigned int size, void *buffer) 106 unsigned int size, void *buffer)
58{ 107{
59 if (k >= 0) 108 if (k >= 0)
@@ -89,7 +138,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
89 rcu_read_lock_bh(); 138 rcu_read_lock_bh();
90 filter = rcu_dereference_bh(sk->sk_filter); 139 filter = rcu_dereference_bh(sk->sk_filter);
91 if (filter) { 140 if (filter) {
92 unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len); 141 unsigned int pkt_len = sk_run_filter(skb, filter->insns);
93 142
94 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; 143 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
95 } 144 }
@@ -103,50 +152,52 @@ EXPORT_SYMBOL(sk_filter);
103 * sk_run_filter - run a filter on a socket 152 * sk_run_filter - run a filter on a socket
104 * @skb: buffer to run the filter on 153 * @skb: buffer to run the filter on
105 * @filter: filter to apply 154 * @filter: filter to apply
106 * @flen: length of filter
107 * 155 *
108 * Decode and apply filter instructions to the skb->data. 156 * Decode and apply filter instructions to the skb->data.
109 * Return length to keep, 0 for none. skb is the data we are 157 * Return length to keep, 0 for none. @skb is the data we are
110 * filtering, filter is the array of filter instructions, and 158 * filtering, @filter is the array of filter instructions.
111 * len is the number of filter blocks in the array. 159 * Because all jumps are guaranteed to be before last instruction,
160 * and last instruction guaranteed to be a RET, we dont need to check
161 * flen. (We used to pass to this function the length of filter)
112 */ 162 */
113unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) 163unsigned int sk_run_filter(const struct sk_buff *skb,
164 const struct sock_filter *fentry)
114{ 165{
115 void *ptr; 166 void *ptr;
116 u32 A = 0; /* Accumulator */ 167 u32 A = 0; /* Accumulator */
117 u32 X = 0; /* Index Register */ 168 u32 X = 0; /* Index Register */
118 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ 169 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
119 unsigned long memvalid = 0;
120 u32 tmp; 170 u32 tmp;
121 int k; 171 int k;
122 int pc;
123 172
124 BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
125 /* 173 /*
126 * Process array of filter instructions. 174 * Process array of filter instructions.
127 */ 175 */
128 for (pc = 0; pc < flen; pc++) { 176 for (;; fentry++) {
129 const struct sock_filter *fentry = &filter[pc]; 177#if defined(CONFIG_X86_32)
130 u32 f_k = fentry->k; 178#define K (fentry->k)
179#else
180 const u32 K = fentry->k;
181#endif
131 182
132 switch (fentry->code) { 183 switch (fentry->code) {
133 case BPF_S_ALU_ADD_X: 184 case BPF_S_ALU_ADD_X:
134 A += X; 185 A += X;
135 continue; 186 continue;
136 case BPF_S_ALU_ADD_K: 187 case BPF_S_ALU_ADD_K:
137 A += f_k; 188 A += K;
138 continue; 189 continue;
139 case BPF_S_ALU_SUB_X: 190 case BPF_S_ALU_SUB_X:
140 A -= X; 191 A -= X;
141 continue; 192 continue;
142 case BPF_S_ALU_SUB_K: 193 case BPF_S_ALU_SUB_K:
143 A -= f_k; 194 A -= K;
144 continue; 195 continue;
145 case BPF_S_ALU_MUL_X: 196 case BPF_S_ALU_MUL_X:
146 A *= X; 197 A *= X;
147 continue; 198 continue;
148 case BPF_S_ALU_MUL_K: 199 case BPF_S_ALU_MUL_K:
149 A *= f_k; 200 A *= K;
150 continue; 201 continue;
151 case BPF_S_ALU_DIV_X: 202 case BPF_S_ALU_DIV_X:
152 if (X == 0) 203 if (X == 0)
@@ -154,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
154 A /= X; 205 A /= X;
155 continue; 206 continue;
156 case BPF_S_ALU_DIV_K: 207 case BPF_S_ALU_DIV_K:
157 A /= f_k; 208 A = reciprocal_divide(A, K);
158 continue; 209 continue;
159 case BPF_S_ALU_AND_X: 210 case BPF_S_ALU_AND_X:
160 A &= X; 211 A &= X;
161 continue; 212 continue;
162 case BPF_S_ALU_AND_K: 213 case BPF_S_ALU_AND_K:
163 A &= f_k; 214 A &= K;
164 continue; 215 continue;
165 case BPF_S_ALU_OR_X: 216 case BPF_S_ALU_OR_X:
166 A |= X; 217 A |= X;
167 continue; 218 continue;
168 case BPF_S_ALU_OR_K: 219 case BPF_S_ALU_OR_K:
169 A |= f_k; 220 A |= K;
170 continue; 221 continue;
171 case BPF_S_ALU_LSH_X: 222 case BPF_S_ALU_LSH_X:
172 A <<= X; 223 A <<= X;
173 continue; 224 continue;
174 case BPF_S_ALU_LSH_K: 225 case BPF_S_ALU_LSH_K:
175 A <<= f_k; 226 A <<= K;
176 continue; 227 continue;
177 case BPF_S_ALU_RSH_X: 228 case BPF_S_ALU_RSH_X:
178 A >>= X; 229 A >>= X;
179 continue; 230 continue;
180 case BPF_S_ALU_RSH_K: 231 case BPF_S_ALU_RSH_K:
181 A >>= f_k; 232 A >>= K;
182 continue; 233 continue;
183 case BPF_S_ALU_NEG: 234 case BPF_S_ALU_NEG:
184 A = -A; 235 A = -A;
185 continue; 236 continue;
186 case BPF_S_JMP_JA: 237 case BPF_S_JMP_JA:
187 pc += f_k; 238 fentry += K;
188 continue; 239 continue;
189 case BPF_S_JMP_JGT_K: 240 case BPF_S_JMP_JGT_K:
190 pc += (A > f_k) ? fentry->jt : fentry->jf; 241 fentry += (A > K) ? fentry->jt : fentry->jf;
191 continue; 242 continue;
192 case BPF_S_JMP_JGE_K: 243 case BPF_S_JMP_JGE_K:
193 pc += (A >= f_k) ? fentry->jt : fentry->jf; 244 fentry += (A >= K) ? fentry->jt : fentry->jf;
194 continue; 245 continue;
195 case BPF_S_JMP_JEQ_K: 246 case BPF_S_JMP_JEQ_K:
196 pc += (A == f_k) ? fentry->jt : fentry->jf; 247 fentry += (A == K) ? fentry->jt : fentry->jf;
197 continue; 248 continue;
198 case BPF_S_JMP_JSET_K: 249 case BPF_S_JMP_JSET_K:
199 pc += (A & f_k) ? fentry->jt : fentry->jf; 250 fentry += (A & K) ? fentry->jt : fentry->jf;
200 continue; 251 continue;
201 case BPF_S_JMP_JGT_X: 252 case BPF_S_JMP_JGT_X:
202 pc += (A > X) ? fentry->jt : fentry->jf; 253 fentry += (A > X) ? fentry->jt : fentry->jf;
203 continue; 254 continue;
204 case BPF_S_JMP_JGE_X: 255 case BPF_S_JMP_JGE_X:
205 pc += (A >= X) ? fentry->jt : fentry->jf; 256 fentry += (A >= X) ? fentry->jt : fentry->jf;
206 continue; 257 continue;
207 case BPF_S_JMP_JEQ_X: 258 case BPF_S_JMP_JEQ_X:
208 pc += (A == X) ? fentry->jt : fentry->jf; 259 fentry += (A == X) ? fentry->jt : fentry->jf;
209 continue; 260 continue;
210 case BPF_S_JMP_JSET_X: 261 case BPF_S_JMP_JSET_X:
211 pc += (A & X) ? fentry->jt : fentry->jf; 262 fentry += (A & X) ? fentry->jt : fentry->jf;
212 continue; 263 continue;
213 case BPF_S_LD_W_ABS: 264 case BPF_S_LD_W_ABS:
214 k = f_k; 265 k = K;
215load_w: 266load_w:
216 ptr = load_pointer(skb, k, 4, &tmp); 267 ptr = load_pointer(skb, k, 4, &tmp);
217 if (ptr != NULL) { 268 if (ptr != NULL) {
@@ -220,7 +271,7 @@ load_w:
220 } 271 }
221 break; 272 break;
222 case BPF_S_LD_H_ABS: 273 case BPF_S_LD_H_ABS:
223 k = f_k; 274 k = K;
224load_h: 275load_h:
225 ptr = load_pointer(skb, k, 2, &tmp); 276 ptr = load_pointer(skb, k, 2, &tmp);
226 if (ptr != NULL) { 277 if (ptr != NULL) {
@@ -229,7 +280,7 @@ load_h:
229 } 280 }
230 break; 281 break;
231 case BPF_S_LD_B_ABS: 282 case BPF_S_LD_B_ABS:
232 k = f_k; 283 k = K;
233load_b: 284load_b:
234 ptr = load_pointer(skb, k, 1, &tmp); 285 ptr = load_pointer(skb, k, 1, &tmp);
235 if (ptr != NULL) { 286 if (ptr != NULL) {
@@ -244,34 +295,32 @@ load_b:
244 X = skb->len; 295 X = skb->len;
245 continue; 296 continue;
246 case BPF_S_LD_W_IND: 297 case BPF_S_LD_W_IND:
247 k = X + f_k; 298 k = X + K;
248 goto load_w; 299 goto load_w;
249 case BPF_S_LD_H_IND: 300 case BPF_S_LD_H_IND:
250 k = X + f_k; 301 k = X + K;
251 goto load_h; 302 goto load_h;
252 case BPF_S_LD_B_IND: 303 case BPF_S_LD_B_IND:
253 k = X + f_k; 304 k = X + K;
254 goto load_b; 305 goto load_b;
255 case BPF_S_LDX_B_MSH: 306 case BPF_S_LDX_B_MSH:
256 ptr = load_pointer(skb, f_k, 1, &tmp); 307 ptr = load_pointer(skb, K, 1, &tmp);
257 if (ptr != NULL) { 308 if (ptr != NULL) {
258 X = (*(u8 *)ptr & 0xf) << 2; 309 X = (*(u8 *)ptr & 0xf) << 2;
259 continue; 310 continue;
260 } 311 }
261 return 0; 312 return 0;
262 case BPF_S_LD_IMM: 313 case BPF_S_LD_IMM:
263 A = f_k; 314 A = K;
264 continue; 315 continue;
265 case BPF_S_LDX_IMM: 316 case BPF_S_LDX_IMM:
266 X = f_k; 317 X = K;
267 continue; 318 continue;
268 case BPF_S_LD_MEM: 319 case BPF_S_LD_MEM:
269 A = (memvalid & (1UL << f_k)) ? 320 A = mem[K];
270 mem[f_k] : 0;
271 continue; 321 continue;
272 case BPF_S_LDX_MEM: 322 case BPF_S_LDX_MEM:
273 X = (memvalid & (1UL << f_k)) ? 323 X = mem[K];
274 mem[f_k] : 0;
275 continue; 324 continue;
276 case BPF_S_MISC_TAX: 325 case BPF_S_MISC_TAX:
277 X = A; 326 X = A;
@@ -280,16 +329,14 @@ load_b:
280 A = X; 329 A = X;
281 continue; 330 continue;
282 case BPF_S_RET_K: 331 case BPF_S_RET_K:
283 return f_k; 332 return K;
284 case BPF_S_RET_A: 333 case BPF_S_RET_A:
285 return A; 334 return A;
286 case BPF_S_ST: 335 case BPF_S_ST:
287 memvalid |= 1UL << f_k; 336 mem[K] = A;
288 mem[f_k] = A;
289 continue; 337 continue;
290 case BPF_S_STX: 338 case BPF_S_STX:
291 memvalid |= 1UL << f_k; 339 mem[K] = X;
292 mem[f_k] = X;
293 continue; 340 continue;
294 default: 341 default:
295 WARN_ON(1); 342 WARN_ON(1);
@@ -323,6 +370,12 @@ load_b:
323 return 0; 370 return 0;
324 A = skb->dev->type; 371 A = skb->dev->type;
325 continue; 372 continue;
373 case SKF_AD_RXHASH:
374 A = skb->rxhash;
375 continue;
376 case SKF_AD_CPU:
377 A = raw_smp_processor_id();
378 continue;
326 case SKF_AD_NLATTR: { 379 case SKF_AD_NLATTR: {
327 struct nlattr *nla; 380 struct nlattr *nla;
328 381
@@ -367,6 +420,66 @@ load_b:
367} 420}
368EXPORT_SYMBOL(sk_run_filter); 421EXPORT_SYMBOL(sk_run_filter);
369 422
423/*
424 * Security :
425 * A BPF program is able to use 16 cells of memory to store intermediate
426 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
427 * As we dont want to clear mem[] array for each packet going through
428 * sk_run_filter(), we check that filter loaded by user never try to read
429 * a cell if not previously written, and we check all branches to be sure
430 * a malicious user doesnt try to abuse us.
431 */
432static int check_load_and_stores(struct sock_filter *filter, int flen)
433{
434 u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
435 int pc, ret = 0;
436
437 BUILD_BUG_ON(BPF_MEMWORDS > 16);
438 masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
439 if (!masks)
440 return -ENOMEM;
441 memset(masks, 0xff, flen * sizeof(*masks));
442
443 for (pc = 0; pc < flen; pc++) {
444 memvalid &= masks[pc];
445
446 switch (filter[pc].code) {
447 case BPF_S_ST:
448 case BPF_S_STX:
449 memvalid |= (1 << filter[pc].k);
450 break;
451 case BPF_S_LD_MEM:
452 case BPF_S_LDX_MEM:
453 if (!(memvalid & (1 << filter[pc].k))) {
454 ret = -EINVAL;
455 goto error;
456 }
457 break;
458 case BPF_S_JMP_JA:
459 /* a jump must set masks on target */
460 masks[pc + 1 + filter[pc].k] &= memvalid;
461 memvalid = ~0;
462 break;
463 case BPF_S_JMP_JEQ_K:
464 case BPF_S_JMP_JEQ_X:
465 case BPF_S_JMP_JGE_K:
466 case BPF_S_JMP_JGE_X:
467 case BPF_S_JMP_JGT_K:
468 case BPF_S_JMP_JGT_X:
469 case BPF_S_JMP_JSET_X:
470 case BPF_S_JMP_JSET_K:
471 /* a jump must set masks on targets */
472 masks[pc + 1 + filter[pc].jt] &= memvalid;
473 masks[pc + 1 + filter[pc].jf] &= memvalid;
474 memvalid = ~0;
475 break;
476 }
477 }
478error:
479 kfree(masks);
480 return ret;
481}
482
370/** 483/**
371 * sk_chk_filter - verify socket filter code 484 * sk_chk_filter - verify socket filter code
372 * @filter: filter to verify 485 * @filter: filter to verify
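
check_load_and_stores() turns the uninitialized-scratch-read case into a load-time -EINVAL instead of a per-packet zeroing cost. The smallest filter it rejects reads a cell nothing ever wrote:

        /* rejected by sk_chk_filter() after this patch */
        struct sock_filter leaky[] = {
                BPF_STMT(BPF_LD | BPF_MEM, 0),  /* A = M[0], never stored */
                BPF_STMT(BPF_RET | BPF_A, 0),   /* return A */
        };

Branches are handled by ANDing the current valid-cell mask into both jump targets, so a cell only counts as initialized if it is written on every path that can reach the load.
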
@@ -383,7 +496,57 @@ EXPORT_SYMBOL(sk_run_filter);
383 */ 496 */
384int sk_chk_filter(struct sock_filter *filter, int flen) 497int sk_chk_filter(struct sock_filter *filter, int flen)
385{ 498{
386 struct sock_filter *ftest; 499 /*
500 * Valid instructions are initialized to non-0.
501 * Invalid instructions are initialized to 0.
502 */
503 static const u8 codes[] = {
504 [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
505 [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
506 [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
507 [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
508 [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
509 [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
510 [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
511 [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
512 [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
513 [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
514 [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
515 [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
516 [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
517 [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
518 [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
519 [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
520 [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
521 [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
522 [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
523 [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
524 [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
525 [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
526 [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
527 [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
528 [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
529 [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
530 [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
531 [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
532 [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
533 [BPF_RET|BPF_K] = BPF_S_RET_K,
534 [BPF_RET|BPF_A] = BPF_S_RET_A,
535 [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
536 [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
537 [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
538 [BPF_ST] = BPF_S_ST,
539 [BPF_STX] = BPF_S_STX,
540 [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
541 [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
542 [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
543 [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
544 [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
545 [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
546 [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
547 [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
548 [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
549 };
387 int pc; 550 int pc;
388 551
389 if (flen == 0 || flen > BPF_MAXINSNS) 552 if (flen == 0 || flen > BPF_MAXINSNS)
@@ -391,136 +554,31 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
391 554
392 /* check the filter code now */ 555 /* check the filter code now */
393 for (pc = 0; pc < flen; pc++) { 556 for (pc = 0; pc < flen; pc++) {
394 ftest = &filter[pc]; 557 struct sock_filter *ftest = &filter[pc];
395 558 u16 code = ftest->code;
396 /* Only allow valid instructions */
397 switch (ftest->code) {
398 case BPF_ALU|BPF_ADD|BPF_K:
399 ftest->code = BPF_S_ALU_ADD_K;
400 break;
401 case BPF_ALU|BPF_ADD|BPF_X:
402 ftest->code = BPF_S_ALU_ADD_X;
403 break;
404 case BPF_ALU|BPF_SUB|BPF_K:
405 ftest->code = BPF_S_ALU_SUB_K;
406 break;
407 case BPF_ALU|BPF_SUB|BPF_X:
408 ftest->code = BPF_S_ALU_SUB_X;
409 break;
410 case BPF_ALU|BPF_MUL|BPF_K:
411 ftest->code = BPF_S_ALU_MUL_K;
412 break;
413 case BPF_ALU|BPF_MUL|BPF_X:
414 ftest->code = BPF_S_ALU_MUL_X;
415 break;
416 case BPF_ALU|BPF_DIV|BPF_X:
417 ftest->code = BPF_S_ALU_DIV_X;
418 break;
419 case BPF_ALU|BPF_AND|BPF_K:
420 ftest->code = BPF_S_ALU_AND_K;
421 break;
422 case BPF_ALU|BPF_AND|BPF_X:
423 ftest->code = BPF_S_ALU_AND_X;
424 break;
425 case BPF_ALU|BPF_OR|BPF_K:
426 ftest->code = BPF_S_ALU_OR_K;
427 break;
428 case BPF_ALU|BPF_OR|BPF_X:
429 ftest->code = BPF_S_ALU_OR_X;
430 break;
431 case BPF_ALU|BPF_LSH|BPF_K:
432 ftest->code = BPF_S_ALU_LSH_K;
433 break;
434 case BPF_ALU|BPF_LSH|BPF_X:
435 ftest->code = BPF_S_ALU_LSH_X;
436 break;
437 case BPF_ALU|BPF_RSH|BPF_K:
438 ftest->code = BPF_S_ALU_RSH_K;
439 break;
440 case BPF_ALU|BPF_RSH|BPF_X:
441 ftest->code = BPF_S_ALU_RSH_X;
442 break;
443 case BPF_ALU|BPF_NEG:
444 ftest->code = BPF_S_ALU_NEG;
445 break;
446 case BPF_LD|BPF_W|BPF_ABS:
447 ftest->code = BPF_S_LD_W_ABS;
448 break;
449 case BPF_LD|BPF_H|BPF_ABS:
450 ftest->code = BPF_S_LD_H_ABS;
451 break;
452 case BPF_LD|BPF_B|BPF_ABS:
453 ftest->code = BPF_S_LD_B_ABS;
454 break;
455 case BPF_LD|BPF_W|BPF_LEN:
456 ftest->code = BPF_S_LD_W_LEN;
457 break;
458 case BPF_LD|BPF_W|BPF_IND:
459 ftest->code = BPF_S_LD_W_IND;
460 break;
461 case BPF_LD|BPF_H|BPF_IND:
462 ftest->code = BPF_S_LD_H_IND;
463 break;
464 case BPF_LD|BPF_B|BPF_IND:
465 ftest->code = BPF_S_LD_B_IND;
466 break;
467 case BPF_LD|BPF_IMM:
468 ftest->code = BPF_S_LD_IMM;
469 break;
470 case BPF_LDX|BPF_W|BPF_LEN:
471 ftest->code = BPF_S_LDX_W_LEN;
472 break;
473 case BPF_LDX|BPF_B|BPF_MSH:
474 ftest->code = BPF_S_LDX_B_MSH;
475 break;
476 case BPF_LDX|BPF_IMM:
477 ftest->code = BPF_S_LDX_IMM;
478 break;
479 case BPF_MISC|BPF_TAX:
480 ftest->code = BPF_S_MISC_TAX;
481 break;
482 case BPF_MISC|BPF_TXA:
483 ftest->code = BPF_S_MISC_TXA;
484 break;
485 case BPF_RET|BPF_K:
486 ftest->code = BPF_S_RET_K;
487 break;
488 case BPF_RET|BPF_A:
489 ftest->code = BPF_S_RET_A;
490 break;
491 559
560 if (code >= ARRAY_SIZE(codes))
561 return -EINVAL;
562 code = codes[code];
563 if (!code)
564 return -EINVAL;
492 /* Some instructions need special checks */ 565 /* Some instructions need special checks */
493 566 switch (code) {
567 case BPF_S_ALU_DIV_K:
494 /* check for division by zero */ 568 /* check for division by zero */
495 case BPF_ALU|BPF_DIV|BPF_K:
496 if (ftest->k == 0) 569 if (ftest->k == 0)
497 return -EINVAL; 570 return -EINVAL;
498 ftest->code = BPF_S_ALU_DIV_K; 571 ftest->k = reciprocal_value(ftest->k);
499 break;
500
501 /* check for invalid memory addresses */
502 case BPF_LD|BPF_MEM:
503 if (ftest->k >= BPF_MEMWORDS)
504 return -EINVAL;
505 ftest->code = BPF_S_LD_MEM;
506 break;
507 case BPF_LDX|BPF_MEM:
508 if (ftest->k >= BPF_MEMWORDS)
509 return -EINVAL;
510 ftest->code = BPF_S_LDX_MEM;
511 break;
512 case BPF_ST:
513 if (ftest->k >= BPF_MEMWORDS)
514 return -EINVAL;
515 ftest->code = BPF_S_ST;
516 break; 572 break;
517 case BPF_STX: 573 case BPF_S_LD_MEM:
574 case BPF_S_LDX_MEM:
575 case BPF_S_ST:
576 case BPF_S_STX:
577 /* check for invalid memory addresses */
518 if (ftest->k >= BPF_MEMWORDS) 578 if (ftest->k >= BPF_MEMWORDS)
519 return -EINVAL; 579 return -EINVAL;
520 ftest->code = BPF_S_STX;
521 break; 580 break;
522 581 case BPF_S_JMP_JA:
523 case BPF_JMP|BPF_JA:
524 /* 582 /*
525 * Note, the large ftest->k might cause loops. 583 * Note, the large ftest->k might cause loops.
526 * Compare this with conditional jumps below, 584 * Compare this with conditional jumps below,
@@ -528,40 +586,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
528 */ 586 */
529 if (ftest->k >= (unsigned)(flen-pc-1)) 587 if (ftest->k >= (unsigned)(flen-pc-1))
530 return -EINVAL; 588 return -EINVAL;
531 ftest->code = BPF_S_JMP_JA;
532 break;
533
534 case BPF_JMP|BPF_JEQ|BPF_K:
535 ftest->code = BPF_S_JMP_JEQ_K;
536 break;
537 case BPF_JMP|BPF_JEQ|BPF_X:
538 ftest->code = BPF_S_JMP_JEQ_X;
539 break;
540 case BPF_JMP|BPF_JGE|BPF_K:
541 ftest->code = BPF_S_JMP_JGE_K;
542 break;
543 case BPF_JMP|BPF_JGE|BPF_X:
544 ftest->code = BPF_S_JMP_JGE_X;
545 break;
546 case BPF_JMP|BPF_JGT|BPF_K:
547 ftest->code = BPF_S_JMP_JGT_K;
548 break;
549 case BPF_JMP|BPF_JGT|BPF_X:
550 ftest->code = BPF_S_JMP_JGT_X;
551 break;
552 case BPF_JMP|BPF_JSET|BPF_K:
553 ftest->code = BPF_S_JMP_JSET_K;
554 break; 589 break;
555 case BPF_JMP|BPF_JSET|BPF_X:
556 ftest->code = BPF_S_JMP_JSET_X;
557 break;
558
559 default:
560 return -EINVAL;
561 }
562
563 /* for conditionals both must be safe */
564 switch (ftest->code) {
565 case BPF_S_JMP_JEQ_K: 590 case BPF_S_JMP_JEQ_K:
566 case BPF_S_JMP_JEQ_X: 591 case BPF_S_JMP_JEQ_X:
567 case BPF_S_JMP_JGE_K: 592 case BPF_S_JMP_JGE_K:
@@ -570,21 +595,22 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
570 case BPF_S_JMP_JGT_X: 595 case BPF_S_JMP_JGT_X:
571 case BPF_S_JMP_JSET_X: 596 case BPF_S_JMP_JSET_X:
572 case BPF_S_JMP_JSET_K: 597 case BPF_S_JMP_JSET_K:
598 /* for conditionals both must be safe */
573 if (pc + ftest->jt + 1 >= flen || 599 if (pc + ftest->jt + 1 >= flen ||
574 pc + ftest->jf + 1 >= flen) 600 pc + ftest->jf + 1 >= flen)
575 return -EINVAL; 601 return -EINVAL;
602 break;
576 } 603 }
604 ftest->code = code;
577 } 605 }
578 606
579 /* last instruction must be a RET code */ 607 /* last instruction must be a RET code */
580 switch (filter[flen - 1].code) { 608 switch (filter[flen - 1].code) {
581 case BPF_S_RET_K: 609 case BPF_S_RET_K:
582 case BPF_S_RET_A: 610 case BPF_S_RET_A:
583 return 0; 611 return check_load_and_stores(filter, flen);
584 break; 612 }
585 default: 613 return -EINVAL;
586 return -EINVAL;
587 }
588} 614}
589EXPORT_SYMBOL(sk_chk_filter); 615EXPORT_SYMBOL(sk_chk_filter);
590 616
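
The BPF_S_ALU_DIV_K change trades a per-packet u32 division for a multiply: sk_chk_filter() rewrites the constant once with reciprocal_value(), and the interpreter calls reciprocal_divide(). The helpers of this era are roughly (see include/linux/reciprocal_div.h and lib/reciprocal_div.c):

        u32 reciprocal_value(u32 k)
        {
                u64 val = (1LL << 32) + (k - 1);
                do_div(val, k);                 /* val = ceil(2^32 / k) */
                return (u32)val;
        }

        static inline u32 reciprocal_divide(u32 a, u32 r)
        {
                return (u32)(((u64)a * r) >> 32);
        }

One consequence worth noting: after sk_chk_filter() runs, the stored k of a DIV_K instruction holds the reciprocal, not the original divisor.
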
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7f902cad10f..85e8b5326dd 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -706,7 +706,6 @@ static struct attribute *rx_queue_default_attrs[] = {
706static void rx_queue_release(struct kobject *kobj) 706static void rx_queue_release(struct kobject *kobj)
707{ 707{
708 struct netdev_rx_queue *queue = to_rx_queue(kobj); 708 struct netdev_rx_queue *queue = to_rx_queue(kobj);
709 struct netdev_rx_queue *first = queue->first;
710 struct rps_map *map; 709 struct rps_map *map;
711 struct rps_dev_flow_table *flow_table; 710 struct rps_dev_flow_table *flow_table;
712 711
@@ -723,10 +722,8 @@ static void rx_queue_release(struct kobject *kobj)
723 call_rcu(&flow_table->rcu, rps_dev_flow_table_release); 722 call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
724 } 723 }
725 724
726 if (atomic_dec_and_test(&first->count)) 725 memset(kobj, 0, sizeof(*kobj));
727 kfree(first); 726 dev_put(queue->dev);
728 else
729 memset(kobj, 0, sizeof(*kobj));
730} 727}
731 728
732static struct kobj_type rx_queue_ktype = { 729static struct kobj_type rx_queue_ktype = {
@@ -738,7 +735,6 @@ static struct kobj_type rx_queue_ktype = {
738static int rx_queue_add_kobject(struct net_device *net, int index) 735static int rx_queue_add_kobject(struct net_device *net, int index)
739{ 736{
740 struct netdev_rx_queue *queue = net->_rx + index; 737 struct netdev_rx_queue *queue = net->_rx + index;
741 struct netdev_rx_queue *first = queue->first;
742 struct kobject *kobj = &queue->kobj; 738 struct kobject *kobj = &queue->kobj;
743 int error = 0; 739 int error = 0;
744 740
@@ -751,14 +747,16 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
751 } 747 }
752 748
753 kobject_uevent(kobj, KOBJ_ADD); 749 kobject_uevent(kobj, KOBJ_ADD);
754 atomic_inc(&first->count); 750 dev_hold(queue->dev);
755 751
756 return error; 752 return error;
757} 753}
754#endif /* CONFIG_RPS */
758 755
759int 756int
760net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num) 757net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
761{ 758{
759#ifdef CONFIG_RPS
762 int i; 760 int i;
763 int error = 0; 761 int error = 0;
764 762
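
The lifetime scheme changes here: instead of refcounting the rx queue array itself through ->first/->count, each queue kobject now pins the owning net_device (dev_hold() in rx_queue_add_kobject(), dev_put() in rx_queue_release()). That pairing is what lets free_netdev() in the dev.c hunk above kfree(dev->_rx) unconditionally once the last reference is gone.
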
@@ -774,23 +772,422 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
774 kobject_put(&net->_rx[i].kobj); 772 kobject_put(&net->_rx[i].kobj);
775 773
776 return error; 774 return error;
775#else
776 return 0;
777#endif
778}
779
780#ifdef CONFIG_XPS
781/*
782 * netdev_queue sysfs structures and functions.
783 */
784struct netdev_queue_attribute {
785 struct attribute attr;
786 ssize_t (*show)(struct netdev_queue *queue,
787 struct netdev_queue_attribute *attr, char *buf);
788 ssize_t (*store)(struct netdev_queue *queue,
789 struct netdev_queue_attribute *attr, const char *buf, size_t len);
790};
791#define to_netdev_queue_attr(_attr) container_of(_attr, \
792 struct netdev_queue_attribute, attr)
793
794#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
795
796static ssize_t netdev_queue_attr_show(struct kobject *kobj,
797 struct attribute *attr, char *buf)
798{
799 struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
800 struct netdev_queue *queue = to_netdev_queue(kobj);
801
802 if (!attribute->show)
803 return -EIO;
804
805 return attribute->show(queue, attribute, buf);
777} 806}
778 807
779static int rx_queue_register_kobjects(struct net_device *net) 808static ssize_t netdev_queue_attr_store(struct kobject *kobj,
809 struct attribute *attr,
810 const char *buf, size_t count)
780{ 811{
812 struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
813 struct netdev_queue *queue = to_netdev_queue(kobj);
814
815 if (!attribute->store)
816 return -EIO;
817
818 return attribute->store(queue, attribute, buf, count);
819}
820
821static const struct sysfs_ops netdev_queue_sysfs_ops = {
822 .show = netdev_queue_attr_show,
823 .store = netdev_queue_attr_store,
824};
825
826static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
827{
828 struct net_device *dev = queue->dev;
829 int i;
830
831 for (i = 0; i < dev->num_tx_queues; i++)
832 if (queue == &dev->_tx[i])
833 break;
834
835 BUG_ON(i >= dev->num_tx_queues);
836
837 return i;
838}
839
840
841static ssize_t show_xps_map(struct netdev_queue *queue,
842 struct netdev_queue_attribute *attribute, char *buf)
843{
844 struct net_device *dev = queue->dev;
845 struct xps_dev_maps *dev_maps;
846 cpumask_var_t mask;
847 unsigned long index;
848 size_t len = 0;
849 int i;
850
851 if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
852 return -ENOMEM;
853
854 index = get_netdev_queue_index(queue);
855
856 rcu_read_lock();
857 dev_maps = rcu_dereference(dev->xps_maps);
858 if (dev_maps) {
859 for_each_possible_cpu(i) {
860 struct xps_map *map =
861 rcu_dereference(dev_maps->cpu_map[i]);
862 if (map) {
863 int j;
864 for (j = 0; j < map->len; j++) {
865 if (map->queues[j] == index) {
866 cpumask_set_cpu(i, mask);
867 break;
868 }
869 }
870 }
871 }
872 }
873 rcu_read_unlock();
874
875 len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
876 if (PAGE_SIZE - len < 3) {
877 free_cpumask_var(mask);
878 return -EINVAL;
879 }
880
881 free_cpumask_var(mask);
882 len += sprintf(buf + len, "\n");
883 return len;
884}
885
886static void xps_map_release(struct rcu_head *rcu)
887{
888 struct xps_map *map = container_of(rcu, struct xps_map, rcu);
889
890 kfree(map);
891}
892
893static void xps_dev_maps_release(struct rcu_head *rcu)
894{
895 struct xps_dev_maps *dev_maps =
896 container_of(rcu, struct xps_dev_maps, rcu);
897
898 kfree(dev_maps);
899}
900
901static DEFINE_MUTEX(xps_map_mutex);
902#define xmap_dereference(P) \
903 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
904
905static ssize_t store_xps_map(struct netdev_queue *queue,
906 struct netdev_queue_attribute *attribute,
907 const char *buf, size_t len)
908{
909 struct net_device *dev = queue->dev;
910 cpumask_var_t mask;
911 int err, i, cpu, pos, map_len, alloc_len, need_set;
912 unsigned long index;
913 struct xps_map *map, *new_map;
914 struct xps_dev_maps *dev_maps, *new_dev_maps;
915 int nonempty = 0;
916 int numa_node = -2;
917
918 if (!capable(CAP_NET_ADMIN))
919 return -EPERM;
920
921 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
922 return -ENOMEM;
923
924 index = get_netdev_queue_index(queue);
925
926 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
927 if (err) {
928 free_cpumask_var(mask);
929 return err;
930 }
931
932 new_dev_maps = kzalloc(max_t(unsigned,
933 XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
934 if (!new_dev_maps) {
935 free_cpumask_var(mask);
936 return -ENOMEM;
937 }
938
939 mutex_lock(&xps_map_mutex);
940
941 dev_maps = xmap_dereference(dev->xps_maps);
942
943 for_each_possible_cpu(cpu) {
944 map = dev_maps ?
945 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
946 new_map = map;
947 if (map) {
948 for (pos = 0; pos < map->len; pos++)
949 if (map->queues[pos] == index)
950 break;
951 map_len = map->len;
952 alloc_len = map->alloc_len;
953 } else
954 pos = map_len = alloc_len = 0;
955
956 need_set = cpu_isset(cpu, *mask) && cpu_online(cpu);
957#ifdef CONFIG_NUMA
958 if (need_set) {
959 if (numa_node == -2)
960 numa_node = cpu_to_node(cpu);
961 else if (numa_node != cpu_to_node(cpu))
962 numa_node = -1;
963 }
964#endif
965 if (need_set && pos >= map_len) {
966 /* Need to add queue to this CPU's map */
967 if (map_len >= alloc_len) {
968 alloc_len = alloc_len ?
969 2 * alloc_len : XPS_MIN_MAP_ALLOC;
970 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
971 GFP_KERNEL,
972 cpu_to_node(cpu));
973 if (!new_map)
974 goto error;
975 new_map->alloc_len = alloc_len;
976 for (i = 0; i < map_len; i++)
977 new_map->queues[i] = map->queues[i];
978 new_map->len = map_len;
979 }
980 new_map->queues[new_map->len++] = index;
981 } else if (!need_set && pos < map_len) {
982 /* Need to remove queue from this CPU's map */
983 if (map_len > 1)
984 new_map->queues[pos] =
985 new_map->queues[--new_map->len];
986 else
987 new_map = NULL;
988 }
989 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
990 }
991
992 /* Cleanup old maps */
993 for_each_possible_cpu(cpu) {
994 map = dev_maps ?
995 xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
996 if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
997 call_rcu(&map->rcu, xps_map_release);
998 if (new_dev_maps->cpu_map[cpu])
999 nonempty = 1;
1000 }
1001
1002 if (nonempty)
1003 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
1004 else {
1005 kfree(new_dev_maps);
1006 rcu_assign_pointer(dev->xps_maps, NULL);
1007 }
1008
1009 if (dev_maps)
1010 call_rcu(&dev_maps->rcu, xps_dev_maps_release);
1011
1012 netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : -1);
1013
1014 mutex_unlock(&xps_map_mutex);
1015
1016 free_cpumask_var(mask);
1017 return len;
1018
1019error:
1020 mutex_unlock(&xps_map_mutex);
1021
1022 if (new_dev_maps)
1023 for_each_possible_cpu(i)
1024 kfree(rcu_dereference_protected(
1025 new_dev_maps->cpu_map[i],
1026 1));
1027 kfree(new_dev_maps);
1028 free_cpumask_var(mask);
1029 return -ENOMEM;
1030}
1031
1032static struct netdev_queue_attribute xps_cpus_attribute =
1033 __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
1034
1035static struct attribute *netdev_queue_default_attrs[] = {
1036 &xps_cpus_attribute.attr,
1037 NULL
1038};
1039
1040static void netdev_queue_release(struct kobject *kobj)
1041{
1042 struct netdev_queue *queue = to_netdev_queue(kobj);
1043 struct net_device *dev = queue->dev;
1044 struct xps_dev_maps *dev_maps;
1045 struct xps_map *map;
1046 unsigned long index;
1047 int i, pos, nonempty = 0;
1048
1049 index = get_netdev_queue_index(queue);
1050
1051 mutex_lock(&xps_map_mutex);
1052 dev_maps = xmap_dereference(dev->xps_maps);
1053
1054 if (dev_maps) {
1055 for_each_possible_cpu(i) {
1056 map = xmap_dereference(dev_maps->cpu_map[i]);
1057 if (!map)
1058 continue;
1059
1060 for (pos = 0; pos < map->len; pos++)
1061 if (map->queues[pos] == index)
1062 break;
1063
1064 if (pos < map->len) {
1065 if (map->len > 1)
1066 map->queues[pos] =
1067 map->queues[--map->len];
1068 else {
1069 RCU_INIT_POINTER(dev_maps->cpu_map[i],
1070 NULL);
1071 call_rcu(&map->rcu, xps_map_release);
1072 map = NULL;
1073 }
1074 }
1075 if (map)
1076 nonempty = 1;
1077 }
1078
1079 if (!nonempty) {
1080 RCU_INIT_POINTER(dev->xps_maps, NULL);
1081 call_rcu(&dev_maps->rcu, xps_dev_maps_release);
1082 }
1083 }
1084
1085 mutex_unlock(&xps_map_mutex);
1086
1087 memset(kobj, 0, sizeof(*kobj));
1088 dev_put(queue->dev);
1089}
1090
1091static struct kobj_type netdev_queue_ktype = {
1092 .sysfs_ops = &netdev_queue_sysfs_ops,
1093 .release = netdev_queue_release,
1094 .default_attrs = netdev_queue_default_attrs,
1095};
1096
1097static int netdev_queue_add_kobject(struct net_device *net, int index)
1098{
1099 struct netdev_queue *queue = net->_tx + index;
1100 struct kobject *kobj = &queue->kobj;
1101 int error = 0;
1102
1103 kobj->kset = net->queues_kset;
1104 error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
1105 "tx-%u", index);
1106 if (error) {
1107 kobject_put(kobj);
1108 return error;
1109 }
1110
1111 kobject_uevent(kobj, KOBJ_ADD);
1112 dev_hold(queue->dev);
1113
1114 return error;
1115}
1116#endif /* CONFIG_XPS */
1117
1118int
1119netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
1120{
1121#ifdef CONFIG_XPS
1122 int i;
1123 int error = 0;
1124
1125 for (i = old_num; i < new_num; i++) {
1126 error = netdev_queue_add_kobject(net, i);
1127 if (error) {
1128 new_num = old_num;
1129 break;
1130 }
1131 }
1132
1133 while (--i >= new_num)
1134 kobject_put(&net->_tx[i].kobj);
1135
1136 return error;
1137#else
1138 return 0;
1139#endif
1140}
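
Drivers are not expected to call netdev_queue_update_kobjects() themselves; they resize the active queue set and the sysfs entries follow. A hedged sketch of the usual driver-side path, assuming netif_set_real_num_tx_queues() performs the kobject update internally for registered devices, as introduced by this series:

/* Sketch: shrink/grow the live tx queue set; tx-<n> kobjects track it. */
static int my_set_tx_channels(struct net_device *dev, unsigned int txqs)
{
	int err;

	rtnl_lock();	/* netif_set_real_num_tx_queues() asserts RTNL */
	err = netif_set_real_num_tx_queues(dev, txqs);
	rtnl_unlock();
	return err;
}
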
1141
1142static int register_queue_kobjects(struct net_device *net)
1143{
1144 int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
1145
1146#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
781 net->queues_kset = kset_create_and_add("queues", 1147 net->queues_kset = kset_create_and_add("queues",
782 NULL, &net->dev.kobj); 1148 NULL, &net->dev.kobj);
783 if (!net->queues_kset) 1149 if (!net->queues_kset)
784 return -ENOMEM; 1150 return -ENOMEM;
785 return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues); 1151#endif
1152
1153#ifdef CONFIG_RPS
1154 real_rx = net->real_num_rx_queues;
1155#endif
1156 real_tx = net->real_num_tx_queues;
1157
1158 error = net_rx_queue_update_kobjects(net, 0, real_rx);
1159 if (error)
1160 goto error;
1161 rxq = real_rx;
1162
1163 error = netdev_queue_update_kobjects(net, 0, real_tx);
1164 if (error)
1165 goto error;
1166 txq = real_tx;
1167
1168 return 0;
1169
1170error:
1171 netdev_queue_update_kobjects(net, txq, 0);
1172 net_rx_queue_update_kobjects(net, rxq, 0);
1173 return error;
786} 1174}
787 1175
788static void rx_queue_remove_kobjects(struct net_device *net) 1176static void remove_queue_kobjects(struct net_device *net)
789{ 1177{
790 net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0); 1178 int real_rx = 0, real_tx = 0;
1179
1180#ifdef CONFIG_RPS
1181 real_rx = net->real_num_rx_queues;
1182#endif
1183 real_tx = net->real_num_tx_queues;
1184
1185 net_rx_queue_update_kobjects(net, real_rx, 0);
1186 netdev_queue_update_kobjects(net, real_tx, 0);
1187#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
791 kset_unregister(net->queues_kset); 1188 kset_unregister(net->queues_kset);
1189#endif
792} 1190}
793#endif /* CONFIG_RPS */
794 1191
795static const void *net_current_ns(void) 1192static const void *net_current_ns(void)
796{ 1193{
@@ -889,9 +1286,7 @@ void netdev_unregister_kobject(struct net_device * net)
889 1286
890 kobject_get(&dev->kobj); 1287 kobject_get(&dev->kobj);
891 1288
892#ifdef CONFIG_RPS 1289 remove_queue_kobjects(net);
893 rx_queue_remove_kobjects(net);
894#endif
895 1290
896 device_del(dev); 1291 device_del(dev);
897} 1292}
@@ -930,13 +1325,11 @@ int netdev_register_kobject(struct net_device *net)
930 if (error) 1325 if (error)
931 return error; 1326 return error;
932 1327
933#ifdef CONFIG_RPS 1328 error = register_queue_kobjects(net);
934 error = rx_queue_register_kobjects(net);
935 if (error) { 1329 if (error) {
936 device_del(dev); 1330 device_del(dev);
937 return error; 1331 return error;
938 } 1332 }
939#endif
940 1333
941 return error; 1334 return error;
942} 1335}
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 778e1571548..bd7751ec1c4 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,8 +4,8 @@
4int netdev_kobject_init(void); 4int netdev_kobject_init(void);
5int netdev_register_kobject(struct net_device *); 5int netdev_register_kobject(struct net_device *);
6void netdev_unregister_kobject(struct net_device *); 6void netdev_unregister_kobject(struct net_device *);
7#ifdef CONFIG_RPS
8int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num); 7int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
9#endif 8int netdev_queue_update_kobjects(struct net_device *net,
9 int old_num, int new_num);
10 10
11#endif 11#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4e98ffac3af..ee38acb6d46 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -76,8 +76,7 @@ static void queue_process(struct work_struct *work)
76 76
77 local_irq_save(flags); 77 local_irq_save(flags);
78 __netif_tx_lock(txq, smp_processor_id()); 78 __netif_tx_lock(txq, smp_processor_id());
79 if (netif_tx_queue_stopped(txq) || 79 if (netif_tx_queue_frozen_or_stopped(txq) ||
80 netif_tx_queue_frozen(txq) ||
81 ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { 80 ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
82 skb_queue_head(&npinfo->txq, skb); 81 skb_queue_head(&npinfo->txq, skb);
83 __netif_tx_unlock(txq); 82 __netif_tx_unlock(txq);
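
The two queue-state tests collapse into one mask check. The new helper lives in include/linux/netdevice.h; roughly, and as a sketch rather than a quote from this patch, it reads:

/* Sketch: XOFF and FROZEN sit in the same state word, so one AND
 * replaces the two separate bit tests removed above.
 */
static inline int netif_tx_queue_frozen_or_stopped(
				const struct netdev_queue *dev_queue)
{
	return dev_queue->state & QUEUE_STATE_XOFF_OR_FROZEN;
}
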
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 33bc3823ac6..2953b2abc97 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -378,6 +378,7 @@ struct pktgen_dev {
378 378
379 u16 queue_map_min; 379 u16 queue_map_min;
380 u16 queue_map_max; 380 u16 queue_map_max;
381 __u32 skb_priority; /* skb priority field */
381 int node; /* Memory node */ 382 int node; /* Memory node */
382 383
383#ifdef CONFIG_XFRM 384#ifdef CONFIG_XFRM
@@ -394,6 +395,8 @@ struct pktgen_hdr {
394 __be32 tv_usec; 395 __be32 tv_usec;
395}; 396};
396 397
398static bool pktgen_exiting __read_mostly;
399
397struct pktgen_thread { 400struct pktgen_thread {
398 spinlock_t if_lock; /* for list of devices */ 401 spinlock_t if_lock; /* for list of devices */
399 struct list_head if_list; /* All device here */ 402 struct list_head if_list; /* All device here */
@@ -547,6 +550,10 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
547 pkt_dev->queue_map_min, 550 pkt_dev->queue_map_min,
548 pkt_dev->queue_map_max); 551 pkt_dev->queue_map_max);
549 552
553 if (pkt_dev->skb_priority)
554 seq_printf(seq, " skb_priority: %u\n",
555 pkt_dev->skb_priority);
556
550 if (pkt_dev->flags & F_IPV6) { 557 if (pkt_dev->flags & F_IPV6) {
551 char b1[128], b2[128], b3[128]; 558 char b1[128], b2[128], b3[128];
552 fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); 559 fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
@@ -1711,6 +1718,18 @@ static ssize_t pktgen_if_write(struct file *file,
1711 return count; 1718 return count;
1712 } 1719 }
1713 1720
1721 if (!strcmp(name, "skb_priority")) {
1722 len = num_arg(&user_buffer[i], 9, &value);
1723 if (len < 0)
1724 return len;
1725
1726 i += len;
1727 pkt_dev->skb_priority = value;
1728 sprintf(pg_result, "OK: skb_priority=%i",
1729 pkt_dev->skb_priority);
1730 return count;
1731 }
1732
1714 sprintf(pkt_dev->result, "No such parameter \"%s\"", name); 1733 sprintf(pkt_dev->result, "No such parameter \"%s\"", name);
1715 return -EINVAL; 1734 return -EINVAL;
1716} 1735}
@@ -2671,6 +2690,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2671 skb->transport_header = skb->network_header + sizeof(struct iphdr); 2690 skb->transport_header = skb->network_header + sizeof(struct iphdr);
2672 skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); 2691 skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
2673 skb_set_queue_mapping(skb, queue_map); 2692 skb_set_queue_mapping(skb, queue_map);
2693 skb->priority = pkt_dev->skb_priority;
2694
2674 iph = ip_hdr(skb); 2695 iph = ip_hdr(skb);
2675 udph = udp_hdr(skb); 2696 udph = udp_hdr(skb);
2676 2697
@@ -3016,6 +3037,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
3016 skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); 3037 skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
3017 skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); 3038 skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
3018 skb_set_queue_mapping(skb, queue_map); 3039 skb_set_queue_mapping(skb, queue_map);
3040 skb->priority = pkt_dev->skb_priority;
3019 iph = ipv6_hdr(skb); 3041 iph = ipv6_hdr(skb);
3020 udph = udp_hdr(skb); 3042 udph = udp_hdr(skb);
3021 3043
@@ -3431,11 +3453,6 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
3431 3453
3432 remove_proc_entry(t->tsk->comm, pg_proc_dir); 3454 remove_proc_entry(t->tsk->comm, pg_proc_dir);
3433 3455
3434 mutex_lock(&pktgen_thread_lock);
3435
3436 list_del(&t->th_list);
3437
3438 mutex_unlock(&pktgen_thread_lock);
3439} 3456}
3440 3457
3441static void pktgen_resched(struct pktgen_dev *pkt_dev) 3458static void pktgen_resched(struct pktgen_dev *pkt_dev)
@@ -3510,7 +3527,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3510 3527
3511 __netif_tx_lock_bh(txq); 3528 __netif_tx_lock_bh(txq);
3512 3529
3513 if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) { 3530 if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) {
3514 ret = NETDEV_TX_BUSY; 3531 ret = NETDEV_TX_BUSY;
3515 pkt_dev->last_ok = 0; 3532 pkt_dev->last_ok = 0;
3516 goto unlock; 3533 goto unlock;
@@ -3582,6 +3599,8 @@ static int pktgen_thread_worker(void *arg)
3582 pkt_dev = next_to_run(t); 3599 pkt_dev = next_to_run(t);
3583 3600
3584 if (unlikely(!pkt_dev && t->control == 0)) { 3601 if (unlikely(!pkt_dev && t->control == 0)) {
3602 if (pktgen_exiting)
3603 break;
3585 wait_event_interruptible_timeout(t->queue, 3604 wait_event_interruptible_timeout(t->queue,
3586 t->control != 0, 3605 t->control != 0,
3587 HZ/10); 3606 HZ/10);
@@ -3634,6 +3653,13 @@ static int pktgen_thread_worker(void *arg)
3634 pr_debug("%s removing thread\n", t->tsk->comm); 3653 pr_debug("%s removing thread\n", t->tsk->comm);
3635 pktgen_rem_thread(t); 3654 pktgen_rem_thread(t);
3636 3655
3656 /* Wait for kthread_stop */
3657 while (!kthread_should_stop()) {
3658 set_current_state(TASK_INTERRUPTIBLE);
3659 schedule();
3660 }
3661 __set_current_state(TASK_RUNNING);
3662
3637 return 0; 3663 return 0;
3638} 3664}
3639 3665
@@ -3908,6 +3934,7 @@ static void __exit pg_cleanup(void)
3908 struct list_head *q, *n; 3934 struct list_head *q, *n;
3909 3935
3910 /* Stop all interfaces & threads */ 3936 /* Stop all interfaces & threads */
3937 pktgen_exiting = true;
3911 3938
3912 list_for_each_safe(q, n, &pktgen_threads) { 3939 list_for_each_safe(q, n, &pktgen_threads) {
3913 t = list_entry(q, struct pktgen_thread, th_list); 3940 t = list_entry(q, struct pktgen_thread, th_list);
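
The new skb_priority knob is driven through the usual pktgen proc interface. A small userspace sketch, assuming eth0 has already been bound to a pktgen thread (e.g. via /proc/net/pktgen/kpktgend_0):

/* Illustrative only: set the priority copied into skb->priority above. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/net/pktgen/eth0", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "skb_priority 5\n");	/* parsed by pktgen_if_write() */
	fclose(f);
	return 0;
}
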
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index fceeb37d716..182236b2510 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -33,6 +33,7 @@
33 * Note : Dont forget somaxconn that may limit backlog too. 33 * Note : Dont forget somaxconn that may limit backlog too.
34 */ 34 */
35int sysctl_max_syn_backlog = 256; 35int sysctl_max_syn_backlog = 256;
36EXPORT_SYMBOL(sysctl_max_syn_backlog);
36 37
37int reqsk_queue_alloc(struct request_sock_queue *queue, 38int reqsk_queue_alloc(struct request_sock_queue *queue,
38 unsigned int nr_table_entries) 39 unsigned int nr_table_entries)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 841c287ef40..750db57f3bb 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -362,6 +362,95 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
362 return size; 362 return size;
363} 363}
364 364
365static LIST_HEAD(rtnl_af_ops);
366
367static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
368{
369 const struct rtnl_af_ops *ops;
370
371 list_for_each_entry(ops, &rtnl_af_ops, list) {
372 if (ops->family == family)
373 return ops;
374 }
375
376 return NULL;
377}
378
379/**
380 * __rtnl_af_register - Register rtnl_af_ops with rtnetlink.
381 * @ops: struct rtnl_af_ops * to register
382 *
383 * The caller must hold the rtnl_mutex.
384 *
385 * Returns 0 on success or a negative error code.
386 */
387int __rtnl_af_register(struct rtnl_af_ops *ops)
388{
389 list_add_tail(&ops->list, &rtnl_af_ops);
390 return 0;
391}
392EXPORT_SYMBOL_GPL(__rtnl_af_register);
393
394/**
395 * rtnl_af_register - Register rtnl_af_ops with rtnetlink.
396 * @ops: struct rtnl_af_ops * to register
397 *
398 * Returns 0 on success or a negative error code.
399 */
400int rtnl_af_register(struct rtnl_af_ops *ops)
401{
402 int err;
403
404 rtnl_lock();
405 err = __rtnl_af_register(ops);
406 rtnl_unlock();
407 return err;
408}
409EXPORT_SYMBOL_GPL(rtnl_af_register);
410
411/**
412 * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
413 * @ops: struct rtnl_af_ops * to unregister
414 *
415 * The caller must hold the rtnl_mutex.
416 */
417void __rtnl_af_unregister(struct rtnl_af_ops *ops)
418{
419 list_del(&ops->list);
420}
421EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
422
423/**
424 * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
425 * @ops: struct rtnl_af_ops * to unregister
426 */
427void rtnl_af_unregister(struct rtnl_af_ops *ops)
428{
429 rtnl_lock();
430 __rtnl_af_unregister(ops);
431 rtnl_unlock();
432}
433EXPORT_SYMBOL_GPL(rtnl_af_unregister);
434
435static size_t rtnl_link_get_af_size(const struct net_device *dev)
436{
437 struct rtnl_af_ops *af_ops;
438 size_t size;
439
440 /* IFLA_AF_SPEC */
441 size = nla_total_size(sizeof(struct nlattr));
442
443 list_for_each_entry(af_ops, &rtnl_af_ops, list) {
444 if (af_ops->get_link_af_size) {
445 /* AF_* + nested data */
446 size += nla_total_size(sizeof(struct nlattr)) +
447 af_ops->get_link_af_size(dev);
448 }
449 }
450
451 return size;
452}
453
365static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) 454static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
366{ 455{
367 const struct rtnl_link_ops *ops = dev->rtnl_link_ops; 456 const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
@@ -671,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
671 + nla_total_size(4) /* IFLA_NUM_VF */ 760 + nla_total_size(4) /* IFLA_NUM_VF */
672 + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ 761 + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
673 + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ 762 + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
674 + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ 763 + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
764 + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
675} 765}
676 766
677static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) 767static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -757,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
757 struct nlmsghdr *nlh; 847 struct nlmsghdr *nlh;
758 struct rtnl_link_stats64 temp; 848 struct rtnl_link_stats64 temp;
759 const struct rtnl_link_stats64 *stats; 849 const struct rtnl_link_stats64 *stats;
760 struct nlattr *attr; 850 struct nlattr *attr, *af_spec;
851 struct rtnl_af_ops *af_ops;
761 852
762 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); 853 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
763 if (nlh == NULL) 854 if (nlh == NULL)
@@ -866,6 +957,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
866 goto nla_put_failure; 957 goto nla_put_failure;
867 } 958 }
868 959
960 if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
961 goto nla_put_failure;
962
963 list_for_each_entry(af_ops, &rtnl_af_ops, list) {
964 if (af_ops->fill_link_af) {
965 struct nlattr *af;
966 int err;
967
968 if (!(af = nla_nest_start(skb, af_ops->family)))
969 goto nla_put_failure;
970
971 err = af_ops->fill_link_af(skb, dev);
972
973 /*
974 * Caller may return ENODATA to indicate that there
975 * was no data to be dumped. This is not an error, it
976 * means we should trim the attribute header and
977 * continue.
978 */
979 if (err == -ENODATA)
980 nla_nest_cancel(skb, af);
981 else if (err < 0)
982 goto nla_put_failure;
983
984 nla_nest_end(skb, af);
985 }
986 }
987
988 nla_nest_end(skb, af_spec);
989
869 return nlmsg_end(skb, nlh); 990 return nlmsg_end(skb, nlh);
870 991
871nla_put_failure: 992nla_put_failure:
@@ -924,6 +1045,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
924 [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, 1045 [IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
925 [IFLA_VF_PORTS] = { .type = NLA_NESTED }, 1046 [IFLA_VF_PORTS] = { .type = NLA_NESTED },
926 [IFLA_PORT_SELF] = { .type = NLA_NESTED }, 1047 [IFLA_PORT_SELF] = { .type = NLA_NESTED },
1048 [IFLA_AF_SPEC] = { .type = NLA_NESTED },
927}; 1049};
928EXPORT_SYMBOL(ifla_policy); 1050EXPORT_SYMBOL(ifla_policy);
929 1051
@@ -985,6 +1107,28 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
985 return -EINVAL; 1107 return -EINVAL;
986 } 1108 }
987 1109
1110 if (tb[IFLA_AF_SPEC]) {
1111 struct nlattr *af;
1112 int rem, err;
1113
1114 nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
1115 const struct rtnl_af_ops *af_ops;
1116
1117 if (!(af_ops = rtnl_af_lookup(nla_type(af))))
1118 return -EAFNOSUPPORT;
1119
1120 if (!af_ops->set_link_af)
1121 return -EOPNOTSUPP;
1122
1123 if (af_ops->validate_link_af) {
1124 err = af_ops->validate_link_af(dev,
1125 tb[IFLA_AF_SPEC]);
1126 if (err < 0)
1127 return err;
1128 }
1129 }
1130 }
1131
988 return 0; 1132 return 0;
989} 1133}
990 1134
@@ -1225,6 +1369,24 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
1225 goto errout; 1369 goto errout;
1226 modified = 1; 1370 modified = 1;
1227 } 1371 }
1372
1373 if (tb[IFLA_AF_SPEC]) {
1374 struct nlattr *af;
1375 int rem;
1376
1377 nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
1378 const struct rtnl_af_ops *af_ops;
1379
1380 if (!(af_ops = rtnl_af_lookup(nla_type(af))))
1381 BUG();
1382
1383 err = af_ops->set_link_af(dev, af);
1384 if (err < 0)
1385 goto errout;
1386
1387 modified = 1;
1388 }
1389 }
1228 err = 0; 1390 err = 0;
1229 1391
1230errout: 1392errout:
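
A protocol that wants per-family attributes inside IFLA_AF_SPEC supplies an rtnl_af_ops and registers it. The sketch below is hedged: the member names are taken from the call sites above, the exact prototypes live in include/net/rtnetlink.h, and AF_INET merely stands in for a real consumer:

/* Sketch of a minimal IFLA_AF_SPEC provider. */
static int my_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
{
	return -ENODATA;	/* nothing to dump; the nest is trimmed above */
}

static size_t my_get_link_af_size(const struct net_device *dev)
{
	return 0;
}

static int my_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
	return 0;		/* apply nested per-family settings here */
}

static struct rtnl_af_ops my_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = my_fill_link_af,
	.get_link_af_size = my_get_link_af_size,
	.set_link_af	  = my_set_link_af,
};

static int __init my_af_init(void)
{
	return rtnl_af_register(&my_af_ops);	/* takes rtnl_mutex itself */
}

Note that validate_link_af is optional (the validation path above checks for it), while set_link_af is effectively mandatory: without it, validate_linkmsg() returns -EOPNOTSUPP.
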
diff --git a/net/core/scm.c b/net/core/scm.c
index 413cab89017..bbe45445080 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -79,10 +79,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
79 return -ENOMEM; 79 return -ENOMEM;
80 *fplp = fpl; 80 *fplp = fpl;
81 fpl->count = 0; 81 fpl->count = 0;
82 fpl->max = SCM_MAX_FD;
82 } 83 }
83 fpp = &fpl->fp[fpl->count]; 84 fpp = &fpl->fp[fpl->count];
84 85
85 if (fpl->count + num > SCM_MAX_FD) 86 if (fpl->count + num > fpl->max)
86 return -EINVAL; 87 return -EINVAL;
87 88
88 /* 89 /*
@@ -331,11 +332,12 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
331 if (!fpl) 332 if (!fpl)
332 return NULL; 333 return NULL;
333 334
334 new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); 335 new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
336 GFP_KERNEL);
335 if (new_fpl) { 337 if (new_fpl) {
336 for (i=fpl->count-1; i>=0; i--) 338 for (i = 0; i < fpl->count; i++)
337 get_file(fpl->fp[i]); 339 get_file(fpl->fp[i]);
338 memcpy(new_fpl, fpl, sizeof(*fpl)); 340 new_fpl->max = new_fpl->count;
339 } 341 }
340 return new_fpl; 342 return new_fpl;
341} 343}
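
The scm_fp_dup() rewrite leans on kmemdup() sized with offsetof(), so only the populated prefix of the fp[] array is copied, and the duplicate's max is clamped to its count. The same pattern in isolation, as a hedged sketch around an invented structure:

/* Sketch: duplicate only the live prefix of a struct ending in an array
 * (kmemdup() is kmalloc() + memcpy(); offsetof() stops the copy right
 * after the last used slot).
 */
struct fd_batch {
	short count, max;
	struct file *fp[8];		/* stand-in for SCM_MAX_FD slots */
};

static struct fd_batch *fd_batch_dup(const struct fd_batch *src)
{
	return kmemdup(src, offsetof(struct fd_batch, fp[src->count]),
		       GFP_KERNEL);
}
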
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 104f8444754..8814a9a52f4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -778,6 +778,28 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
778 778
779 size = SKB_DATA_ALIGN(size); 779 size = SKB_DATA_ALIGN(size);
780 780
781 /* Check if we can avoid taking references on fragments if we own
782 * the last reference on skb->head. (see skb_release_data())
783 */
784 if (!skb->cloned)
785 fastpath = true;
786 else {
787 int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
788
789 fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
790 }
791
792 if (fastpath &&
793 size + sizeof(struct skb_shared_info) <= ksize(skb->head)) {
794 memmove(skb->head + size, skb_shinfo(skb),
795 offsetof(struct skb_shared_info,
796 frags[skb_shinfo(skb)->nr_frags]));
797 memmove(skb->head + nhead, skb->head,
798 skb_tail_pointer(skb) - skb->head);
799 off = nhead;
800 goto adjust_others;
801 }
802
781 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 803 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
782 if (!data) 804 if (!data)
783 goto nodata; 805 goto nodata;
@@ -791,17 +813,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
791 skb_shinfo(skb), 813 skb_shinfo(skb),
792 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); 814 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
793 815
794 /* Check if we can avoid taking references on fragments if we own
795 * the last reference on skb->head. (see skb_release_data())
796 */
797 if (!skb->cloned)
798 fastpath = true;
799 else {
800 int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
801
802 fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
803 }
804
805 if (fastpath) { 816 if (fastpath) {
806 kfree(skb->head); 817 kfree(skb->head);
807 } else { 818 } else {
@@ -816,6 +827,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
816 off = (data + nhead) - skb->head; 827 off = (data + nhead) - skb->head;
817 828
818 skb->head = data; 829 skb->head = data;
830adjust_others:
819 skb->data += off; 831 skb->data += off;
820#ifdef NET_SKBUFF_DATA_USES_OFFSET 832#ifdef NET_SKBUFF_DATA_USES_OFFSET
821 skb->end = size; 833 skb->end = size;
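
The hoisted fastpath test is easier to read once dataref's encoding is spelled out: the low SKB_DATAREF_SHIFT bits count users of the whole buffer, while the high bits count payload-only users (set up when skb->nohdr is in effect). A hedged restatement of the "sole owner" condition the patch moves ahead of the allocation:

/* Sketch: skb->head may be reused in place iff no one else holds it. */
static bool skb_head_is_ours_alone(const struct sk_buff *skb)
{
	int delta;

	if (!skb->cloned)
		return true;
	/* one header reference, plus one payload reference if nohdr */
	delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
	return atomic_read(&skb_shinfo(skb)->dataref) == delta;
}
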
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22da85..b124d28ff1c 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -26,12 +26,12 @@ static struct sock_filter ptp_filter[] = {
26 PTP_FILTER 26 PTP_FILTER
27}; 27};
28 28
29static unsigned int classify(struct sk_buff *skb) 29static unsigned int classify(const struct sk_buff *skb)
30{ 30{
31 if (likely(skb->dev && 31 if (likely(skb->dev &&
32 skb->dev->phydev && 32 skb->dev->phydev &&
33 skb->dev->phydev->drv)) 33 skb->dev->phydev->drv))
34 return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter)); 34 return sk_run_filter(skb, ptp_filter);
35 else 35 else
36 return PTP_CLASS_NONE; 36 return PTP_CLASS_NONE;
37} 37}
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2991efcc8de..5c8362b037e 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o 1obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
2 2
3dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o 3dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
4 4 qpolicy.o
5# 5#
6# CCID algorithms to be used by dccp.ko 6# CCID algorithms to be used by dccp.ko
7# 7#
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 92a6fcb40d7..25b7a8d1ad5 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -1,444 +1,375 @@
1/* 1/*
2 * net/dccp/ackvec.c 2 * net/dccp/ackvec.c
3 * 3 *
4 * An implementation of the DCCP protocol 4 * An implementation of Ack Vectors for the DCCP protocol
5 * Copyright (c) 2007 University of Aberdeen, Scotland, UK
5 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net> 6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the 9 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; version 2 of the License; 10 * Free Software Foundation; version 2 of the License;
10 */ 11 */
11
12#include "ackvec.h"
13#include "dccp.h" 12#include "dccp.h"
14
15#include <linux/init.h>
16#include <linux/errno.h>
17#include <linux/kernel.h> 13#include <linux/kernel.h>
18#include <linux/skbuff.h>
19#include <linux/slab.h> 14#include <linux/slab.h>
20 15
21#include <net/sock.h>
22
23static struct kmem_cache *dccp_ackvec_slab; 16static struct kmem_cache *dccp_ackvec_slab;
24static struct kmem_cache *dccp_ackvec_record_slab; 17static struct kmem_cache *dccp_ackvec_record_slab;
25 18
26static struct dccp_ackvec_record *dccp_ackvec_record_new(void) 19struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
27{ 20{
28 struct dccp_ackvec_record *avr = 21 struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
29 kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
30 22
31 if (avr != NULL) 23 if (av != NULL) {
32 INIT_LIST_HEAD(&avr->avr_node); 24 av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
33 25 INIT_LIST_HEAD(&av->av_records);
34 return avr; 26 }
27 return av;
35} 28}
36 29
37static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr) 30static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
38{ 31{
39 if (unlikely(avr == NULL)) 32 struct dccp_ackvec_record *cur, *next;
40 return; 33
41 /* Check if deleting a linked record */ 34 list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
42 WARN_ON(!list_empty(&avr->avr_node)); 35 kmem_cache_free(dccp_ackvec_record_slab, cur);
43 kmem_cache_free(dccp_ackvec_record_slab, avr); 36 INIT_LIST_HEAD(&av->av_records);
44} 37}
45 38
46static void dccp_ackvec_insert_avr(struct dccp_ackvec *av, 39void dccp_ackvec_free(struct dccp_ackvec *av)
47 struct dccp_ackvec_record *avr)
48{ 40{
49 /* 41 if (likely(av != NULL)) {
50 * AVRs are sorted by seqno. Since we are sending them in order, we 42 dccp_ackvec_purge_records(av);
51 * just add the AVR at the head of the list. 43 kmem_cache_free(dccp_ackvec_slab, av);
52 * -sorbo.
53 */
54 if (!list_empty(&av->av_records)) {
55 const struct dccp_ackvec_record *head =
56 list_entry(av->av_records.next,
57 struct dccp_ackvec_record,
58 avr_node);
59 BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno));
60 } 44 }
61
62 list_add(&avr->avr_node, &av->av_records);
63} 45}
64 46
65int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) 47/**
48 * dccp_ackvec_update_records - Record information about sent Ack Vectors
49 * @av: Ack Vector records to update
50 * @seqno: Sequence number of the packet carrying the Ack Vector just sent
51 * @nonce_sum: The sum of all buffer nonces contained in the Ack Vector
52 */
53int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
66{ 54{
67 struct dccp_sock *dp = dccp_sk(sk);
68 struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
69 /* Figure out how many options do we need to represent the ackvec */
70 const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
71 u16 len = av->av_vec_len + 2 * nr_opts, i;
72 u32 elapsed_time;
73 const unsigned char *tail, *from;
74 unsigned char *to;
75 struct dccp_ackvec_record *avr; 55 struct dccp_ackvec_record *avr;
76 suseconds_t delta;
77
78 if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
79 return -1;
80
81 delta = ktime_us_delta(ktime_get_real(), av->av_time);
82 elapsed_time = delta / 10;
83 56
84 if (elapsed_time != 0 && 57 avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
85 dccp_insert_option_elapsed_time(skb, elapsed_time))
86 return -1;
87
88 avr = dccp_ackvec_record_new();
89 if (avr == NULL) 58 if (avr == NULL)
90 return -1; 59 return -ENOBUFS;
91
92 DCCP_SKB_CB(skb)->dccpd_opt_len += len;
93
94 to = skb_push(skb, len);
95 len = av->av_vec_len;
96 from = av->av_buf + av->av_buf_head;
97 tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
98
99 for (i = 0; i < nr_opts; ++i) {
100 int copylen = len;
101
102 if (len > DCCP_SINGLE_OPT_MAXLEN)
103 copylen = DCCP_SINGLE_OPT_MAXLEN;
104
105 *to++ = DCCPO_ACK_VECTOR_0;
106 *to++ = copylen + 2;
107
108 /* Check if buf_head wraps */
109 if (from + copylen > tail) {
110 const u16 tailsize = tail - from;
111
112 memcpy(to, from, tailsize);
113 to += tailsize;
114 len -= tailsize;
115 copylen -= tailsize;
116 from = av->av_buf;
117 }
118
119 memcpy(to, from, copylen);
120 from += copylen;
121 to += copylen;
122 len -= copylen;
123 }
124 60
61 avr->avr_ack_seqno = seqno;
62 avr->avr_ack_ptr = av->av_buf_head;
63 avr->avr_ack_ackno = av->av_buf_ackno;
64 avr->avr_ack_nonce = nonce_sum;
65 avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
125 /* 66 /*
126 * From RFC 4340, A.2: 67 * When the buffer overflows, we keep no more than one record. This is
127 * 68 * the simplest way of disambiguating sender-Acks dating from before the
128 * For each acknowledgement it sends, the HC-Receiver will add an 69 * overflow from sender-Acks which refer to after the overflow; a simple
129 * acknowledgement record. ack_seqno will equal the HC-Receiver 70 * solution is preferable here since we are handling an exception.
130 * sequence number it used for the ack packet; ack_ptr will equal
131 * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
132 * equal buf_nonce.
133 */ 71 */
134 avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; 72 if (av->av_overflow)
135 avr->avr_ack_ptr = av->av_buf_head; 73 dccp_ackvec_purge_records(av);
136 avr->avr_ack_ackno = av->av_buf_ackno; 74 /*
137 avr->avr_ack_nonce = av->av_buf_nonce; 75 * Since GSS is incremented for each packet, the list is automatically
138 avr->avr_sent_len = av->av_vec_len; 76 * arranged in descending order of @ack_seqno.
139 77 */
140 dccp_ackvec_insert_avr(av, avr); 78 list_add(&avr->avr_node, &av->av_records);
141 79
142 dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, " 80 dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
143 "ack_ackno=%llu\n",
144 dccp_role(sk), avr->avr_sent_len,
145 (unsigned long long)avr->avr_ack_seqno, 81 (unsigned long long)avr->avr_ack_seqno,
146 (unsigned long long)avr->avr_ack_ackno); 82 (unsigned long long)avr->avr_ack_ackno,
83 avr->avr_ack_runlen);
147 return 0; 84 return 0;
148} 85}
149 86
150struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) 87static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
88 const u64 ackno)
151{ 89{
152 struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); 90 struct dccp_ackvec_record *avr;
153 91 /*
154 if (av != NULL) { 92 * Exploit that records are inserted in descending order of sequence
155 av->av_buf_head = DCCP_MAX_ACKVEC_LEN - 1; 93 * number, start with the oldest record first. If @ackno is `before'
156 av->av_buf_ackno = UINT48_MAX + 1; 94 * the earliest ack_ackno, the packet is too old to be considered.
157 av->av_buf_nonce = 0; 95 */
158 av->av_time = ktime_set(0, 0); 96 list_for_each_entry_reverse(avr, av_list, avr_node) {
159 av->av_vec_len = 0; 97 if (avr->avr_ack_seqno == ackno)
160 INIT_LIST_HEAD(&av->av_records); 98 return avr;
99 if (before48(ackno, avr->avr_ack_seqno))
100 break;
161 } 101 }
162 102 return NULL;
163 return av;
164} 103}
165 104
166void dccp_ackvec_free(struct dccp_ackvec *av) 105/*
106 * Buffer index and length computation using modulo-buffersize arithmetic.
107 * Note that, as pointers move from right to left, head is `before' tail.
108 */
109static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
167{ 110{
168 if (unlikely(av == NULL)) 111 return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
169 return;
170
171 if (!list_empty(&av->av_records)) {
172 struct dccp_ackvec_record *avr, *next;
173
174 list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
175 list_del_init(&avr->avr_node);
176 dccp_ackvec_record_delete(avr);
177 }
178 }
179
180 kmem_cache_free(dccp_ackvec_slab, av);
181} 112}
182 113
183static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, 114static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
184 const u32 index)
185{ 115{
186 return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK; 116 return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
187} 117}
188 118
189static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, 119u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
190 const u32 index)
191{ 120{
192 return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK; 121 if (unlikely(av->av_overflow))
122 return DCCPAV_MAX_ACKVEC_LEN;
123 return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
193} 124}
194 125
195/* 126/**
196 * If several packets are missing, the HC-Receiver may prefer to enter multiple 127 * dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1
197 * bytes with run length 0, rather than a single byte with a larger run length; 128 * @av: non-empty buffer to update
198 * this simplifies table updates if one of the missing packets arrives. 129 * @distance: negative or zero distance of @seqno from buf_ackno downward
130 * @seqno: the (old) sequence number whose record is to be updated
131 * @state: state in which packet carrying @seqno was received
199 */ 132 */
200static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, 133static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
201 const unsigned int packets, 134 u64 seqno, enum dccp_ackvec_states state)
202 const unsigned char state)
203{ 135{
204 long gap; 136 u16 ptr = av->av_buf_head;
205 long new_head;
206 137
207 if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN) 138 BUG_ON(distance > 0);
208 return -ENOBUFS; 139 if (unlikely(dccp_ackvec_is_empty(av)))
140 return;
209 141
210 gap = packets - 1; 142 do {
211 new_head = av->av_buf_head - packets; 143 u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);
212 144
213 if (new_head < 0) { 145 if (distance + runlen >= 0) {
214 if (gap > 0) { 146 /*
215 memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, 147 * Only update the state if packet has not been received
216 gap + new_head + 1); 148 * yet. This is OK as per the second table in RFC 4340,
217 gap = -new_head; 149 * 11.4.1; i.e. here we are using the following table:
150 * RECEIVED
151 * 0 1 3
152 * S +---+---+---+
153 * T 0 | 0 | 0 | 0 |
154 * O +---+---+---+
155 * R 1 | 1 | 1 | 1 |
156 * E +---+---+---+
157 * D 3 | 0 | 1 | 3 |
158 * +---+---+---+
159 * The "Not Received" state was set by reserve_seats().
160 */
161 if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
162 av->av_buf[ptr] = state;
163 else
164 dccp_pr_debug("Not changing %llu state to %u\n",
165 (unsigned long long)seqno, state);
166 break;
218 } 167 }
219 new_head += DCCP_MAX_ACKVEC_LEN;
220 }
221 168
222 av->av_buf_head = new_head; 169 distance += runlen + 1;
170 ptr = __ackvec_idx_add(ptr, 1);
223 171
224 if (gap > 0) 172 } while (ptr != av->av_buf_tail);
225 memset(av->av_buf + av->av_buf_head + 1, 173}
226 DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
227 174
228 av->av_buf[av->av_buf_head] = state; 175/* Mark @num entries after buf_head as "Not yet received". */
229 av->av_vec_len += packets; 176static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
230 return 0; 177{
178 u16 start = __ackvec_idx_add(av->av_buf_head, 1),
179 len = DCCPAV_MAX_ACKVEC_LEN - start;
180
181 /* check for buffer wrap-around */
182 if (num > len) {
183 memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
184 start = 0;
185 num -= len;
186 }
187 if (num)
188 memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
231} 189}
232 190
233/* 191/**
234 * Implements the RFC 4340, Appendix A 192 * dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer
193 * @av: container of buffer to update (can be empty or non-empty)
194 * @num_packets: number of packets to register (must be >= 1)
195 * @seqno: sequence number of the first packet in @num_packets
196 * @state: state in which packet carrying @seqno was received
235 */ 197 */
236int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, 198static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
237 const u64 ackno, const u8 state) 199 u64 seqno, enum dccp_ackvec_states state)
238{ 200{
239 /* 201 u32 num_cells = num_packets;
240 * Check at the right places if the buffer is full, if it is, tell the
241 * caller to start dropping packets till the HC-Sender acks our ACK
242 * vectors, when we will free up space in av_buf.
243 *
244 * We may well decide to do buffer compression, etc, but for now lets
245 * just drop.
246 *
247 * From Appendix A.1.1 (`New Packets'):
248 *
249 * Of course, the circular buffer may overflow, either when the
250 * HC-Sender is sending data at a very high rate, when the
251 * HC-Receiver's acknowledgements are not reaching the HC-Sender,
252 * or when the HC-Sender is forgetting to acknowledge those acks
253 * (so the HC-Receiver is unable to clean up old state). In this
254 * case, the HC-Receiver should either compress the buffer (by
255 * increasing run lengths when possible), transfer its state to
256 * a larger buffer, or, as a last resort, drop all received
257 * packets, without processing them whatsoever, until its buffer
258 * shrinks again.
259 */
260 202
261 /* See if this is the first ackno being inserted */ 203 if (num_packets > DCCPAV_BURST_THRESH) {
262 if (av->av_vec_len == 0) { 204 u32 lost_packets = num_packets - 1;
263 av->av_buf[av->av_buf_head] = state;
264 av->av_vec_len = 1;
265 } else if (after48(ackno, av->av_buf_ackno)) {
266 const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
267 205
206 DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
268 /* 207 /*
269 * Look if the state of this packet is the same as the 208 * We received 1 packet and have a loss of size "num_packets-1"
270 * previous ackno and if so if we can bump the head len. 209 * which we squeeze into num_cells-1 rather than reserving an
210 * entire byte for each lost packet.
211 * The reason is that the vector grows in O(burst_length); when
212 * it grows too large there will no room left for the payload.
213 * This is a trade-off: if a few packets out of the burst show
214 * up later, their state will not be changed; it is simply too
215 * costly to reshuffle/reallocate/copy the buffer each time.
216 * Should such problems persist, we will need to switch to a
217 * different underlying data structure.
271 */ 218 */
272 if (delta == 1 && 219 for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
273 dccp_ackvec_state(av, av->av_buf_head) == state && 220 u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN);
274 dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
275 av->av_buf[av->av_buf_head]++;
276 else if (dccp_ackvec_set_buf_head_state(av, delta, state))
277 return -ENOBUFS;
278 } else {
279 /*
280 * A.1.2. Old Packets
281 *
282 * When a packet with Sequence Number S <= buf_ackno
283 * arrives, the HC-Receiver will scan the table for
284 * the byte corresponding to S. (Indexing structures
285 * could reduce the complexity of this scan.)
286 */
287 u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
288 u32 index = av->av_buf_head;
289 221
290 while (1) { 222 av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
291 const u8 len = dccp_ackvec_len(av, index); 223 av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
292 const u8 av_state = dccp_ackvec_state(av, index); 224
293 /* 225 lost_packets -= len;
294 * valid packets not yet in av_buf have a reserved
295 * entry, with a len equal to 0.
296 */
297 if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
298 len == 0 && delta == 0) { /* Found our
299 reserved seat! */
300 dccp_pr_debug("Found %llu reserved seat!\n",
301 (unsigned long long)ackno);
302 av->av_buf[index] = state;
303 goto out;
304 }
305 /* len == 0 means one packet */
306 if (delta < len + 1)
307 goto out_duplicate;
308
309 delta -= len + 1;
310 if (++index == DCCP_MAX_ACKVEC_LEN)
311 index = 0;
312 } 226 }
313 } 227 }
314 228
315 av->av_buf_ackno = ackno; 229 if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
316 av->av_time = ktime_get_real(); 230 DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
317out: 231 av->av_overflow = true;
318 return 0; 232 }
233
234 av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
235 if (av->av_overflow)
236 av->av_buf_tail = av->av_buf_head;
319 237
320out_duplicate: 238 av->av_buf[av->av_buf_head] = state;
321 /* Duplicate packet */ 239 av->av_buf_ackno = seqno;
322 dccp_pr_debug("Received a dup or already considered lost " 240
323 "packet: %llu\n", (unsigned long long)ackno); 241 if (num_packets > 1)
324 return -EILSEQ; 242 dccp_ackvec_reserve_seats(av, num_packets - 1);
325} 243}
326 244
327static void dccp_ackvec_throw_record(struct dccp_ackvec *av, 245/**
328 struct dccp_ackvec_record *avr) 246 * dccp_ackvec_input - Register incoming packet in the buffer
247 */
248void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
329{ 249{
330 struct dccp_ackvec_record *next; 250 u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
251 enum dccp_ackvec_states state = DCCPAV_RECEIVED;
331 252
332 /* sort out vector length */ 253 if (dccp_ackvec_is_empty(av)) {
333 if (av->av_buf_head <= avr->avr_ack_ptr) 254 dccp_ackvec_add_new(av, 1, seqno, state);
334 av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head; 255 av->av_tail_ackno = seqno;
335 else
336 av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
337 av->av_buf_head + avr->avr_ack_ptr;
338 256
339 /* free records */ 257 } else {
340 list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) { 258 s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
341 list_del_init(&avr->avr_node); 259 u8 *current_head = av->av_buf + av->av_buf_head;
342 dccp_ackvec_record_delete(avr);
343 }
344}
345 260
346void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, 261 if (num_packets == 1 &&
347 const u64 ackno) 262 dccp_ackvec_state(current_head) == state &&
348{ 263 dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
349 struct dccp_ackvec_record *avr;
350 264
351 /* 265 *current_head += 1;
352 * If we traverse backwards, it should be faster when we have large 266 av->av_buf_ackno = seqno;
353 * windows. We will be receiving ACKs for stuff we sent a while back 267
354 * -sorbo. 268 } else if (num_packets > 0) {
355 */ 269 dccp_ackvec_add_new(av, num_packets, seqno, state);
356 list_for_each_entry_reverse(avr, &av->av_records, avr_node) { 270 } else {
357 if (ackno == avr->avr_ack_seqno) { 271 dccp_ackvec_update_old(av, num_packets, seqno, state);
358 dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, " 272 }
359 "ack_ackno=%llu, ACKED!\n",
360 dccp_role(sk), 1,
361 (unsigned long long)avr->avr_ack_seqno,
362 (unsigned long long)avr->avr_ack_ackno);
363 dccp_ackvec_throw_record(av, avr);
364 break;
365 } else if (avr->avr_ack_seqno > ackno)
366 break; /* old news */
367 } 273 }
368} 274}
369 275
370static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, 276/**
371 struct sock *sk, u64 *ackno, 277 * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection
372 const unsigned char len, 278 * This routine is called when the peer acknowledges the receipt of Ack Vectors
 373 const unsigned char *vector) 279 * up to and including @ackno. While based on section A.3 of RFC 4340, here
280 * are additional precautions to prevent corrupted buffer state. In particular,
281 * we use tail_ackno to identify outdated records; it always marks the earliest
282 * packet of group (2) in 11.4.2.
283 */
284void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
374{ 285{
375 unsigned char i; 286 struct dccp_ackvec_record *avr, *next;
376 struct dccp_ackvec_record *avr; 287 u8 runlen_now, eff_runlen;
288 s64 delta;
377 289
378 /* Check if we actually sent an ACK vector */ 290 avr = dccp_ackvec_lookup(&av->av_records, ackno);
379 if (list_empty(&av->av_records)) 291 if (avr == NULL)
380 return; 292 return;
293 /*
294 * Deal with outdated acknowledgments: this arises when e.g. there are
295 * several old records and the acks from the peer come in slowly. In
296 * that case we may still have records that pre-date tail_ackno.
297 */
298 delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
299 if (delta < 0)
300 goto free_records;
301 /*
302 * Deal with overlapping Ack Vectors: don't subtract more than the
303 * number of packets between tail_ackno and ack_ackno.
304 */
305 eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;
381 306
382 i = len; 307 runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
383 /* 308 /*
384 * XXX 309 * The run length of Ack Vector cells does not decrease over time. If
385 * I think it might be more efficient to work backwards. See comment on 310 * the run length is the same as at the time the Ack Vector was sent, we
386 * rcv_ackno. -sorbo. 311 * free the ack_ptr cell. That cell can however not be freed if the run
312 * length has increased: in this case we need to move the tail pointer
313 * backwards (towards higher indices), to its next-oldest neighbour.
387 */ 314 */
388 avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node); 315 if (runlen_now > eff_runlen) {
389 while (i--) {
390 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
391 u64 ackno_end_rl;
392 316
393 dccp_set_seqno(&ackno_end_rl, *ackno - rl); 317 av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
318 av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);
394 319
320 /* This move may not have cleared the overflow flag. */
321 if (av->av_overflow)
322 av->av_overflow = (av->av_buf_head == av->av_buf_tail);
323 } else {
324 av->av_buf_tail = avr->avr_ack_ptr;
395 /* 325 /*
396 * If our AVR sequence number is greater than the ack, go 326 * We have made sure that avr points to a valid cell within the
397 * forward in the AVR list until it is not so. 327 * buffer. This cell is either older than head, or equals head
328 * (empty buffer): in both cases we no longer have any overflow.
398 */ 329 */
399 list_for_each_entry_from(avr, &av->av_records, avr_node) { 330 av->av_overflow = 0;
400 if (!after48(avr->avr_ack_seqno, *ackno)) 331 }
401 goto found;
402 }
403 /* End of the av_records list, not found, exit */
404 break;
405found:
406 if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
407 const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
408 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
409 dccp_pr_debug("%s ACK vector 0, len=%d, "
410 "ack_seqno=%llu, ack_ackno=%llu, "
411 "ACKED!\n",
412 dccp_role(sk), len,
413 (unsigned long long)
414 avr->avr_ack_seqno,
415 (unsigned long long)
416 avr->avr_ack_ackno);
417 dccp_ackvec_throw_record(av, avr);
418 break;
419 }
420 /*
421 * If it wasn't received, continue scanning... we might
422 * find another one.
423 */
424 }
425 332
426 dccp_set_seqno(ackno, ackno_end_rl - 1); 333 /*
427 ++vector; 334 * The peer has acknowledged up to and including ack_ackno. Hence the
335 * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
336 */
337 av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
338
339free_records:
340 list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
341 list_del(&avr->avr_node);
342 kmem_cache_free(dccp_ackvec_record_slab, avr);
428 } 343 }
429} 344}
430 345
431int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, 346/*
432 u64 *ackno, const u8 opt, const u8 *value, const u8 len) 347 * Routines to keep track of Ack Vectors received in an skb
348 */
349int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
433{ 350{
434 if (len > DCCP_SINGLE_OPT_MAXLEN) 351 struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
435 return -1; 352
353 if (new == NULL)
354 return -ENOBUFS;
355 new->vec = vec;
356 new->len = len;
357 new->nonce = nonce;
436 358
437 /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ 359 list_add_tail(&new->node, head);
438 dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
439 ackno, len, value);
440 return 0; 360 return 0;
441} 361}
362EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
363
364void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
365{
366 struct dccp_ackvec_parsed *cur, *next;
367
368 list_for_each_entry_safe(cur, next, parsed_chunks, node)
369 kfree(cur);
370 INIT_LIST_HEAD(parsed_chunks);
371}
372EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
442 373
443int __init dccp_ackvec_init(void) 374int __init dccp_ackvec_init(void)
444{ 375{
@@ -448,10 +379,9 @@ int __init dccp_ackvec_init(void)
448 if (dccp_ackvec_slab == NULL) 379 if (dccp_ackvec_slab == NULL)
449 goto out_err; 380 goto out_err;
450 381
451 dccp_ackvec_record_slab = 382 dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
452 kmem_cache_create("dccp_ackvec_record", 383 sizeof(struct dccp_ackvec_record),
453 sizeof(struct dccp_ackvec_record), 384 0, SLAB_HWCACHE_ALIGN, NULL);
454 0, SLAB_HWCACHE_ALIGN, NULL);
455 if (dccp_ackvec_record_slab == NULL) 385 if (dccp_ackvec_record_slab == NULL)
456 goto out_destroy_slab; 386 goto out_destroy_slab;
457 387
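
All head/tail handling above is modulo-buffersize arithmetic, with subtraction phrased as addition of the complement so the u16 indices never underflow. A standalone sketch of the same math (N is a stand-in for DCCPAV_MAX_ACKVEC_LEN):

/* Sketch: wrap-safe circular-buffer index math, as in __ackvec_idx_add/sub. */
#include <stdio.h>

#define N 253	/* placeholder buffer size */

static unsigned short idx_add(unsigned short a, unsigned short b)
{
	return (a + b) % N;
}

static unsigned short idx_sub(unsigned short a, unsigned short b)
{
	return idx_add(a, N - b);	/* a - b without unsigned underflow */
}

int main(void)
{
	unsigned short head = 2, tail = 250;

	/* head moves right-to-left, so buflen = tail - head (mod N) */
	printf("buflen = %u\n", idx_sub(tail, head));	/* prints 248 */
	return 0;
}
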
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 7ea557b7c6b..e2ab0627a5f 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -3,9 +3,9 @@
3/* 3/*
4 * net/dccp/ackvec.h 4 * net/dccp/ackvec.h
5 * 5 *
6 * An implementation of the DCCP protocol 6 * An implementation of Ack Vectors for the DCCP protocol
7 * Copyright (c) 2007 University of Aberdeen, Scotland, UK
7 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com> 8 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
8 *
9 * This program is free software; you can redistribute it and/or modify it 9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License version 2 as 10 * under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation. 11 * published by the Free Software Foundation.
@@ -13,99 +13,124 @@
13 13
14#include <linux/dccp.h> 14#include <linux/dccp.h>
15#include <linux/compiler.h> 15#include <linux/compiler.h>
16#include <linux/ktime.h>
17#include <linux/list.h> 16#include <linux/list.h>
18#include <linux/types.h> 17#include <linux/types.h>
19 18
20/* We can spread an ack vector across multiple options */ 19/*
21#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2) 20 * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN,
21 * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
22 * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
23 * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
24 * The maximum value is bounded by the u16 types for indices and functions.
25 */
26#define DCCPAV_NUM_ACKVECS 2
27#define DCCPAV_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
22 28
23/* Estimated minimum average Ack Vector length - used for updating MPS */ 29/* Estimated minimum average Ack Vector length - used for updating MPS */
24#define DCCPAV_MIN_OPTLEN 16 30#define DCCPAV_MIN_OPTLEN 16
25 31
26#define DCCP_ACKVEC_STATE_RECEIVED 0 32/* Threshold for coping with large bursts of losses */
27#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) 33#define DCCPAV_BURST_THRESH (DCCPAV_MAX_ACKVEC_LEN / 8)
28#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6)
29 34
30#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */ 35enum dccp_ackvec_states {
31#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */ 36 DCCPAV_RECEIVED = 0x00,
37 DCCPAV_ECN_MARKED = 0x40,
38 DCCPAV_RESERVED = 0x80,
39 DCCPAV_NOT_RECEIVED = 0xC0
40};
41#define DCCPAV_MAX_RUNLEN 0x3F
32 42
33/** struct dccp_ackvec - ack vector 43static inline u8 dccp_ackvec_runlen(const u8 *cell)
34 * 44{
35 * This data structure is the one defined in RFC 4340, Appendix A. 45 return *cell & DCCPAV_MAX_RUNLEN;
36 * 46}
37 * @av_buf_head - circular buffer head 47
38 * @av_buf_tail - circular buffer tail 48static inline u8 dccp_ackvec_state(const u8 *cell)
39 * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the 49{
40 * buffer (i.e. %av_buf_head) 50 return *cell & ~DCCPAV_MAX_RUNLEN;
41 * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked 51}
42 * by the buffer with State 0 52
43 * 53/** struct dccp_ackvec - Ack Vector main data structure
44 * Additionally, the HC-Receiver must keep some information about the
45 * Ack Vectors it has recently sent. For each packet sent carrying an
46 * Ack Vector, it remembers four variables:
47 * 54 *
48 * @av_records - list of dccp_ackvec_record 55 * This implements a fixed-size circular buffer within an array and is largely
49 * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. 56 * based on Appendix A of RFC 4340.
50 * 57 *
51 * @av_time - the time in usecs 58 * @av_buf: circular buffer storage area
52 * @av_buf - circular buffer of acknowledgeable packets 59 * @av_buf_head: head index; begin of live portion in @av_buf
60 * @av_buf_tail: tail index; first index _after_ the live portion in @av_buf
61 * @av_buf_ackno: highest seqno of acknowledgeable packet recorded in @av_buf
62 * @av_tail_ackno: lowest seqno of acknowledgeable packet recorded in @av_buf
63 * @av_buf_nonce: ECN nonce sums, each covering subsequent segments of up to
64 * %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
65 * @av_overflow: if 1 then buf_head == buf_tail indicates buffer wraparound
66 * @av_records: list of %dccp_ackvec_record (Ack Vectors sent previously)
53 */ 67 */
54struct dccp_ackvec { 68struct dccp_ackvec {
55 u64 av_buf_ackno; 69 u8 av_buf[DCCPAV_MAX_ACKVEC_LEN];
56 struct list_head av_records;
57 ktime_t av_time;
58 u16 av_buf_head; 70 u16 av_buf_head;
59 u16 av_vec_len; 71 u16 av_buf_tail;
60 u8 av_buf_nonce; 72 u64 av_buf_ackno:48;
61 u8 av_ack_nonce; 73 u64 av_tail_ackno:48;
62 u8 av_buf[DCCP_MAX_ACKVEC_LEN]; 74 bool av_buf_nonce[DCCPAV_NUM_ACKVECS];
75 u8 av_overflow:1;
76 struct list_head av_records;
63}; 77};
64 78
65/** struct dccp_ackvec_record - ack vector record 79/** struct dccp_ackvec_record - Records information about sent Ack Vectors
66 * 80 *
67 * ACK vector record as defined in Appendix A of spec. 81 * These list entries define the additional information which the HC-Receiver
82 * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A.
68 * 83 *
69 * The list is sorted by avr_ack_seqno 84 * @avr_node: the list node in @av_records
85 * @avr_ack_seqno: sequence number of the packet the Ack Vector was sent on
86 * @avr_ack_ackno: the Ack number that this record/Ack Vector refers to
87 * @avr_ack_ptr: pointer into @av_buf where this record starts
88 * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending
89 * @avr_ack_nonce: the sum of @av_buf_nonce's at the time this record was sent
70 * 90 *
71 * @avr_node - node in av_records 91 * The list as a whole is sorted in descending order by @avr_ack_seqno.
72 * @avr_ack_seqno - sequence number of the packet this record was sent on
73 * @avr_ack_ackno - sequence number being acknowledged
74 * @avr_ack_ptr - pointer into av_buf where this record starts
75 * @avr_ack_nonce - av_ack_nonce at the time this record was sent
 76 * @avr_sent_len - length of the record in av_buf
77 */ 92 */
78struct dccp_ackvec_record { 93struct dccp_ackvec_record {
79 struct list_head avr_node; 94 struct list_head avr_node;
80 u64 avr_ack_seqno; 95 u64 avr_ack_seqno:48;
81 u64 avr_ack_ackno; 96 u64 avr_ack_ackno:48;
82 u16 avr_ack_ptr; 97 u16 avr_ack_ptr;
83 u16 avr_sent_len; 98 u8 avr_ack_runlen;
84 u8 avr_ack_nonce; 99 u8 avr_ack_nonce:1;
85}; 100};
86 101
87struct sock;
88struct sk_buff;
89
90extern int dccp_ackvec_init(void); 102extern int dccp_ackvec_init(void);
91extern void dccp_ackvec_exit(void); 103extern void dccp_ackvec_exit(void);
92 104
93extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority); 105extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
94extern void dccp_ackvec_free(struct dccp_ackvec *av); 106extern void dccp_ackvec_free(struct dccp_ackvec *av);
95 107
96extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, 108extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
97 const u64 ackno, const u8 state); 109extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
98 110extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
99extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, 111extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av);
100 struct sock *sk, const u64 ackno);
101extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
102 u64 *ackno, const u8 opt,
103 const u8 *value, const u8 len);
104 112
105extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); 113static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
106
107static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
108{ 114{
109 return av->av_vec_len; 115 return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
110} 116}
117
118/**
119 * struct dccp_ackvec_parsed - Record offsets of Ack Vectors in skb
120 * @vec: start of vector (offset into skb)
121 * @len: length of @vec
122 * @nonce: whether @vec had an ECN nonce of 0 or 1
123 * @node: FIFO - arranged in descending order of ack_ackno
124 * This structure is used by CCIDs to access Ack Vectors in a received skb.
125 */
126struct dccp_ackvec_parsed {
127 u8 *vec,
128 len,
129 nonce:1;
130 struct list_head node;
131};
132
133extern int dccp_ackvec_parsed_add(struct list_head *head,
134 u8 *vec, u8 len, u8 nonce);
135extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
111#endif /* _ACKVEC_H */ 136#endif /* _ACKVEC_H */
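In the reworked header, each cell of av_buf holds one Ack Vector byte: the receive
state in the top two bits and a run length in the lower six (RFC 4340, 11.4). A
minimal sketch of the accessors that the ccid2.c hunks below rely on
(dccp_ackvec_runlen(), dccp_ackvec_state()); the mask value is inferred from the
on-wire format and is an assumption, not copied from this patch:

	/* Sketch only: Ack Vector cell layout per RFC 4340, 11.4.
	 * Bits 7..6 carry the state, bits 5..0 the run length.
	 */
	#define DCCPAV_RUNLEN_MASK	0x3f	/* assumed 6-bit run length */

	static inline u8 sketch_ackvec_runlen(const u8 *cell)
	{
		return *cell & DCCPAV_RUNLEN_MASK;
	}

	static inline u8 sketch_ackvec_state(const u8 *cell)
	{
		/* unshifted, comparable to DCCPAV_NOT_RECEIVED etc. */
		return *cell & ~DCCPAV_RUNLEN_MASK;
	}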
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 6576eae9e77..e96d5e81003 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -246,68 +246,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
246#endif 246#endif
247} 247}
248 248
249/* XXX Lame code duplication!
250 * returns -1 if none was found.
251 * else returns the next offset to use in the function call.
252 */
253static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
254 unsigned char **vec, unsigned char *veclen)
255{
256 const struct dccp_hdr *dh = dccp_hdr(skb);
257 unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
258 unsigned char *opt_ptr;
259 const unsigned char *opt_end = (unsigned char *)dh +
260 (dh->dccph_doff * 4);
261 unsigned char opt, len;
262 unsigned char *value;
263
264 BUG_ON(offset < 0);
265 options += offset;
266 opt_ptr = options;
267 if (opt_ptr >= opt_end)
268 return -1;
269
270 while (opt_ptr != opt_end) {
271 opt = *opt_ptr++;
272 len = 0;
273 value = NULL;
274
275 /* Check if this isn't a single byte option */
276 if (opt > DCCPO_MAX_RESERVED) {
277 if (opt_ptr == opt_end)
278 goto out_invalid_option;
279
280 len = *opt_ptr++;
281 if (len < 3)
282 goto out_invalid_option;
283 /*
284 * Remove the type and len fields, leaving
285 * just the value size
286 */
287 len -= 2;
288 value = opt_ptr;
289 opt_ptr += len;
290
291 if (opt_ptr > opt_end)
292 goto out_invalid_option;
293 }
294
295 switch (opt) {
296 case DCCPO_ACK_VECTOR_0:
297 case DCCPO_ACK_VECTOR_1:
298 *vec = value;
299 *veclen = len;
300 return offset + (opt_ptr - options);
301 }
302 }
303
304 return -1;
305
306out_invalid_option:
307 DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
308 return -1;
309}
310
311/** 249/**
312 * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm 250 * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
313 * This code is almost identical with TCP's tcp_rtt_estimator(), since 251 * This code is almost identical with TCP's tcp_rtt_estimator(), since
@@ -432,16 +370,28 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
432 ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); 370 ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
433} 371}
434 372
373static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
374 u8 option, u8 *optval, u8 optlen)
375{
376 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
377
378 switch (option) {
379 case DCCPO_ACK_VECTOR_0:
380 case DCCPO_ACK_VECTOR_1:
381 return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
382 option - DCCPO_ACK_VECTOR_0);
383 }
384 return 0;
385}
386
435static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 387static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
436{ 388{
437 struct dccp_sock *dp = dccp_sk(sk); 389 struct dccp_sock *dp = dccp_sk(sk);
438 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 390 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
439 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); 391 const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
392 struct dccp_ackvec_parsed *avp;
440 u64 ackno, seqno; 393 u64 ackno, seqno;
441 struct ccid2_seq *seqp; 394 struct ccid2_seq *seqp;
442 unsigned char *vector;
443 unsigned char veclen;
444 int offset = 0;
445 int done = 0; 395 int done = 0;
446 unsigned int maxincr = 0; 396 unsigned int maxincr = 0;
447 397
@@ -475,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
475 } 425 }
476 426
477 /* check forward path congestion */ 427 /* check forward path congestion */
478 /* still didn't send out new data packets */ 428 if (dccp_packet_without_ack(skb))
479 if (hc->tx_seqh == hc->tx_seqt)
480 return; 429 return;
481 430
482 switch (DCCP_SKB_CB(skb)->dccpd_type) { 431 /* still didn't send out new data packets */
483 case DCCP_PKT_ACK: 432 if (hc->tx_seqh == hc->tx_seqt)
484 case DCCP_PKT_DATAACK: 433 goto done;
485 break;
486 default:
487 return;
488 }
489 434
490 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; 435 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
491 if (after48(ackno, hc->tx_high_ack)) 436 if (after48(ackno, hc->tx_high_ack))
@@ -509,16 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
509 maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); 454 maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
510 455
511 /* go through all ack vectors */ 456 /* go through all ack vectors */
512 while ((offset = ccid2_ackvector(sk, skb, offset, 457 list_for_each_entry(avp, &hc->tx_av_chunks, node) {
513 &vector, &veclen)) != -1) {
514 /* go through this ack vector */ 458 /* go through this ack vector */
515 while (veclen--) { 459 for (; avp->len--; avp->vec++) {
516 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; 460 u64 ackno_end_rl = SUB48(ackno,
517 u64 ackno_end_rl = SUB48(ackno, rl); 461 dccp_ackvec_runlen(avp->vec));
518 462
519 ccid2_pr_debug("ackvec start:%llu end:%llu\n", 463 ccid2_pr_debug("ackvec %llu |%u,%u|\n",
520 (unsigned long long)ackno, 464 (unsigned long long)ackno,
521 (unsigned long long)ackno_end_rl); 465 dccp_ackvec_state(avp->vec) >> 6,
466 dccp_ackvec_runlen(avp->vec));
522 /* if the seqno we are analyzing is larger than the 467 /* if the seqno we are analyzing is larger than the
523 * current ackno, then move towards the tail of our 468 * current ackno, then move towards the tail of our
524 * seqnos. 469 * seqnos.
@@ -537,17 +482,15 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
537 * run length 482 * run length
538 */ 483 */
539 while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { 484 while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
540 const u8 state = *vector & 485 const u8 state = dccp_ackvec_state(avp->vec);
541 DCCP_ACKVEC_STATE_MASK;
542 486
543 /* new packet received or marked */ 487 /* new packet received or marked */
544 if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && 488 if (state != DCCPAV_NOT_RECEIVED &&
545 !seqp->ccid2s_acked) { 489 !seqp->ccid2s_acked) {
546 if (state == 490 if (state == DCCPAV_ECN_MARKED)
547 DCCP_ACKVEC_STATE_ECN_MARKED) {
548 ccid2_congestion_event(sk, 491 ccid2_congestion_event(sk,
549 seqp); 492 seqp);
550 } else 493 else
551 ccid2_new_ack(sk, seqp, 494 ccid2_new_ack(sk, seqp,
552 &maxincr); 495 &maxincr);
553 496
@@ -566,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
566 break; 509 break;
567 510
568 ackno = SUB48(ackno_end_rl, 1); 511 ackno = SUB48(ackno_end_rl, 1);
569 vector++;
570 } 512 }
571 if (done) 513 if (done)
572 break; 514 break;
@@ -634,10 +576,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
634 sk_stop_timer(sk, &hc->tx_rtotimer); 576 sk_stop_timer(sk, &hc->tx_rtotimer);
635 else 577 else
636 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); 578 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
637 579done:
638 /* check if incoming Acks allow pending packets to be sent */ 580 /* check if incoming Acks allow pending packets to be sent */
639 if (sender_was_blocked && !ccid2_cwnd_network_limited(hc)) 581 if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
640 tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); 582 tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
583 dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
641} 584}
642 585
643static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 586static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -666,6 +609,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
666 hc->tx_last_cong = ccid2_time_stamp; 609 hc->tx_last_cong = ccid2_time_stamp;
667 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 610 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
668 (unsigned long)sk); 611 (unsigned long)sk);
612 INIT_LIST_HEAD(&hc->tx_av_chunks);
669 return 0; 613 return 0;
670} 614}
671 615
@@ -699,16 +643,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
699} 643}
700 644
701struct ccid_operations ccid2_ops = { 645struct ccid_operations ccid2_ops = {
702 .ccid_id = DCCPC_CCID2, 646 .ccid_id = DCCPC_CCID2,
703 .ccid_name = "TCP-like", 647 .ccid_name = "TCP-like",
704 .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), 648 .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
705 .ccid_hc_tx_init = ccid2_hc_tx_init, 649 .ccid_hc_tx_init = ccid2_hc_tx_init,
706 .ccid_hc_tx_exit = ccid2_hc_tx_exit, 650 .ccid_hc_tx_exit = ccid2_hc_tx_exit,
707 .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, 651 .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
708 .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, 652 .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
709 .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, 653 .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
710 .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), 654 .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
711 .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, 655 .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
656 .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
712}; 657};
713 658
714#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 659#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
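ccid2_rtt_estimator(), named in the hunk header above, mirrors tcp_rtt_estimator()
and RFC 2988. A worked summary of that algorithm, in plain arithmetic rather than
the kernel's scaled fixed point so the constants are explicit; sketch only, not the
patched function:

	struct rtt_est {
		long	srtt;	/* smoothed RTT estimate */
		long	rttvar;	/* RTT variation */
		int	primed;	/* first sample taken? */
	};

	/* Feed one RTT sample r; return the new RTO per RFC 2988. */
	static long rtt_sample(struct rtt_est *s, long r)
	{
		if (!s->primed) {
			s->srtt   = r;		/* SRTT   := R   */
			s->rttvar = r / 2;	/* RTTVAR := R/2 */
			s->primed = 1;
		} else {
			long err = r - s->srtt;

			if (err < 0)
				err = -err;
			/* RTTVAR := 3/4 RTTVAR + 1/4 |SRTT - R| */
			s->rttvar += (err - s->rttvar) / 4;
			/* SRTT := 7/8 SRTT + 1/8 R */
			s->srtt += (r - s->srtt) / 8;
		}
		return s->srtt + 4 * s->rttvar;	/* ccid2 clamps this to sane bounds */
	}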
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 25cb6b216ed..e9985dafc2c 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -55,6 +55,7 @@ struct ccid2_seq {
55 * @tx_rtt_seq: to decay RTTVAR at most once per flight 55 * @tx_rtt_seq: to decay RTTVAR at most once per flight
56 * @tx_rpseq: last consecutive seqno 56 * @tx_rpseq: last consecutive seqno
57 * @tx_rpdupack: dupacks since rpseq 57 * @tx_rpdupack: dupacks since rpseq
58 * @tx_av_chunks: list of Ack Vectors received on current skb
58 */ 59 */
59struct ccid2_hc_tx_sock { 60struct ccid2_hc_tx_sock {
60 u32 tx_cwnd; 61 u32 tx_cwnd;
@@ -79,6 +80,7 @@ struct ccid2_hc_tx_sock {
79 int tx_rpdupack; 80 int tx_rpdupack;
80 u32 tx_last_cong; 81 u32 tx_last_cong;
81 u64 tx_high_ack; 82 u64 tx_high_ack;
83 struct list_head tx_av_chunks;
82}; 84};
83 85
84static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) 86static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index a8ed459508b..48ad5d9da7c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,6 +243,19 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
243extern void dccp_send_sync(struct sock *sk, const u64 seq, 243extern void dccp_send_sync(struct sock *sk, const u64 seq,
244 const enum dccp_pkt_type pkt_type); 244 const enum dccp_pkt_type pkt_type);
245 245
246/*
247 * TX Packet Dequeueing Interface
248 */
249extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
250extern bool dccp_qpolicy_full(struct sock *sk);
251extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
252extern struct sk_buff *dccp_qpolicy_top(struct sock *sk);
253extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
254extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
255
256/*
257 * TX Packet Output and TX Timers
258 */
246extern void dccp_write_xmit(struct sock *sk); 259extern void dccp_write_xmit(struct sock *sk);
247extern void dccp_write_space(struct sock *sk); 260extern void dccp_write_space(struct sock *sk);
248extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); 261extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
@@ -457,12 +470,15 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
457 dp->dccps_awh = dp->dccps_gss; 470 dp->dccps_awh = dp->dccps_gss;
458} 471}
459 472
473static inline int dccp_ackvec_pending(const struct sock *sk)
474{
475 return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL &&
476 !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec);
477}
478
460static inline int dccp_ack_pending(const struct sock *sk) 479static inline int dccp_ack_pending(const struct sock *sk)
461{ 480{
462 const struct dccp_sock *dp = dccp_sk(sk); 481 return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
463 return (dp->dccps_hc_rx_ackvec != NULL &&
464 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
465 inet_csk_ack_scheduled(sk);
466} 482}
467 483
468extern int dccp_feat_finalise_settings(struct dccp_sock *dp); 484extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index e424a09e83f..15af247ea00 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -160,13 +160,15 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
160 dccp_time_wait(sk, DCCP_TIME_WAIT, 0); 160 dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
161} 161}
162 162
163static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) 163static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb)
164{ 164{
165 struct dccp_sock *dp = dccp_sk(sk); 165 struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec;
166 166
167 if (dp->dccps_hc_rx_ackvec != NULL) 167 if (av == NULL)
168 dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, 168 return;
169 DCCP_SKB_CB(skb)->dccpd_ack_seq); 169 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
170 dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq);
171 dccp_ackvec_input(av, skb);
170} 172}
171 173
172static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) 174static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
@@ -366,22 +368,13 @@ discard:
366int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, 368int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
367 const struct dccp_hdr *dh, const unsigned len) 369 const struct dccp_hdr *dh, const unsigned len)
368{ 370{
369 struct dccp_sock *dp = dccp_sk(sk);
370
371 if (dccp_check_seqno(sk, skb)) 371 if (dccp_check_seqno(sk, skb))
372 goto discard; 372 goto discard;
373 373
374 if (dccp_parse_options(sk, NULL, skb)) 374 if (dccp_parse_options(sk, NULL, skb))
375 return 1; 375 return 1;
376 376
377 if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 377 dccp_handle_ackvec_processing(sk, skb);
378 dccp_event_ack_recv(sk, skb);
379
380 if (dp->dccps_hc_rx_ackvec != NULL &&
381 dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
382 DCCP_SKB_CB(skb)->dccpd_seq,
383 DCCP_ACKVEC_STATE_RECEIVED))
384 goto discard;
385 dccp_deliver_input_to_ccids(sk, skb); 378 dccp_deliver_input_to_ccids(sk, skb);
386 379
387 return __dccp_rcv_established(sk, skb, dh, len); 380 return __dccp_rcv_established(sk, skb, dh, len);
@@ -633,15 +626,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
633 if (dccp_parse_options(sk, NULL, skb)) 626 if (dccp_parse_options(sk, NULL, skb))
634 return 1; 627 return 1;
635 628
636 if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 629 dccp_handle_ackvec_processing(sk, skb);
637 dccp_event_ack_recv(sk, skb);
638
639 if (dp->dccps_hc_rx_ackvec != NULL &&
640 dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
641 DCCP_SKB_CB(skb)->dccpd_seq,
642 DCCP_ACKVEC_STATE_RECEIVED))
643 goto discard;
644
645 dccp_deliver_input_to_ccids(sk, skb); 630 dccp_deliver_input_to_ccids(sk, skb);
646 } 631 }
647 632

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 3f69ea11482..45a434f9416 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -462,15 +462,12 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
462{ 462{
463 struct rtable *rt; 463 struct rtable *rt;
464 struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, 464 struct flowi fl = { .oif = skb_rtable(skb)->rt_iif,
465 .nl_u = { .ip4_u = 465 .fl4_dst = ip_hdr(skb)->saddr,
466 { .daddr = ip_hdr(skb)->saddr, 466 .fl4_src = ip_hdr(skb)->daddr,
467 .saddr = ip_hdr(skb)->daddr, 467 .fl4_tos = RT_CONN_FLAGS(sk),
468 .tos = RT_CONN_FLAGS(sk) } },
469 .proto = sk->sk_protocol, 468 .proto = sk->sk_protocol,
470 .uli_u = { .ports = 469 .fl_ip_sport = dccp_hdr(skb)->dccph_dport,
471 { .sport = dccp_hdr(skb)->dccph_dport, 470 .fl_ip_dport = dccp_hdr(skb)->dccph_sport
472 .dport = dccp_hdr(skb)->dccph_sport }
473 }
474 }; 471 };
475 472
476 security_skb_classify_flow(skb, &fl); 473 security_skb_classify_flow(skb, &fl);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index cd306181300..f06ffcfc8d7 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -54,7 +54,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
54 struct dccp_sock *dp = dccp_sk(sk); 54 struct dccp_sock *dp = dccp_sk(sk);
55 const struct dccp_hdr *dh = dccp_hdr(skb); 55 const struct dccp_hdr *dh = dccp_hdr(skb);
56 const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; 56 const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
57 u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
58 unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); 57 unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
59 unsigned char *opt_ptr = options; 58 unsigned char *opt_ptr = options;
60 const unsigned char *opt_end = (unsigned char *)dh + 59 const unsigned char *opt_end = (unsigned char *)dh +
@@ -129,14 +128,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
129 if (rc) 128 if (rc)
130 goto out_featneg_failed; 129 goto out_featneg_failed;
131 break; 130 break;
132 case DCCPO_ACK_VECTOR_0:
133 case DCCPO_ACK_VECTOR_1:
134 if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
135 break;
136 if (dp->dccps_hc_rx_ackvec != NULL &&
137 dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
138 goto out_invalid_option;
139 break;
140 case DCCPO_TIMESTAMP: 131 case DCCPO_TIMESTAMP:
141 if (len != 4) 132 if (len != 4)
142 goto out_invalid_option; 133 goto out_invalid_option;
@@ -226,6 +217,16 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
226 pkt_type, opt, value, len)) 217 pkt_type, opt, value, len))
227 goto out_invalid_option; 218 goto out_invalid_option;
228 break; 219 break;
220 case DCCPO_ACK_VECTOR_0:
221 case DCCPO_ACK_VECTOR_1:
222 if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
223 break;
224 /*
225 * Ack vectors are processed by the TX CCID if it is
226 * interested. The RX CCID need not parse Ack Vectors,
227 * since it is only interested in clearing old state.
228 * Fall through.
229 */
229 case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: 230 case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
230 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, 231 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
231 pkt_type, opt, value, len)) 232 pkt_type, opt, value, len))
@@ -340,6 +341,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time)
340 return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; 341 return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
341} 342}
342 343
344/* FIXME: This function is currently not used anywhere */
343int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) 345int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time)
344{ 346{
345 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); 347 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
@@ -424,6 +426,83 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
424 return 0; 426 return 0;
425} 427}
426 428
429static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
430{
431 struct dccp_sock *dp = dccp_sk(sk);
432 struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
433 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
434 const u16 buflen = dccp_ackvec_buflen(av);
435	/* Figure out how many options we need to represent the ackvec */
436 const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
437 u16 len = buflen + 2 * nr_opts;
438 u8 i, nonce = 0;
439 const unsigned char *tail, *from;
440 unsigned char *to;
441
442 if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
443 DCCP_WARN("Lacking space for %u bytes on %s packet\n", len,
444 dccp_packet_name(dcb->dccpd_type));
445 return -1;
446 }
447 /*
448 * Since Ack Vectors are variable-length, we can not always predict
449 * their size. To catch exception cases where the space is running out
450 * on the skb, a separate Sync is scheduled to carry the Ack Vector.
451 */
452 if (len > DCCPAV_MIN_OPTLEN &&
453 len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) {
454 DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), "
455 "MPS=%u ==> reduce payload size?\n", len, skb->len,
456 dcb->dccpd_opt_len, dp->dccps_mss_cache);
457 dp->dccps_sync_scheduled = 1;
458 return 0;
459 }
460 dcb->dccpd_opt_len += len;
461
462 to = skb_push(skb, len);
463 len = buflen;
464 from = av->av_buf + av->av_buf_head;
465 tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
466
467 for (i = 0; i < nr_opts; ++i) {
468 int copylen = len;
469
470 if (len > DCCP_SINGLE_OPT_MAXLEN)
471 copylen = DCCP_SINGLE_OPT_MAXLEN;
472
473 /*
474 * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
475 * its type; ack_nonce is the sum of all individual buf_nonce's.
476 */
477 nonce ^= av->av_buf_nonce[i];
478
479 *to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
480 *to++ = copylen + 2;
481
482 /* Check if buf_head wraps */
483 if (from + copylen > tail) {
484 const u16 tailsize = tail - from;
485
486 memcpy(to, from, tailsize);
487 to += tailsize;
488 len -= tailsize;
489 copylen -= tailsize;
490 from = av->av_buf;
491 }
492
493 memcpy(to, from, copylen);
494 from += copylen;
495 to += copylen;
496 len -= copylen;
497 }
498 /*
499 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
500 */
501 if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce))
502 return -ENOBUFS;
503 return 0;
504}
505
427/** 506/**
428 * dccp_insert_option_mandatory - Mandatory option (5.8.2) 507 * dccp_insert_option_mandatory - Mandatory option (5.8.2)
429 * Note that since we are using skb_push, this function needs to be called 508 * Note that since we are using skb_push, this function needs to be called
@@ -519,8 +598,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
519 if (dccp_insert_option_timestamp(skb)) 598 if (dccp_insert_option_timestamp(skb))
520 return -1; 599 return -1;
521 600
522 } else if (dp->dccps_hc_rx_ackvec != NULL && 601 } else if (dccp_ackvec_pending(sk) &&
523 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
524 dccp_insert_option_ackvec(sk, skb)) { 602 dccp_insert_option_ackvec(sk, skb)) {
525 return -1; 603 return -1;
526 } 604 }
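dccp_insert_option_ackvec() above sizes the option train before copying: the live
buffer is split into chunks of at most DCCP_SINGLE_OPT_MAXLEN bytes, each chunk
costs two extra bytes of type/length header, and the chunk's ECN nonce selects
between the two option types. A sketch of just the sizing rule, using the same
names as the patch:

	/* Sketch: total option space needed for buflen bytes of Ack Vector. */
	static u16 sketch_ackvec_optlen(u16 buflen)
	{
		u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);

		return buflen + 2 * nr_opts;	/* data + per-option type/len */
	}

	/* On the wire each chunk begins: { DCCPO_ACK_VECTOR_0 + nonce, len + 2 } */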
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 45b91853f5a..784d3021054 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk)
242{ 242{
243 int err, len; 243 int err, len;
244 struct dccp_sock *dp = dccp_sk(sk); 244 struct dccp_sock *dp = dccp_sk(sk);
245 struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); 245 struct sk_buff *skb = dccp_qpolicy_pop(sk);
246 246
247 if (unlikely(skb == NULL)) 247 if (unlikely(skb == NULL))
248 return; 248 return;
@@ -283,6 +283,15 @@ static void dccp_xmit_packet(struct sock *sk)
283 * any local drop will eventually be reported via receiver feedback. 283 * any local drop will eventually be reported via receiver feedback.
284 */ 284 */
285 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); 285 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
286
287 /*
288 * If the CCID needs to transfer additional header options out-of-band
289 * (e.g. Ack Vectors or feature-negotiation options), it activates this
290 * flag to schedule a Sync. The Sync will automatically incorporate all
291 * currently pending header options, thus clearing the backlog.
292 */
293 if (dp->dccps_sync_scheduled)
294 dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
286} 295}
287 296
288/** 297/**
@@ -336,7 +345,7 @@ void dccp_write_xmit(struct sock *sk)
336 struct dccp_sock *dp = dccp_sk(sk); 345 struct dccp_sock *dp = dccp_sk(sk);
337 struct sk_buff *skb; 346 struct sk_buff *skb;
338 347
339 while ((skb = skb_peek(&sk->sk_write_queue))) { 348 while ((skb = dccp_qpolicy_top(sk))) {
340 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 349 int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
341 350
342 switch (ccid_packet_dequeue_eval(rc)) { 351 switch (ccid_packet_dequeue_eval(rc)) {
@@ -350,8 +359,7 @@ void dccp_write_xmit(struct sock *sk)
350 dccp_xmit_packet(sk); 359 dccp_xmit_packet(sk);
351 break; 360 break;
352 case CCID_PACKET_ERR: 361 case CCID_PACKET_ERR:
353 skb_dequeue(&sk->sk_write_queue); 362 dccp_qpolicy_drop(sk, skb);
354 kfree_skb(skb);
355 dccp_pr_debug("packet discarded due to err=%d\n", rc); 363 dccp_pr_debug("packet discarded due to err=%d\n", rc);
356 } 364 }
357 } 365 }
@@ -636,6 +644,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
636 DCCP_SKB_CB(skb)->dccpd_type = pkt_type; 644 DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
637 DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; 645 DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
638 646
647 /*
648 * Clear the flag in case the Sync was scheduled for out-of-band data,
649 * such as carrying a long Ack Vector.
650 */
651 dccp_sk(sk)->dccps_sync_scheduled = 0;
652
639 dccp_transmit_skb(sk, skb); 653 dccp_transmit_skb(sk, skb);
640} 654}
641 655
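The dccps_sync_scheduled flag added above closes a loop between option insertion
and packet output. A condensed sketch of the three cooperating sites, all shown in
full in the surrounding hunks:

	/* options.c: Ack Vector too large for this skb */
	dp->dccps_sync_scheduled = 1;

	/* output.c, dccp_xmit_packet(): after the data packet went out */
	if (dp->dccps_sync_scheduled)
		dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);

	/* output.c, dccp_send_sync(): the Sync carries all pending options */
	dccp_sk(sk)->dccps_sync_scheduled = 0;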
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ef343d53fce..152975d942d 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
185 dp->dccps_role = DCCP_ROLE_UNDEFINED; 185 dp->dccps_role = DCCP_ROLE_UNDEFINED;
186 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; 186 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
187 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; 187 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
188 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
188 189
189 dccp_init_xmit_timers(sk); 190 dccp_init_xmit_timers(sk);
190 191
@@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
532 case DCCP_SOCKOPT_RECV_CSCOV: 533 case DCCP_SOCKOPT_RECV_CSCOV:
533 err = dccp_setsockopt_cscov(sk, val, true); 534 err = dccp_setsockopt_cscov(sk, val, true);
534 break; 535 break;
536 case DCCP_SOCKOPT_QPOLICY_ID:
537 if (sk->sk_state != DCCP_CLOSED)
538 err = -EISCONN;
539 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
540 err = -EINVAL;
541 else
542 dp->dccps_qpolicy = val;
543 break;
544 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
545 if (val < 0)
546 err = -EINVAL;
547 else
548 dp->dccps_tx_qlen = val;
549 break;
535 default: 550 default:
536 err = -ENOPROTOOPT; 551 err = -ENOPROTOOPT;
537 break; 552 break;
@@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
639 case DCCP_SOCKOPT_RECV_CSCOV: 654 case DCCP_SOCKOPT_RECV_CSCOV:
640 val = dp->dccps_pcrlen; 655 val = dp->dccps_pcrlen;
641 break; 656 break;
657 case DCCP_SOCKOPT_QPOLICY_ID:
658 val = dp->dccps_qpolicy;
659 break;
660 case DCCP_SOCKOPT_QPOLICY_TXQLEN:
661 val = dp->dccps_tx_qlen;
662 break;
642 case 128 ... 191: 663 case 128 ... 191:
643 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, 664 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
644 len, (u32 __user *)optval, optlen); 665 len, (u32 __user *)optval, optlen);
@@ -681,6 +702,47 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
681EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); 702EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
682#endif 703#endif
683 704
705static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
706{
707 struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
708
709 /*
710 * Assign an (opaque) qpolicy priority value to skb->priority.
711 *
712 * We are overloading this skb field for use with the qpolicy subsystem.
713 * The skb->priority is normally used for the SO_PRIORITY option, which
714 * is initialised from sk_priority. Since the assignment of sk_priority
715 * to skb->priority happens later (on layer 3), we overload this field
716 * for use with queueing priorities as long as the skb is on layer 4.
717 * The default priority value (if nothing is set) is 0.
718 */
719 skb->priority = 0;
720
721 for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
722
723 if (!CMSG_OK(msg, cmsg))
724 return -EINVAL;
725
726 if (cmsg->cmsg_level != SOL_DCCP)
727 continue;
728
729 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
730 !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
731 return -EINVAL;
732
733 switch (cmsg->cmsg_type) {
734 case DCCP_SCM_PRIORITY:
735 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
736 return -EINVAL;
737 skb->priority = *(__u32 *)CMSG_DATA(cmsg);
738 break;
739 default:
740 return -EINVAL;
741 }
742 }
743 return 0;
744}
745
684int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 746int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
685 size_t len) 747 size_t len)
686{ 748{
@@ -696,8 +758,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
696 758
697 lock_sock(sk); 759 lock_sock(sk);
698 760
699 if (sysctl_dccp_tx_qlen && 761 if (dccp_qpolicy_full(sk)) {
700 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
701 rc = -EAGAIN; 762 rc = -EAGAIN;
702 goto out_release; 763 goto out_release;
703 } 764 }
@@ -725,7 +786,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
725 if (rc != 0) 786 if (rc != 0)
726 goto out_discard; 787 goto out_discard;
727 788
728 skb_queue_tail(&sk->sk_write_queue, skb); 789 rc = dccp_msghdr_parse(msg, skb);
790 if (rc != 0)
791 goto out_discard;
792
793 dccp_qpolicy_push(sk, skb);
729 /* 794 /*
730 * The xmit_timer is set if the TX CCID is rate-based and will expire 795 * The xmit_timer is set if the TX CCID is rate-based and will expire
731 * when congestion control permits to release further packets into the 796 * when congestion control permits to release further packets into the
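Taken together, the two new socket options and dccp_msghdr_parse() give
applications the following usage pattern. A hedged userspace sketch: the constants
are the ones introduced by this patch, and their visibility through
<linux/dccp.h> is an assumption.

	#include <string.h>
	#include <sys/socket.h>
	#include <sys/uio.h>
	#include <linux/dccp.h>		/* assumed to export the new constants */

	static int use_prio_policy(int fd)
	{
		int policy = DCCPQ_POLICY_PRIO, qlen = 16;

		/* Must happen before the connection exists (else -EISCONN). */
		if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_QPOLICY_ID,
			       &policy, sizeof(policy)) < 0)
			return -1;
		return setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_QPOLICY_TXQLEN,
				  &qlen, sizeof(qlen));
	}

	static ssize_t send_prio(int fd, const void *buf, size_t len, __u32 prio)
	{
		char cbuf[CMSG_SPACE(sizeof(prio))];
		struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
		struct msghdr msg = {
			.msg_iov	= &iov,
			.msg_iovlen	= 1,
			.msg_control	= cbuf,
			.msg_controllen	= sizeof(cbuf),
		};
		struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

		cm->cmsg_level = SOL_DCCP;
		cm->cmsg_type  = DCCP_SCM_PRIORITY;
		cm->cmsg_len   = CMSG_LEN(sizeof(prio));
		memcpy(CMSG_DATA(cm), &prio, sizeof(prio));

		return sendmsg(fd, &msg, 0);
	}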
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
new file mode 100644
index 00000000000..63c30bfa470
--- /dev/null
+++ b/net/dccp/qpolicy.c
@@ -0,0 +1,137 @@
1/*
2 * net/dccp/qpolicy.c
3 *
4 * Policy-based packet dequeueing interface for DCCP.
5 *
6 * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License v2
10 * as published by the Free Software Foundation.
11 */
12#include "dccp.h"
13
14/*
15 * Simple Dequeueing Policy:
16 * If tx_qlen is different from 0, enqueue up to tx_qlen elements.
17 */
18static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
19{
20 skb_queue_tail(&sk->sk_write_queue, skb);
21}
22
23static bool qpolicy_simple_full(struct sock *sk)
24{
25 return dccp_sk(sk)->dccps_tx_qlen &&
26 sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
27}
28
29static struct sk_buff *qpolicy_simple_top(struct sock *sk)
30{
31 return skb_peek(&sk->sk_write_queue);
32}
33
34/*
35 * Priority-based Dequeueing Policy:
36 * If tx_qlen is different from 0 and the queue has reached its upper bound
37 * of tx_qlen elements, replace older packets lowest-priority-first.
38 */
39static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
40{
41 struct sk_buff *skb, *best = NULL;
42
43 skb_queue_walk(&sk->sk_write_queue, skb)
44 if (best == NULL || skb->priority > best->priority)
45 best = skb;
46 return best;
47}
48
49static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
50{
51 struct sk_buff *skb, *worst = NULL;
52
53 skb_queue_walk(&sk->sk_write_queue, skb)
54 if (worst == NULL || skb->priority < worst->priority)
55 worst = skb;
56 return worst;
57}
58
59static bool qpolicy_prio_full(struct sock *sk)
60{
61 if (qpolicy_simple_full(sk))
62 dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
63 return false;
64}
65
66/**
67 * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface
68 * @push: add a new @skb to the write queue
69 * @full: indicates that no more packets will be admitted
70 * @top: peeks at whatever the queueing policy defines as its `top'
71 */
72static struct dccp_qpolicy_operations {
73 void (*push) (struct sock *sk, struct sk_buff *skb);
74 bool (*full) (struct sock *sk);
75 struct sk_buff* (*top) (struct sock *sk);
76 __be32 params;
77
78} qpol_table[DCCPQ_POLICY_MAX] = {
79 [DCCPQ_POLICY_SIMPLE] = {
80 .push = qpolicy_simple_push,
81 .full = qpolicy_simple_full,
82 .top = qpolicy_simple_top,
83 .params = 0,
84 },
85 [DCCPQ_POLICY_PRIO] = {
86 .push = qpolicy_simple_push,
87 .full = qpolicy_prio_full,
88 .top = qpolicy_prio_best_skb,
89 .params = DCCP_SCM_PRIORITY,
90 },
91};
92
93/*
94 * Externally visible interface
95 */
96void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
97{
98 qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
99}
100
101bool dccp_qpolicy_full(struct sock *sk)
102{
103 return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
104}
105
106void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
107{
108 if (skb != NULL) {
109 skb_unlink(skb, &sk->sk_write_queue);
110 kfree_skb(skb);
111 }
112}
113
114struct sk_buff *dccp_qpolicy_top(struct sock *sk)
115{
116 return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
117}
118
119struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
120{
121 struct sk_buff *skb = dccp_qpolicy_top(sk);
122
123 if (skb != NULL) {
124 /* Clear any skb fields that we used internally */
125 skb->priority = 0;
126 skb_unlink(skb, &sk->sk_write_queue);
127 }
128 return skb;
129}
130
131bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param)
132{
133 /* check if exactly one bit is set */
134 if (!param || (param & (param - 1)))
135 return false;
136 return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param;
137}
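dccp_qpolicy_param_ok() relies on a classic idiom: x & (x - 1) clears the lowest
set bit, so the expression is zero exactly when at most one bit is set. A sketch,
using plain u32 instead of the __be32 above:

	static bool exactly_one_bit_set(u32 x)
	{
		return x != 0 && (x & (x - 1)) == 0;
	}

	/* e.g. DCCP_SCM_PRIORITY must be a single bit of the policy's params mask */

Note also that qpolicy_prio_full() never reports the queue as full: once the bound
is reached it evicts the current lowest-priority packet and always admits the
newcomer.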
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 6f97268ed85..0065e7e14af 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1850,7 +1850,7 @@ unsigned dn_mss_from_pmtu(struct net_device *dev, int mtu)
1850{ 1850{
1851 unsigned mss = 230 - DN_MAX_NSP_DATA_HEADER; 1851 unsigned mss = 230 - DN_MAX_NSP_DATA_HEADER;
1852 if (dev) { 1852 if (dev) {
1853 struct dn_dev *dn_db = dev->dn_ptr; 1853 struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
1854 mtu -= LL_RESERVED_SPACE(dev); 1854 mtu -= LL_RESERVED_SPACE(dev);
1855 if (dn_db->use_long) 1855 if (dn_db->use_long)
1856 mtu -= 21; 1856 mtu -= 21;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 4c409b46aa3..0ba15633c41 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -267,7 +267,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
267 if (table->extra1 == NULL) 267 if (table->extra1 == NULL)
268 return -EINVAL; 268 return -EINVAL;
269 269
270 dn_db = dev->dn_ptr; 270 dn_db = rcu_dereference_raw(dev->dn_ptr);
271 old = dn_db->parms.forwarding; 271 old = dn_db->parms.forwarding;
272 272
273 err = proc_dointvec(table, write, buffer, lenp, ppos); 273 err = proc_dointvec(table, write, buffer, lenp, ppos);
@@ -332,14 +332,19 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void)
332 return ifa; 332 return ifa;
333} 333}
334 334
335static __inline__ void dn_dev_free_ifa(struct dn_ifaddr *ifa) 335static void dn_dev_free_ifa_rcu(struct rcu_head *head)
336{ 336{
337 kfree(ifa); 337 kfree(container_of(head, struct dn_ifaddr, rcu));
338} 338}
339 339
340static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int destroy) 340static void dn_dev_free_ifa(struct dn_ifaddr *ifa)
341{ 341{
342 struct dn_ifaddr *ifa1 = *ifap; 342 call_rcu(&ifa->rcu, dn_dev_free_ifa_rcu);
343}
344
345static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy)
346{
347 struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap);
343 unsigned char mac_addr[6]; 348 unsigned char mac_addr[6];
344 struct net_device *dev = dn_db->dev; 349 struct net_device *dev = dn_db->dev;
345 350
@@ -373,7 +378,9 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
373 ASSERT_RTNL(); 378 ASSERT_RTNL();
374 379
375 /* Check for duplicates */ 380 /* Check for duplicates */
376 for(ifa1 = dn_db->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 381 for (ifa1 = rtnl_dereference(dn_db->ifa_list);
382 ifa1 != NULL;
383 ifa1 = rtnl_dereference(ifa1->ifa_next)) {
377 if (ifa1->ifa_local == ifa->ifa_local) 384 if (ifa1->ifa_local == ifa->ifa_local)
378 return -EEXIST; 385 return -EEXIST;
379 } 386 }
@@ -386,7 +393,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
386 } 393 }
387 394
388 ifa->ifa_next = dn_db->ifa_list; 395 ifa->ifa_next = dn_db->ifa_list;
389 dn_db->ifa_list = ifa; 396 rcu_assign_pointer(dn_db->ifa_list, ifa);
390 397
391 dn_ifaddr_notify(RTM_NEWADDR, ifa); 398 dn_ifaddr_notify(RTM_NEWADDR, ifa);
392 blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa); 399 blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
@@ -396,7 +403,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
396 403
397static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa) 404static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa)
398{ 405{
399 struct dn_dev *dn_db = dev->dn_ptr; 406 struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
400 int rv; 407 int rv;
401 408
402 if (dn_db == NULL) { 409 if (dn_db == NULL) {
@@ -425,7 +432,8 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg)
425 struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr; 432 struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr;
426 struct dn_dev *dn_db; 433 struct dn_dev *dn_db;
427 struct net_device *dev; 434 struct net_device *dev;
428 struct dn_ifaddr *ifa = NULL, **ifap = NULL; 435 struct dn_ifaddr *ifa = NULL;
436 struct dn_ifaddr __rcu **ifap = NULL;
429 int ret = 0; 437 int ret = 0;
430 438
431 if (copy_from_user(ifr, arg, DN_IFREQ_SIZE)) 439 if (copy_from_user(ifr, arg, DN_IFREQ_SIZE))
@@ -454,8 +462,10 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg)
454 goto done; 462 goto done;
455 } 463 }
456 464
457 if ((dn_db = dev->dn_ptr) != NULL) { 465 if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) {
458 for (ifap = &dn_db->ifa_list; (ifa=*ifap) != NULL; ifap = &ifa->ifa_next) 466 for (ifap = &dn_db->ifa_list;
467 (ifa = rtnl_dereference(*ifap)) != NULL;
468 ifap = &ifa->ifa_next)
459 if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0) 469 if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0)
460 break; 470 break;
461 } 471 }
@@ -558,7 +568,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex)
558 568
559 dev = __dev_get_by_index(&init_net, ifindex); 569 dev = __dev_get_by_index(&init_net, ifindex);
560 if (dev) 570 if (dev)
561 dn_dev = dev->dn_ptr; 571 dn_dev = rtnl_dereference(dev->dn_ptr);
562 572
563 return dn_dev; 573 return dn_dev;
564} 574}
@@ -576,7 +586,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
576 struct nlattr *tb[IFA_MAX+1]; 586 struct nlattr *tb[IFA_MAX+1];
577 struct dn_dev *dn_db; 587 struct dn_dev *dn_db;
578 struct ifaddrmsg *ifm; 588 struct ifaddrmsg *ifm;
579 struct dn_ifaddr *ifa, **ifap; 589 struct dn_ifaddr *ifa;
590 struct dn_ifaddr __rcu **ifap;
580 int err = -EINVAL; 591 int err = -EINVAL;
581 592
582 if (!net_eq(net, &init_net)) 593 if (!net_eq(net, &init_net))
@@ -592,7 +603,9 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
592 goto errout; 603 goto errout;
593 604
594 err = -EADDRNOTAVAIL; 605 err = -EADDRNOTAVAIL;
595 for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) { 606 for (ifap = &dn_db->ifa_list;
607 (ifa = rtnl_dereference(*ifap)) != NULL;
608 ifap = &ifa->ifa_next) {
596 if (tb[IFA_LOCAL] && 609 if (tb[IFA_LOCAL] &&
597 nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2)) 610 nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2))
598 continue; 611 continue;
@@ -632,7 +645,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
632 if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL) 645 if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL)
633 return -ENODEV; 646 return -ENODEV;
634 647
635 if ((dn_db = dev->dn_ptr) == NULL) { 648 if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) {
636 dn_db = dn_dev_create(dev, &err); 649 dn_db = dn_dev_create(dev, &err);
637 if (!dn_db) 650 if (!dn_db)
638 return err; 651 return err;
@@ -748,11 +761,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
748 skip_naddr = 0; 761 skip_naddr = 0;
749 } 762 }
750 763
751 if ((dn_db = dev->dn_ptr) == NULL) 764 if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL)
752 goto cont; 765 goto cont;
753 766
754 for (ifa = dn_db->ifa_list, dn_idx = 0; ifa; 767 for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
755 ifa = ifa->ifa_next, dn_idx++) { 768 ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) {
756 if (dn_idx < skip_naddr) 769 if (dn_idx < skip_naddr)
757 continue; 770 continue;
758 771
@@ -773,21 +786,22 @@ done:
773 786
774static int dn_dev_get_first(struct net_device *dev, __le16 *addr) 787static int dn_dev_get_first(struct net_device *dev, __le16 *addr)
775{ 788{
776 struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; 789 struct dn_dev *dn_db;
777 struct dn_ifaddr *ifa; 790 struct dn_ifaddr *ifa;
778 int rv = -ENODEV; 791 int rv = -ENODEV;
779 792
793 rcu_read_lock();
794 dn_db = rcu_dereference(dev->dn_ptr);
780 if (dn_db == NULL) 795 if (dn_db == NULL)
781 goto out; 796 goto out;
782 797
783 rtnl_lock(); 798 ifa = rcu_dereference(dn_db->ifa_list);
784 ifa = dn_db->ifa_list;
785 if (ifa != NULL) { 799 if (ifa != NULL) {
786 *addr = ifa->ifa_local; 800 *addr = ifa->ifa_local;
787 rv = 0; 801 rv = 0;
788 } 802 }
789 rtnl_unlock();
790out: 803out:
804 rcu_read_unlock();
791 return rv; 805 return rv;
792} 806}
793 807
@@ -823,7 +837,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
823 struct endnode_hello_message *msg; 837 struct endnode_hello_message *msg;
824 struct sk_buff *skb = NULL; 838 struct sk_buff *skb = NULL;
825 __le16 *pktlen; 839 __le16 *pktlen;
826 struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; 840 struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
827 841
828 if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL) 842 if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL)
829 return; 843 return;
@@ -889,7 +903,7 @@ static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn
889static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) 903static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
890{ 904{
891 int n; 905 int n;
892 struct dn_dev *dn_db = dev->dn_ptr; 906 struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
893 struct dn_neigh *dn = (struct dn_neigh *)dn_db->router; 907 struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
894 struct sk_buff *skb; 908 struct sk_buff *skb;
895 size_t size; 909 size_t size;
@@ -960,7 +974,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
960 974
961static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa) 975static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa)
962{ 976{
963 struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; 977 struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
964 978
965 if (dn_db->parms.forwarding == 0) 979 if (dn_db->parms.forwarding == 0)
966 dn_send_endnode_hello(dev, ifa); 980 dn_send_endnode_hello(dev, ifa);
@@ -998,7 +1012,7 @@ static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa)
998 1012
999static int dn_eth_up(struct net_device *dev) 1013static int dn_eth_up(struct net_device *dev)
1000{ 1014{
1001 struct dn_dev *dn_db = dev->dn_ptr; 1015 struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
1002 1016
1003 if (dn_db->parms.forwarding == 0) 1017 if (dn_db->parms.forwarding == 0)
1004 dev_mc_add(dev, dn_rt_all_end_mcast); 1018 dev_mc_add(dev, dn_rt_all_end_mcast);
@@ -1012,7 +1026,7 @@ static int dn_eth_up(struct net_device *dev)
1012 1026
1013static void dn_eth_down(struct net_device *dev) 1027static void dn_eth_down(struct net_device *dev)
1014{ 1028{
1015 struct dn_dev *dn_db = dev->dn_ptr; 1029 struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
1016 1030
1017 if (dn_db->parms.forwarding == 0) 1031 if (dn_db->parms.forwarding == 0)
1018 dev_mc_del(dev, dn_rt_all_end_mcast); 1032 dev_mc_del(dev, dn_rt_all_end_mcast);
@@ -1025,12 +1039,16 @@ static void dn_dev_set_timer(struct net_device *dev);
1025static void dn_dev_timer_func(unsigned long arg) 1039static void dn_dev_timer_func(unsigned long arg)
1026{ 1040{
1027 struct net_device *dev = (struct net_device *)arg; 1041 struct net_device *dev = (struct net_device *)arg;
1028 struct dn_dev *dn_db = dev->dn_ptr; 1042 struct dn_dev *dn_db;
1029 struct dn_ifaddr *ifa; 1043 struct dn_ifaddr *ifa;
1030 1044
1045 rcu_read_lock();
1046 dn_db = rcu_dereference(dev->dn_ptr);
1031 if (dn_db->t3 <= dn_db->parms.t2) { 1047 if (dn_db->t3 <= dn_db->parms.t2) {
1032 if (dn_db->parms.timer3) { 1048 if (dn_db->parms.timer3) {
1033 for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) { 1049 for (ifa = rcu_dereference(dn_db->ifa_list);
1050 ifa;
1051 ifa = rcu_dereference(ifa->ifa_next)) {
1034 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) 1052 if (!(ifa->ifa_flags & IFA_F_SECONDARY))
1035 dn_db->parms.timer3(dev, ifa); 1053 dn_db->parms.timer3(dev, ifa);
1036 } 1054 }
@@ -1039,13 +1057,13 @@ static void dn_dev_timer_func(unsigned long arg)
1039 } else { 1057 } else {
1040 dn_db->t3 -= dn_db->parms.t2; 1058 dn_db->t3 -= dn_db->parms.t2;
1041 } 1059 }
1042 1060 rcu_read_unlock();
1043 dn_dev_set_timer(dev); 1061 dn_dev_set_timer(dev);
1044} 1062}
1045 1063
1046static void dn_dev_set_timer(struct net_device *dev) 1064static void dn_dev_set_timer(struct net_device *dev)
1047{ 1065{
1048 struct dn_dev *dn_db = dev->dn_ptr; 1066 struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
1049 1067
1050 if (dn_db->parms.t2 > dn_db->parms.t3) 1068 if (dn_db->parms.t2 > dn_db->parms.t3)
1051 dn_db->parms.t2 = dn_db->parms.t3; 1069 dn_db->parms.t2 = dn_db->parms.t3;
@@ -1077,8 +1095,8 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
1077 return NULL; 1095 return NULL;
1078 1096
1079 memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); 1097 memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
1080 smp_wmb(); 1098
1081 dev->dn_ptr = dn_db; 1099 rcu_assign_pointer(dev->dn_ptr, dn_db);
1082 dn_db->dev = dev; 1100 dn_db->dev = dev;
1083 init_timer(&dn_db->timer); 1101 init_timer(&dn_db->timer);
1084 1102
@@ -1086,7 +1104,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
1086 1104
1087 dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); 1105 dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table);
1088 if (!dn_db->neigh_parms) { 1106 if (!dn_db->neigh_parms) {
1089 dev->dn_ptr = NULL; 1107 rcu_assign_pointer(dev->dn_ptr, NULL);
1090 kfree(dn_db); 1108 kfree(dn_db);
1091 return NULL; 1109 return NULL;
1092 } 1110 }
@@ -1125,7 +1143,7 @@ void dn_dev_up(struct net_device *dev)
1125 struct dn_ifaddr *ifa; 1143 struct dn_ifaddr *ifa;
1126 __le16 addr = decnet_address; 1144 __le16 addr = decnet_address;
1127 int maybe_default = 0; 1145 int maybe_default = 0;
1128 struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; 1146 struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
1129 1147
1130 if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) 1148 if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
1131 return; 1149 return;
@@ -1176,7 +1194,7 @@ void dn_dev_up(struct net_device *dev)
1176 1194
1177static void dn_dev_delete(struct net_device *dev) 1195static void dn_dev_delete(struct net_device *dev)
1178{ 1196{
1179 struct dn_dev *dn_db = dev->dn_ptr; 1197 struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
1180 1198
1181 if (dn_db == NULL) 1199 if (dn_db == NULL)
1182 return; 1200 return;
@@ -1204,13 +1222,13 @@ static void dn_dev_delete(struct net_device *dev)
1204 1222
1205void dn_dev_down(struct net_device *dev) 1223void dn_dev_down(struct net_device *dev)
1206{ 1224{
1207 struct dn_dev *dn_db = dev->dn_ptr; 1225 struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
1208 struct dn_ifaddr *ifa; 1226 struct dn_ifaddr *ifa;
1209 1227
1210 if (dn_db == NULL) 1228 if (dn_db == NULL)
1211 return; 1229 return;
1212 1230
1213 while((ifa = dn_db->ifa_list) != NULL) { 1231 while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) {
1214 dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0); 1232 dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0);
1215 dn_dev_free_ifa(ifa); 1233 dn_dev_free_ifa(ifa);
1216 } 1234 }
@@ -1270,7 +1288,7 @@ static inline int is_dn_dev(struct net_device *dev)
1270} 1288}
1271 1289
1272static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) 1290static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
1273 __acquires(rcu) 1291 __acquires(RCU)
1274{ 1292{
1275 int i; 1293 int i;
1276 struct net_device *dev; 1294 struct net_device *dev;
@@ -1313,7 +1331,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1313} 1331}
1314 1332
1315static void dn_dev_seq_stop(struct seq_file *seq, void *v) 1333static void dn_dev_seq_stop(struct seq_file *seq, void *v)
1316 __releases(rcu) 1334 __releases(RCU)
1317{ 1335{
1318 rcu_read_unlock(); 1336 rcu_read_unlock();
1319} 1337}
@@ -1340,7 +1358,7 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v)
1340 struct net_device *dev = v; 1358 struct net_device *dev = v;
1341 char peer_buf[DN_ASCBUF_LEN]; 1359 char peer_buf[DN_ASCBUF_LEN];
1342 char router_buf[DN_ASCBUF_LEN]; 1360 char router_buf[DN_ASCBUF_LEN];
1343 struct dn_dev *dn_db = dev->dn_ptr; 1361 struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr);
1344 1362
1345 seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu" 1363 seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu"
1346 " %04hu %03d %02x %-10s %-7s %-7s\n", 1364 " %04hu %03d %02x %-10s %-7s %-7s\n",
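The dn_dev.c conversion above follows the standard discipline for __rcu-annotated
pointers: pick the dereference flavour that names what actually protects the
access. Annotated fragments, condensed from the hunks in this patch rather than
compilable on their own:

	/* reader path: protection is an RCU read-side critical section */
	rcu_read_lock();
	dn_db = rcu_dereference(dev->dn_ptr);
	/* ... use dn_db ... */
	rcu_read_unlock();

	/* control path: protection is the RTNL mutex, no rcu_read_lock() */
	ASSERT_RTNL();
	dn_db = rtnl_dereference(dev->dn_ptr);

	/* update under some other lock: tell lockdep which one */
	rt = rcu_dereference_protected(*rtp,
			lockdep_is_held(&dn_rt_hash_table[i].lock));

	/* publishing a new pointer: pairs with the readers' rcu_dereference() */
	rcu_assign_pointer(dev->dn_ptr, dn_db);

Freeing then goes through call_rcu(), as dn_dev_free_ifa() now does, so readers
still inside a critical section never see the memory disappear under them.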
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 4ab96c15166..0ef0a81bcd7 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -610,10 +610,12 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
610 /* Scan device list */ 610 /* Scan device list */
611 rcu_read_lock(); 611 rcu_read_lock();
612 for_each_netdev_rcu(&init_net, dev) { 612 for_each_netdev_rcu(&init_net, dev) {
613 dn_db = dev->dn_ptr; 613 dn_db = rcu_dereference(dev->dn_ptr);
614 if (dn_db == NULL) 614 if (dn_db == NULL)
615 continue; 615 continue;
616 for(ifa2 = dn_db->ifa_list; ifa2; ifa2 = ifa2->ifa_next) { 616 for (ifa2 = rcu_dereference(dn_db->ifa_list);
617 ifa2 != NULL;
618 ifa2 = rcu_dereference(ifa2->ifa_next)) {
617 if (ifa2->ifa_local == ifa->ifa_local) { 619 if (ifa2->ifa_local == ifa->ifa_local) {
618 found_it = 1; 620 found_it = 1;
619 break; 621 break;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index a085dbcf5c7..602dade7e9a 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -391,7 +391,7 @@ int dn_neigh_router_hello(struct sk_buff *skb)
391 write_lock(&neigh->lock); 391 write_lock(&neigh->lock);
392 392
393 neigh->used = jiffies; 393 neigh->used = jiffies;
394 dn_db = (struct dn_dev *)neigh->dev->dn_ptr; 394 dn_db = rcu_dereference(neigh->dev->dn_ptr);
395 395
396 if (!(neigh->nud_state & NUD_PERMANENT)) { 396 if (!(neigh->nud_state & NUD_PERMANENT)) {
397 neigh->updated = jiffies; 397 neigh->updated = jiffies;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index df0f3e54ff8..8280e43c886 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -93,7 +93,7 @@
93 93
94struct dn_rt_hash_bucket 94struct dn_rt_hash_bucket
95{ 95{
96 struct dn_route *chain; 96 struct dn_route __rcu *chain;
97 spinlock_t lock; 97 spinlock_t lock;
98}; 98};
99 99
@@ -157,15 +157,17 @@ static inline void dnrt_drop(struct dn_route *rt)
157static void dn_dst_check_expire(unsigned long dummy) 157static void dn_dst_check_expire(unsigned long dummy)
158{ 158{
159 int i; 159 int i;
160 struct dn_route *rt, **rtp; 160 struct dn_route *rt;
161 struct dn_route __rcu **rtp;
161 unsigned long now = jiffies; 162 unsigned long now = jiffies;
162 unsigned long expire = 120 * HZ; 163 unsigned long expire = 120 * HZ;
163 164
164 for(i = 0; i <= dn_rt_hash_mask; i++) { 165 for (i = 0; i <= dn_rt_hash_mask; i++) {
165 rtp = &dn_rt_hash_table[i].chain; 166 rtp = &dn_rt_hash_table[i].chain;
166 167
167 spin_lock(&dn_rt_hash_table[i].lock); 168 spin_lock(&dn_rt_hash_table[i].lock);
168 while((rt=*rtp) != NULL) { 169 while ((rt = rcu_dereference_protected(*rtp,
170 lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
169 if (atomic_read(&rt->dst.__refcnt) || 171 if (atomic_read(&rt->dst.__refcnt) ||
170 (now - rt->dst.lastuse) < expire) { 172 (now - rt->dst.lastuse) < expire) {
171 rtp = &rt->dst.dn_next; 173 rtp = &rt->dst.dn_next;
@@ -186,17 +188,19 @@ static void dn_dst_check_expire(unsigned long dummy)
186 188
187static int dn_dst_gc(struct dst_ops *ops) 189static int dn_dst_gc(struct dst_ops *ops)
188{ 190{
189 struct dn_route *rt, **rtp; 191 struct dn_route *rt;
192 struct dn_route __rcu **rtp;
190 int i; 193 int i;
191 unsigned long now = jiffies; 194 unsigned long now = jiffies;
192 unsigned long expire = 10 * HZ; 195 unsigned long expire = 10 * HZ;
193 196
194 for(i = 0; i <= dn_rt_hash_mask; i++) { 197 for (i = 0; i <= dn_rt_hash_mask; i++) {
195 198
196 spin_lock_bh(&dn_rt_hash_table[i].lock); 199 spin_lock_bh(&dn_rt_hash_table[i].lock);
197 rtp = &dn_rt_hash_table[i].chain; 200 rtp = &dn_rt_hash_table[i].chain;
198 201
199 while((rt=*rtp) != NULL) { 202 while ((rt = rcu_dereference_protected(*rtp,
203 lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
200 if (atomic_read(&rt->dst.__refcnt) || 204 if (atomic_read(&rt->dst.__refcnt) ||
201 (now - rt->dst.lastuse) < expire) { 205 (now - rt->dst.lastuse) < expire) {
202 rtp = &rt->dst.dn_next; 206 rtp = &rt->dst.dn_next;
@@ -227,7 +231,7 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
227{ 231{
228 u32 min_mtu = 230; 232 u32 min_mtu = 230;
229 struct dn_dev *dn = dst->neighbour ? 233 struct dn_dev *dn = dst->neighbour ?
230 (struct dn_dev *)dst->neighbour->dev->dn_ptr : NULL; 234 rcu_dereference_raw(dst->neighbour->dev->dn_ptr) : NULL;
231 235
232 if (dn && dn->use_long == 0) 236 if (dn && dn->use_long == 0)
233 min_mtu -= 6; 237 min_mtu -= 6;
@@ -267,23 +271,25 @@ static void dn_dst_link_failure(struct sk_buff *skb)
267 271
268static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) 272static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
269{ 273{
270 return ((fl1->nl_u.dn_u.daddr ^ fl2->nl_u.dn_u.daddr) | 274 return ((fl1->fld_dst ^ fl2->fld_dst) |
271 (fl1->nl_u.dn_u.saddr ^ fl2->nl_u.dn_u.saddr) | 275 (fl1->fld_src ^ fl2->fld_src) |
272 (fl1->mark ^ fl2->mark) | 276 (fl1->mark ^ fl2->mark) |
273 (fl1->nl_u.dn_u.scope ^ fl2->nl_u.dn_u.scope) | 277 (fl1->fld_scope ^ fl2->fld_scope) |
274 (fl1->oif ^ fl2->oif) | 278 (fl1->oif ^ fl2->oif) |
275 (fl1->iif ^ fl2->iif)) == 0; 279 (fl1->iif ^ fl2->iif)) == 0;
276} 280}
277 281
278static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) 282static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp)
279{ 283{
280 struct dn_route *rth, **rthp; 284 struct dn_route *rth;
285 struct dn_route __rcu **rthp;
281 unsigned long now = jiffies; 286 unsigned long now = jiffies;
282 287
283 rthp = &dn_rt_hash_table[hash].chain; 288 rthp = &dn_rt_hash_table[hash].chain;
284 289
285 spin_lock_bh(&dn_rt_hash_table[hash].lock); 290 spin_lock_bh(&dn_rt_hash_table[hash].lock);
286 while((rth = *rthp) != NULL) { 291 while ((rth = rcu_dereference_protected(*rthp,
292 lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
287 if (compare_keys(&rth->fl, &rt->fl)) { 293 if (compare_keys(&rth->fl, &rt->fl)) {
288 /* Put it first */ 294 /* Put it first */
289 *rthp = rth->dst.dn_next; 295 *rthp = rth->dst.dn_next;
@@ -315,15 +321,15 @@ static void dn_run_flush(unsigned long dummy)
315 int i; 321 int i;
316 struct dn_route *rt, *next; 322 struct dn_route *rt, *next;
317 323
318 for(i = 0; i < dn_rt_hash_mask; i++) { 324 for (i = 0; i < dn_rt_hash_mask; i++) {
319 spin_lock_bh(&dn_rt_hash_table[i].lock); 325 spin_lock_bh(&dn_rt_hash_table[i].lock);
320 326
321 if ((rt = xchg(&dn_rt_hash_table[i].chain, NULL)) == NULL) 327 if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL)
322 goto nothing_to_declare; 328 goto nothing_to_declare;
323 329
324 for(; rt; rt=next) { 330 for(; rt; rt = next) {
325 next = rt->dst.dn_next; 331 next = rcu_dereference_raw(rt->dst.dn_next);
326 rt->dst.dn_next = NULL; 332 RCU_INIT_POINTER(rt->dst.dn_next, NULL);
327 dst_free((struct dst_entry *)rt); 333 dst_free((struct dst_entry *)rt);
328 } 334 }
329 335
@@ -458,15 +464,16 @@ static int dn_return_long(struct sk_buff *skb)
458 */ 464 */
459static int dn_route_rx_packet(struct sk_buff *skb) 465static int dn_route_rx_packet(struct sk_buff *skb)
460{ 466{
461 struct dn_skb_cb *cb = DN_SKB_CB(skb); 467 struct dn_skb_cb *cb;
462 int err; 468 int err;
463 469
464 if ((err = dn_route_input(skb)) == 0) 470 if ((err = dn_route_input(skb)) == 0)
465 return dst_input(skb); 471 return dst_input(skb);
466 472
473 cb = DN_SKB_CB(skb);
467 if (decnet_debug_level & 4) { 474 if (decnet_debug_level & 4) {
468 char *devname = skb->dev ? skb->dev->name : "???"; 475 char *devname = skb->dev ? skb->dev->name : "???";
469 struct dn_skb_cb *cb = DN_SKB_CB(skb); 476
470 printk(KERN_DEBUG 477 printk(KERN_DEBUG
471 "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n", 478 "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n",
472 (int)cb->rt_flags, devname, skb->len, 479 (int)cb->rt_flags, devname, skb->len,
@@ -573,7 +580,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
573 struct dn_skb_cb *cb; 580 struct dn_skb_cb *cb;
574 unsigned char flags = 0; 581 unsigned char flags = 0;
575 __u16 len = le16_to_cpu(*(__le16 *)skb->data); 582 __u16 len = le16_to_cpu(*(__le16 *)skb->data);
576 struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; 583 struct dn_dev *dn = rcu_dereference(dev->dn_ptr);
577 unsigned char padlen = 0; 584 unsigned char padlen = 0;
578 585
579 if (!net_eq(dev_net(dev), &init_net)) 586 if (!net_eq(dev_net(dev), &init_net))
@@ -728,7 +735,7 @@ static int dn_forward(struct sk_buff *skb)
728{ 735{
729 struct dn_skb_cb *cb = DN_SKB_CB(skb); 736 struct dn_skb_cb *cb = DN_SKB_CB(skb);
730 struct dst_entry *dst = skb_dst(skb); 737 struct dst_entry *dst = skb_dst(skb);
731 struct dn_dev *dn_db = dst->dev->dn_ptr; 738 struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr);
732 struct dn_route *rt; 739 struct dn_route *rt;
733 struct neighbour *neigh = dst->neighbour; 740 struct neighbour *neigh = dst->neighbour;
734 int header_len; 741 int header_len;
@@ -835,13 +842,16 @@ static inline int dn_match_addr(__le16 addr1, __le16 addr2)
835static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope) 842static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope)
836{ 843{
837 __le16 saddr = 0; 844 __le16 saddr = 0;
838 struct dn_dev *dn_db = dev->dn_ptr; 845 struct dn_dev *dn_db;
839 struct dn_ifaddr *ifa; 846 struct dn_ifaddr *ifa;
840 int best_match = 0; 847 int best_match = 0;
841 int ret; 848 int ret;
842 849
843 read_lock(&dev_base_lock); 850 rcu_read_lock();
844 for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) { 851 dn_db = rcu_dereference(dev->dn_ptr);
852 for (ifa = rcu_dereference(dn_db->ifa_list);
853 ifa != NULL;
854 ifa = rcu_dereference(ifa->ifa_next)) {
845 if (ifa->ifa_scope > scope) 855 if (ifa->ifa_scope > scope)
846 continue; 856 continue;
847 if (!daddr) { 857 if (!daddr) {
@@ -854,7 +864,7 @@ static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int
854 if (best_match == 0) 864 if (best_match == 0)
855 saddr = ifa->ifa_local; 865 saddr = ifa->ifa_local;
856 } 866 }
857 read_unlock(&dev_base_lock); 867 rcu_read_unlock();
858 868
859 return saddr; 869 return saddr;
860} 870}
@@ -872,11 +882,9 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re
872 882
873static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) 883static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard)
874{ 884{
875 struct flowi fl = { .nl_u = { .dn_u = 885 struct flowi fl = { .fld_dst = oldflp->fld_dst,
876 { .daddr = oldflp->fld_dst, 886 .fld_src = oldflp->fld_src,
877 .saddr = oldflp->fld_src, 887 .fld_scope = RT_SCOPE_UNIVERSE,
878 .scope = RT_SCOPE_UNIVERSE,
879 } },
880 .mark = oldflp->mark, 888 .mark = oldflp->mark,
881 .iif = init_net.loopback_dev->ifindex, 889 .iif = init_net.loopback_dev->ifindex,
882 .oif = oldflp->oif }; 890 .oif = oldflp->oif };
@@ -1020,7 +1028,7 @@ source_ok:
1020 err = -ENODEV; 1028 err = -ENODEV;
1021 if (dev_out == NULL) 1029 if (dev_out == NULL)
1022 goto out; 1030 goto out;
1023 dn_db = dev_out->dn_ptr; 1031 dn_db = rcu_dereference_raw(dev_out->dn_ptr);
1024 /* Possible improvement - check all devices for local addr */ 1032 /* Possible improvement - check all devices for local addr */
1025 if (dn_dev_islocal(dev_out, fl.fld_dst)) { 1033 if (dn_dev_islocal(dev_out, fl.fld_dst)) {
1026 dev_put(dev_out); 1034 dev_put(dev_out);
@@ -1171,7 +1179,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
1171 if ((flp->fld_dst == rt->fl.fld_dst) && 1179 if ((flp->fld_dst == rt->fl.fld_dst) &&
1172 (flp->fld_src == rt->fl.fld_src) && 1180 (flp->fld_src == rt->fl.fld_src) &&
1173 (flp->mark == rt->fl.mark) && 1181 (flp->mark == rt->fl.mark) &&
1174 (rt->fl.iif == 0) && 1182 dn_is_output_route(rt) &&
1175 (rt->fl.oif == flp->oif)) { 1183 (rt->fl.oif == flp->oif)) {
1176 dst_use(&rt->dst, jiffies); 1184 dst_use(&rt->dst, jiffies);
1177 rcu_read_unlock_bh(); 1185 rcu_read_unlock_bh();
@@ -1220,11 +1228,9 @@ static int dn_route_input_slow(struct sk_buff *skb)
1220 int flags = 0; 1228 int flags = 0;
1221 __le16 gateway = 0; 1229 __le16 gateway = 0;
1222 __le16 local_src = 0; 1230 __le16 local_src = 0;
1223 struct flowi fl = { .nl_u = { .dn_u = 1231 struct flowi fl = { .fld_dst = cb->dst,
1224 { .daddr = cb->dst, 1232 .fld_src = cb->src,
1225 .saddr = cb->src, 1233 .fld_scope = RT_SCOPE_UNIVERSE,
1226 .scope = RT_SCOPE_UNIVERSE,
1227 } },
1228 .mark = skb->mark, 1234 .mark = skb->mark,
1229 .iif = skb->dev->ifindex }; 1235 .iif = skb->dev->ifindex };
1230 struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; 1236 struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
@@ -1233,7 +1239,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
1233 1239
1234 dev_hold(in_dev); 1240 dev_hold(in_dev);
1235 1241
1236 if ((dn_db = in_dev->dn_ptr) == NULL) 1242 if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL)
1237 goto out; 1243 goto out;
1238 1244
1239 /* Zero source addresses are not allowed */ 1245 /* Zero source addresses are not allowed */
@@ -1502,7 +1508,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1502 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, 1508 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
1503 rt->dst.error) < 0) 1509 rt->dst.error) < 0)
1504 goto rtattr_failure; 1510 goto rtattr_failure;
1505 if (rt->fl.iif) 1511 if (dn_is_input_route(rt))
1506 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); 1512 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
1507 1513
1508 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1514 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -1677,15 +1683,15 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
1677{ 1683{
1678 struct dn_rt_cache_iter_state *s = seq->private; 1684 struct dn_rt_cache_iter_state *s = seq->private;
1679 1685
1680 rt = rt->dst.dn_next; 1686 rt = rcu_dereference_bh(rt->dst.dn_next);
1681 while(!rt) { 1687 while (!rt) {
1682 rcu_read_unlock_bh(); 1688 rcu_read_unlock_bh();
1683 if (--s->bucket < 0) 1689 if (--s->bucket < 0)
1684 break; 1690 break;
1685 rcu_read_lock_bh(); 1691 rcu_read_lock_bh();
1686 rt = dn_rt_hash_table[s->bucket].chain; 1692 rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
1687 } 1693 }
1688 return rcu_dereference_bh(rt); 1694 return rt;
1689} 1695}
1690 1696
1691static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 1697static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
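
The dn_route.c conversion above is the standard sparse/RCU annotation recipe: the per-bucket chain head gains __rcu, writers traverse it with rcu_dereference_protected() while holding the bucket spinlock (so lockdep can vouch for the access), and lockless readers use rcu_dereference_bh(). A minimal sketch of the same pattern on a hypothetical bucket/item pair (illustrative names, not the DECnet structures):

	/* Sketch only: hypothetical types showing the locking recipe used above. */
	#include <linux/rcupdate.h>
	#include <linux/spinlock.h>
	#include <linux/slab.h>
	#include <linux/types.h>

	struct item {
		struct item __rcu *next;
		struct rcu_head rcu;
		int key;
	};

	struct bucket {
		struct item __rcu *chain;
		spinlock_t lock;
	};

	static void item_reclaim(struct rcu_head *head)
	{
		kfree(container_of(head, struct item, rcu));
	}

	/* Writer: the bucket lock justifies rcu_dereference_protected(). */
	static void bucket_del(struct bucket *b, int key)
	{
		struct item *it;
		struct item __rcu **itp;

		spin_lock_bh(&b->lock);
		itp = &b->chain;
		while ((it = rcu_dereference_protected(*itp,
				lockdep_is_held(&b->lock))) != NULL) {
			if (it->key != key) {
				itp = &it->next;
				continue;
			}
			*itp = it->next;		/* unlink in one store */
			call_rcu(&it->rcu, item_reclaim); /* defer the free */
		}
		spin_unlock_bh(&b->lock);
	}

	/* Reader: lockless walk in BH context, as the /proc iterator does. */
	static bool bucket_has(struct bucket *b, int key)
	{
		struct item *it;
		bool found = false;

		rcu_read_lock_bh();
		for (it = rcu_dereference_bh(b->chain); it;
		     it = rcu_dereference_bh(it->next)) {
			if (it->key == key) {
				found = true;
				break;
			}
		}
		rcu_read_unlock_bh();
		return found;
	}
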
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 48fdf10be7a..6eb91df3c55 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -175,7 +175,7 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
175 175
176unsigned dnet_addr_type(__le16 addr) 176unsigned dnet_addr_type(__le16 addr)
177{ 177{
178 struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; 178 struct flowi fl = { .fld_dst = addr };
179 struct dn_fib_res res; 179 struct dn_fib_res res;
180 unsigned ret = RTN_UNICAST; 180 unsigned ret = RTN_UNICAST;
181 struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); 181 struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile
index c0ef4e71dc4..d5c13c2eb36 100644
--- a/net/dns_resolver/Makefile
+++ b/net/dns_resolver/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o 5obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o
6 6
7dns_resolver-objs := dns_key.o dns_query.o 7dns_resolver-y := dns_key.o dns_query.o
diff --git a/net/econet/Makefile b/net/econet/Makefile
index 39f0a77abdb..05fae8be2fe 100644
--- a/net/econet/Makefile
+++ b/net/econet/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECONET) += econet.o 5obj-$(CONFIG_ECONET) += econet.o
6 6
7econet-objs := af_econet.o 7econet-y := af_econet.o
diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c
index 93c91b633a5..6df6ecf4970 100644
--- a/net/ieee802154/af_ieee802154.c
+++ b/net/ieee802154/af_ieee802154.c
@@ -52,11 +52,11 @@ struct net_device *ieee802154_get_dev(struct net *net,
52 52
53 switch (addr->addr_type) { 53 switch (addr->addr_type) {
54 case IEEE802154_ADDR_LONG: 54 case IEEE802154_ADDR_LONG:
55 rtnl_lock(); 55 rcu_read_lock();
56 dev = dev_getbyhwaddr(net, ARPHRD_IEEE802154, addr->hwaddr); 56 dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, addr->hwaddr);
57 if (dev) 57 if (dev)
58 dev_hold(dev); 58 dev_hold(dev);
59 rtnl_unlock(); 59 rcu_read_unlock();
60 break; 60 break;
61 case IEEE802154_ADDR_SHORT: 61 case IEEE802154_ADDR_SHORT:
62 if (addr->pan_id == 0xffff || 62 if (addr->pan_id == 0xffff ||
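
dev_getbyhwaddr_rcu() only guarantees the returned pointer stays valid for the enclosing RCU read-side section, so a caller that needs the device afterwards must take a reference before unlocking, which is exactly what the hunk above does. The idiom in isolation (a sketch, not the ieee802154 code):

	#include <linux/netdevice.h>
	#include <linux/rcupdate.h>

	/* Look up a device by hardware address and pin it past the RCU section. */
	static struct net_device *get_dev_by_hwaddr(struct net *net,
						    unsigned short type,
						    const char *hwaddr)
	{
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_getbyhwaddr_rcu(net, type, hwaddr);
		if (dev)
			dev_hold(dev);	/* refcount keeps dev alive after unlock */
		rcu_read_unlock();

		return dev;		/* caller must dev_put() when finished */
	}
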
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f581f77d109..f2b61107df6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1148,21 +1148,13 @@ int inet_sk_rebuild_header(struct sock *sk)
1148 struct flowi fl = { 1148 struct flowi fl = {
1149 .oif = sk->sk_bound_dev_if, 1149 .oif = sk->sk_bound_dev_if,
1150 .mark = sk->sk_mark, 1150 .mark = sk->sk_mark,
1151 .nl_u = { 1151 .fl4_dst = daddr,
1152 .ip4_u = { 1152 .fl4_src = inet->inet_saddr,
1153 .daddr = daddr, 1153 .fl4_tos = RT_CONN_FLAGS(sk),
1154 .saddr = inet->inet_saddr,
1155 .tos = RT_CONN_FLAGS(sk),
1156 },
1157 },
1158 .proto = sk->sk_protocol, 1154 .proto = sk->sk_protocol,
1159 .flags = inet_sk_flowi_flags(sk), 1155 .flags = inet_sk_flowi_flags(sk),
1160 .uli_u = { 1156 .fl_ip_sport = inet->inet_sport,
1161 .ports = { 1157 .fl_ip_dport = inet->inet_dport,
1162 .sport = inet->inet_sport,
1163 .dport = inet->inet_dport,
1164 },
1165 },
1166 }; 1158 };
1167 1159
1168 security_sk_classify_flow(sk, &fl); 1160 security_sk_classify_flow(sk, &fl);
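
The .nl_u/.uli_u rewrites throughout this series are mechanical: the new fl4_*/fl_ip_* names are macros aliasing the old union members, so the flat initializer is field-for-field equivalent to the nested one and both forms are valid during the transition. The shape of the conversion, reduced to the essentials (illustrative parameters; struct flowi comes from <net/flow.h>):

	void flowi_before_after_example(int oif, __be32 daddr, __be32 saddr,
					__u8 tos, __be16 sport, __be16 dport)
	{
		/* Before: three levels of anonymous-union nesting. */
		struct flowi fl_old = {
			.oif   = oif,
			.nl_u  = { .ip4_u = { .daddr = daddr,
					      .saddr = saddr,
					      .tos   = tos } },
			.uli_u = { .ports = { .sport = sport,
					      .dport = dport } },
		};

		/* After: identical fields through the flattening macros. */
		struct flowi fl_new = {
			.oif         = oif,
			.fl4_dst     = daddr,
			.fl4_src     = saddr,
			.fl4_tos     = tos,
			.fl_ip_sport = sport,
			.fl_ip_dport = dport,
		};

		(void)fl_old;
		(void)fl_new;
	}
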
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index d8e540c5b07..a2fc7b961db 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -433,8 +433,8 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
433 433
434static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) 434static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
435{ 435{
436 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, 436 struct flowi fl = { .fl4_dst = sip,
437 .saddr = tip } } }; 437 .fl4_src = tip };
438 struct rtable *rt; 438 struct rtable *rt;
439 int flag = 0; 439 int flag = 0;
440 /*unsigned long now; */ 440 /*unsigned long now; */
@@ -883,7 +883,7 @@ static int arp_process(struct sk_buff *skb)
883 883
884 dont_send = arp_ignore(in_dev, sip, tip); 884 dont_send = arp_ignore(in_dev, sip, tip);
885 if (!dont_send && IN_DEV_ARPFILTER(in_dev)) 885 if (!dont_send && IN_DEV_ARPFILTER(in_dev))
886 dont_send |= arp_filter(sip, tip, dev); 886 dont_send = arp_filter(sip, tip, dev);
887 if (!dont_send) { 887 if (!dont_send) {
888 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 888 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
889 if (n) { 889 if (n) {
@@ -1017,13 +1017,14 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
1017 IPV4_DEVCONF_ALL(net, PROXY_ARP) = on; 1017 IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
1018 return 0; 1018 return 0;
1019 } 1019 }
1020 if (__in_dev_get_rtnl(dev)) { 1020 if (__in_dev_get_rcu(dev)) {
1021 IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on); 1021 IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on);
1022 return 0; 1022 return 0;
1023 } 1023 }
1024 return -ENXIO; 1024 return -ENXIO;
1025} 1025}
1026 1026
1027/* must be called with rcu_read_lock() */
1027static int arp_req_set_public(struct net *net, struct arpreq *r, 1028static int arp_req_set_public(struct net *net, struct arpreq *r,
1028 struct net_device *dev) 1029 struct net_device *dev)
1029{ 1030{
@@ -1033,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
1033 if (mask && mask != htonl(0xFFFFFFFF)) 1034 if (mask && mask != htonl(0xFFFFFFFF))
1034 return -EINVAL; 1035 return -EINVAL;
1035 if (!dev && (r->arp_flags & ATF_COM)) { 1036 if (!dev && (r->arp_flags & ATF_COM)) {
1036 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, 1037 dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
1037 r->arp_ha.sa_data); 1038 r->arp_ha.sa_data);
1038 if (!dev) 1039 if (!dev)
1039 return -ENODEV; 1040 return -ENODEV;
@@ -1061,8 +1062,8 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1061 if (r->arp_flags & ATF_PERM) 1062 if (r->arp_flags & ATF_PERM)
1062 r->arp_flags |= ATF_COM; 1063 r->arp_flags |= ATF_COM;
1063 if (dev == NULL) { 1064 if (dev == NULL) {
1064 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip, 1065 struct flowi fl = { .fl4_dst = ip,
1065 .tos = RTO_ONLINK } }; 1066 .fl4_tos = RTO_ONLINK };
1066 struct rtable *rt; 1067 struct rtable *rt;
1067 err = ip_route_output_key(net, &rt, &fl); 1068 err = ip_route_output_key(net, &rt, &fl);
1068 if (err != 0) 1069 if (err != 0)
@@ -1169,8 +1170,8 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1169 1170
1170 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; 1171 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
1171 if (dev == NULL) { 1172 if (dev == NULL) {
1172 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip, 1173 struct flowi fl = { .fl4_dst = ip,
1173 .tos = RTO_ONLINK } }; 1174 .fl4_tos = RTO_ONLINK };
1174 struct rtable *rt; 1175 struct rtable *rt;
1175 err = ip_route_output_key(net, &rt, &fl); 1176 err = ip_route_output_key(net, &rt, &fl);
1176 if (err != 0) 1177 if (err != 0)
@@ -1225,10 +1226,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1225 if (!(r.arp_flags & ATF_NETMASK)) 1226 if (!(r.arp_flags & ATF_NETMASK))
1226 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = 1227 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
1227 htonl(0xFFFFFFFFUL); 1228 htonl(0xFFFFFFFFUL);
1228 rtnl_lock(); 1229 rcu_read_lock();
1229 if (r.arp_dev[0]) { 1230 if (r.arp_dev[0]) {
1230 err = -ENODEV; 1231 err = -ENODEV;
1231 dev = __dev_get_by_name(net, r.arp_dev); 1232 dev = dev_get_by_name_rcu(net, r.arp_dev);
1232 if (dev == NULL) 1233 if (dev == NULL)
1233 goto out; 1234 goto out;
1234 1235
@@ -1252,12 +1253,12 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1252 break; 1253 break;
1253 case SIOCGARP: 1254 case SIOCGARP:
1254 err = arp_req_get(&r, dev); 1255 err = arp_req_get(&r, dev);
1255 if (!err && copy_to_user(arg, &r, sizeof(r)))
1256 err = -EFAULT;
1257 break; 1256 break;
1258 } 1257 }
1259out: 1258out:
1260 rtnl_unlock(); 1259 rcu_read_unlock();
1260 if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
1261 err = -EFAULT;
1261 return err; 1262 return err;
1262} 1263}
1263 1264
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index dc94b0316b7..3b067704ab3 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1256,6 +1256,87 @@ errout:
1256 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); 1256 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1257} 1257}
1258 1258
1259static size_t inet_get_link_af_size(const struct net_device *dev)
1260{
1261 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1262
1263 if (!in_dev)
1264 return 0;
1265
1266 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1267}
1268
1269static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1270{
1271 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1272 struct nlattr *nla;
1273 int i;
1274
1275 if (!in_dev)
1276 return -ENODATA;
1277
1278 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1279 if (nla == NULL)
1280 return -EMSGSIZE;
1281
1282 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1283 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1284
1285 return 0;
1286}
1287
1288static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1289 [IFLA_INET_CONF] = { .type = NLA_NESTED },
1290};
1291
1292static int inet_validate_link_af(const struct net_device *dev,
1293 const struct nlattr *nla)
1294{
1295 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1296 int err, rem;
1297
1298 if (dev && !__in_dev_get_rtnl(dev))
1299 return -EAFNOSUPPORT;
1300
1301 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1302 if (err < 0)
1303 return err;
1304
1305 if (tb[IFLA_INET_CONF]) {
1306 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1307 int cfgid = nla_type(a);
1308
1309 if (nla_len(a) < 4)
1310 return -EINVAL;
1311
1312 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1313 return -EINVAL;
1314 }
1315 }
1316
1317 return 0;
1318}
1319
1320static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1321{
1322 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1323 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1324 int rem;
1325
1326 if (!in_dev)
1327 return -EAFNOSUPPORT;
1328
1329 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1330 BUG();
1331
1332 if (tb[IFLA_INET_CONF]) {
1333 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1334 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1335 }
1336
1337 return 0;
1338}
1339
1259#ifdef CONFIG_SYSCTL 1340#ifdef CONFIG_SYSCTL
1260 1341
1261static void devinet_copy_dflt_conf(struct net *net, int i) 1342static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -1619,6 +1700,14 @@ static __net_initdata struct pernet_operations devinet_ops = {
1619 .exit = devinet_exit_net, 1700 .exit = devinet_exit_net,
1620}; 1701};
1621 1702
1703static struct rtnl_af_ops inet_af_ops = {
1704 .family = AF_INET,
1705 .fill_link_af = inet_fill_link_af,
1706 .get_link_af_size = inet_get_link_af_size,
1707 .validate_link_af = inet_validate_link_af,
1708 .set_link_af = inet_set_link_af,
1709};
1710
1622void __init devinet_init(void) 1711void __init devinet_init(void)
1623{ 1712{
1624 register_pernet_subsys(&devinet_ops); 1713 register_pernet_subsys(&devinet_ops);
@@ -1626,6 +1715,8 @@ void __init devinet_init(void)
1626 register_gifconf(PF_INET, inet_gifconf); 1715 register_gifconf(PF_INET, inet_gifconf);
1627 register_netdevice_notifier(&ip_netdev_notifier); 1716 register_netdevice_notifier(&ip_netdev_notifier);
1628 1717
1718 rtnl_af_register(&inet_af_ops);
1719
1629 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); 1720 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1630 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); 1721 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1631 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); 1722 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
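
inet_af_ops plugs IPv4 into the new per-family rtnetlink hooks: the core opens an IFLA_AF_SPEC nest, then one nest per registered family, and calls fill_link_af() inside it. Roughly how the core side consumes the ops (a simplified sketch; the real loop also handles -ENODATA by cancelling the nest):

	#include <net/rtnetlink.h>
	#include <net/netlink.h>

	/* Sketch of the consumer side; error unwinding trimmed for brevity. */
	static int fill_one_af(struct sk_buff *skb, struct net_device *dev,
			       const struct rtnl_af_ops *af_ops)
	{
		struct nlattr *af_spec, *af;

		af_spec = nla_nest_start(skb, IFLA_AF_SPEC);
		if (!af_spec)
			return -EMSGSIZE;

		af = nla_nest_start(skb, af_ops->family);	/* e.g. AF_INET */
		if (!af)
			return -EMSGSIZE;

		if (af_ops->fill_link_af(skb, dev) < 0)	/* inet_fill_link_af() */
			return -EMSGSIZE;

		nla_nest_end(skb, af);
		nla_nest_end(skb, af_spec);
		return 0;
	}
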
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index eb6f69a8f27..d3a1112b9d9 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -158,11 +158,7 @@ static void fib_flush(struct net *net)
158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) 158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
159{ 159{
160 struct flowi fl = { 160 struct flowi fl = {
161 .nl_u = { 161 .fl4_dst = addr,
162 .ip4_u = {
163 .daddr = addr
164 }
165 },
166 .flags = FLOWI_FLAG_MATCH_ANY_IIF 162 .flags = FLOWI_FLAG_MATCH_ANY_IIF
167 }; 163 };
168 struct fib_result res = { 0 }; 164 struct fib_result res = { 0 };
@@ -193,7 +189,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
193 const struct net_device *dev, 189 const struct net_device *dev,
194 __be32 addr) 190 __be32 addr)
195{ 191{
196 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 192 struct flowi fl = { .fl4_dst = addr };
197 struct fib_result res; 193 struct fib_result res;
198 unsigned ret = RTN_BROADCAST; 194 unsigned ret = RTN_BROADCAST;
199 struct fib_table *local_table; 195 struct fib_table *local_table;
@@ -247,13 +243,9 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
247{ 243{
248 struct in_device *in_dev; 244 struct in_device *in_dev;
249 struct flowi fl = { 245 struct flowi fl = {
250 .nl_u = { 246 .fl4_dst = src,
251 .ip4_u = { 247 .fl4_src = dst,
252 .daddr = src, 248 .fl4_tos = tos,
253 .saddr = dst,
254 .tos = tos
255 }
256 },
257 .mark = mark, 249 .mark = mark,
258 .iif = oif 250 .iif = oif
259 }; 251 };
@@ -853,13 +845,9 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
853 struct fib_result res; 845 struct fib_result res;
854 struct flowi fl = { 846 struct flowi fl = {
855 .mark = frn->fl_mark, 847 .mark = frn->fl_mark,
856 .nl_u = { 848 .fl4_dst = frn->fl_addr,
857 .ip4_u = { 849 .fl4_tos = frn->fl_tos,
858 .daddr = frn->fl_addr, 850 .fl4_scope = frn->fl_scope,
859 .tos = frn->fl_tos,
860 .scope = frn->fl_scope
861 }
862 }
863 }; 851 };
864 852
865#ifdef CONFIG_IP_MULTIPLE_TABLES 853#ifdef CONFIG_IP_MULTIPLE_TABLES
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3e0da3ef611..12d3dc3df1b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -563,12 +563,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
563 rcu_read_lock(); 563 rcu_read_lock();
564 { 564 {
565 struct flowi fl = { 565 struct flowi fl = {
566 .nl_u = { 566 .fl4_dst = nh->nh_gw,
567 .ip4_u = { 567 .fl4_scope = cfg->fc_scope + 1,
568 .daddr = nh->nh_gw,
569 .scope = cfg->fc_scope + 1,
570 },
571 },
572 .oif = nh->nh_oif, 568 .oif = nh->nh_oif,
573 }; 569 };
574 570
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e5d1a44bcbd..4aa1b7f01ea 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -386,10 +386,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
386 daddr = icmp_param->replyopts.faddr; 386 daddr = icmp_param->replyopts.faddr;
387 } 387 }
388 { 388 {
389 struct flowi fl = { .nl_u = { .ip4_u = 389 struct flowi fl = { .fl4_dst = daddr,
390 { .daddr = daddr, 390 .fl4_src = rt->rt_spec_dst,
391 .saddr = rt->rt_spec_dst, 391 .fl4_tos = RT_TOS(ip_hdr(skb)->tos),
392 .tos = RT_TOS(ip_hdr(skb)->tos) } },
393 .proto = IPPROTO_ICMP }; 392 .proto = IPPROTO_ICMP };
394 security_skb_classify_flow(skb, &fl); 393 security_skb_classify_flow(skb, &fl);
395 if (ip_route_output_key(net, &rt, &fl)) 394 if (ip_route_output_key(net, &rt, &fl))
@@ -506,8 +505,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
506 struct net_device *dev = NULL; 505 struct net_device *dev = NULL;
507 506
508 rcu_read_lock(); 507 rcu_read_lock();
509 if (rt->fl.iif && 508 if (rt_is_input_route(rt) &&
510 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) 509 net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
511 dev = dev_get_by_index_rcu(net, rt->fl.iif); 510 dev = dev_get_by_index_rcu(net, rt->fl.iif);
512 511
513 if (dev) 512 if (dev)
@@ -542,22 +541,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
542 541
543 { 542 {
544 struct flowi fl = { 543 struct flowi fl = {
545 .nl_u = { 544 .fl4_dst = icmp_param.replyopts.srr ?
546 .ip4_u = { 545 icmp_param.replyopts.faddr : iph->saddr,
547 .daddr = icmp_param.replyopts.srr ? 546 .fl4_src = saddr,
548 icmp_param.replyopts.faddr : 547 .fl4_tos = RT_TOS(tos),
549 iph->saddr,
550 .saddr = saddr,
551 .tos = RT_TOS(tos)
552 }
553 },
554 .proto = IPPROTO_ICMP, 548 .proto = IPPROTO_ICMP,
555 .uli_u = { 549 .fl_icmp_type = type,
556 .icmpt = { 550 .fl_icmp_code = code,
557 .type = type,
558 .code = code
559 }
560 }
561 }; 551 };
562 int err; 552 int err;
563 struct rtable *rt2; 553 struct rtable *rt2;
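
This hunk, dn_is_output_route() in dn_route.c, and rt_is_output_route() in igmp.c all replace open-coded rt->fl.iif tests with named predicates. Their definitions live in headers outside this diff; presumably they are one-line inlines of this shape:

	/* Assumed shape of the new helpers (the header change is not shown here). */
	static inline bool rt_is_input_route(struct rtable *rt)
	{
		return rt->fl.iif != 0;	/* arrived via a real input interface */
	}

	static inline bool rt_is_output_route(struct rtable *rt)
	{
		return rt->fl.iif == 0;	/* locally generated */
	}
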
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 3c53c2d89e3..e0e77e297de 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -149,21 +149,37 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc);
149static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, 149static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
150 int sfcount, __be32 *psfsrc, int delta); 150 int sfcount, __be32 *psfsrc, int delta);
151 151
152
153static void ip_mc_list_reclaim(struct rcu_head *head)
154{
155 kfree(container_of(head, struct ip_mc_list, rcu));
156}
157
152static void ip_ma_put(struct ip_mc_list *im) 158static void ip_ma_put(struct ip_mc_list *im)
153{ 159{
154 if (atomic_dec_and_test(&im->refcnt)) { 160 if (atomic_dec_and_test(&im->refcnt)) {
155 in_dev_put(im->interface); 161 in_dev_put(im->interface);
156 kfree(im); 162 call_rcu(&im->rcu, ip_mc_list_reclaim);
157 } 163 }
158} 164}
159 165
166#define for_each_pmc_rcu(in_dev, pmc) \
167 for (pmc = rcu_dereference(in_dev->mc_list); \
168 pmc != NULL; \
169 pmc = rcu_dereference(pmc->next_rcu))
170
171#define for_each_pmc_rtnl(in_dev, pmc) \
172 for (pmc = rtnl_dereference(in_dev->mc_list); \
173 pmc != NULL; \
174 pmc = rtnl_dereference(pmc->next_rcu))
175
160#ifdef CONFIG_IP_MULTICAST 176#ifdef CONFIG_IP_MULTICAST
161 177
162/* 178/*
163 * Timer management 179 * Timer management
164 */ 180 */
165 181
166static __inline__ void igmp_stop_timer(struct ip_mc_list *im) 182static void igmp_stop_timer(struct ip_mc_list *im)
167{ 183{
168 spin_lock_bh(&im->lock); 184 spin_lock_bh(&im->lock);
169 if (del_timer(&im->timer)) 185 if (del_timer(&im->timer))
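
The two iterator macros split the list's users by context: for_each_pmc_rcu() is for readers inside rcu_read_lock(), while for_each_pmc_rtnl() is for writers that already hold RTNL and therefore need no read-side critical section (rtnl_dereference() checks exactly that). Usage, as two minimal sketches:

	/* Reader path: e.g. softirq receive processing. */
	static bool mc_group_joined(struct in_device *in_dev, __be32 group)
	{
		struct ip_mc_list *pmc;
		bool joined = false;

		rcu_read_lock();
		for_each_pmc_rcu(in_dev, pmc) {
			if (pmc->multiaddr == group) {
				joined = true;
				break;
			}
		}
		rcu_read_unlock();
		return joined;
	}

	/* Writer path: RTNL held; mirrors ip_mc_remap() further down. */
	static void mc_rejoin_all(struct in_device *in_dev)
	{
		struct ip_mc_list *pmc;

		ASSERT_RTNL();
		for_each_pmc_rtnl(in_dev, pmc)
			igmp_group_added(pmc);
	}
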
@@ -284,6 +300,8 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
284 return scount; 300 return scount;
285} 301}
286 302
303#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb))
304
287static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) 305static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
288{ 306{
289 struct sk_buff *skb; 307 struct sk_buff *skb;
@@ -292,14 +310,20 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
292 struct igmpv3_report *pig; 310 struct igmpv3_report *pig;
293 struct net *net = dev_net(dev); 311 struct net *net = dev_net(dev);
294 312
295 skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); 313 while (1) {
296 if (skb == NULL) 314 skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev),
297 return NULL; 315 GFP_ATOMIC | __GFP_NOWARN);
316 if (skb)
317 break;
318 size >>= 1;
319 if (size < 256)
320 return NULL;
321 }
322 igmp_skb_size(skb) = size;
298 323
299 { 324 {
300 struct flowi fl = { .oif = dev->ifindex, 325 struct flowi fl = { .oif = dev->ifindex,
301 .nl_u = { .ip4_u = { 326 .fl4_dst = IGMPV3_ALL_MCR,
302 .daddr = IGMPV3_ALL_MCR } },
303 .proto = IPPROTO_IGMP }; 327 .proto = IPPROTO_IGMP };
304 if (ip_route_output_key(net, &rt, &fl)) { 328 if (ip_route_output_key(net, &rt, &fl)) {
305 kfree_skb(skb); 329 kfree_skb(skb);
@@ -384,7 +408,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
384 return skb; 408 return skb;
385} 409}
386 410
387#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \ 411#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \
388 skb_tailroom(skb)) : 0) 412 skb_tailroom(skb)) : 0)
389 413
390static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, 414static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
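
igmp_skb_size() puns skb->cb, which is scratch space owned by whichever layer currently holds the skb; recording the granted size there lets AVAILABLE() budget group records against the buffer actually obtained, rather than dev->mtu, which the halving allocator may no longer match. In isolation:

	/* Sketch: cb[] carries the achieved allocation size between the
	 * allocator and the record packer. */
	#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb))

	static unsigned int report_room(struct sk_buff *skb)
	{
		if (!skb)
			return 0;
		/* Bytes still free for group records in this report. */
		return skb->dev ? igmp_skb_size(skb) - skb->len
				: skb_tailroom(skb);
	}
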
@@ -502,8 +526,8 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
502 int type; 526 int type;
503 527
504 if (!pmc) { 528 if (!pmc) {
505 read_lock(&in_dev->mc_list_lock); 529 rcu_read_lock();
506 for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { 530 for_each_pmc_rcu(in_dev, pmc) {
507 if (pmc->multiaddr == IGMP_ALL_HOSTS) 531 if (pmc->multiaddr == IGMP_ALL_HOSTS)
508 continue; 532 continue;
509 spin_lock_bh(&pmc->lock); 533 spin_lock_bh(&pmc->lock);
@@ -514,7 +538,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
514 skb = add_grec(skb, pmc, type, 0, 0); 538 skb = add_grec(skb, pmc, type, 0, 0);
515 spin_unlock_bh(&pmc->lock); 539 spin_unlock_bh(&pmc->lock);
516 } 540 }
517 read_unlock(&in_dev->mc_list_lock); 541 rcu_read_unlock();
518 } else { 542 } else {
519 spin_lock_bh(&pmc->lock); 543 spin_lock_bh(&pmc->lock);
520 if (pmc->sfcount[MCAST_EXCLUDE]) 544 if (pmc->sfcount[MCAST_EXCLUDE])
@@ -556,7 +580,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
556 struct sk_buff *skb = NULL; 580 struct sk_buff *skb = NULL;
557 int type, dtype; 581 int type, dtype;
558 582
559 read_lock(&in_dev->mc_list_lock); 583 rcu_read_lock();
560 spin_lock_bh(&in_dev->mc_tomb_lock); 584 spin_lock_bh(&in_dev->mc_tomb_lock);
561 585
562 /* deleted MCA's */ 586 /* deleted MCA's */
@@ -593,7 +617,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
593 spin_unlock_bh(&in_dev->mc_tomb_lock); 617 spin_unlock_bh(&in_dev->mc_tomb_lock);
594 618
595 /* change recs */ 619 /* change recs */
596 for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { 620 for_each_pmc_rcu(in_dev, pmc) {
597 spin_lock_bh(&pmc->lock); 621 spin_lock_bh(&pmc->lock);
598 if (pmc->sfcount[MCAST_EXCLUDE]) { 622 if (pmc->sfcount[MCAST_EXCLUDE]) {
599 type = IGMPV3_BLOCK_OLD_SOURCES; 623 type = IGMPV3_BLOCK_OLD_SOURCES;
@@ -616,7 +640,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
616 } 640 }
617 spin_unlock_bh(&pmc->lock); 641 spin_unlock_bh(&pmc->lock);
618 } 642 }
619 read_unlock(&in_dev->mc_list_lock); 643 rcu_read_unlock();
620 644
621 if (!skb) 645 if (!skb)
622 return; 646 return;
@@ -644,7 +668,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
644 668
645 { 669 {
646 struct flowi fl = { .oif = dev->ifindex, 670 struct flowi fl = { .oif = dev->ifindex,
647 .nl_u = { .ip4_u = { .daddr = dst } }, 671 .fl4_dst = dst,
648 .proto = IPPROTO_IGMP }; 672 .proto = IPPROTO_IGMP };
649 if (ip_route_output_key(net, &rt, &fl)) 673 if (ip_route_output_key(net, &rt, &fl))
650 return -1; 674 return -1;
@@ -813,14 +837,14 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group)
813 if (group == IGMP_ALL_HOSTS) 837 if (group == IGMP_ALL_HOSTS)
814 return; 838 return;
815 839
816 read_lock(&in_dev->mc_list_lock); 840 rcu_read_lock();
817 for (im=in_dev->mc_list; im!=NULL; im=im->next) { 841 for_each_pmc_rcu(in_dev, im) {
818 if (im->multiaddr == group) { 842 if (im->multiaddr == group) {
819 igmp_stop_timer(im); 843 igmp_stop_timer(im);
820 break; 844 break;
821 } 845 }
822 } 846 }
823 read_unlock(&in_dev->mc_list_lock); 847 rcu_read_unlock();
824} 848}
825 849
826static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, 850static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
@@ -906,8 +930,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
906 * - Use the igmp->igmp_code field as the maximum 930 * - Use the igmp->igmp_code field as the maximum
907 * delay possible 931 * delay possible
908 */ 932 */
909 read_lock(&in_dev->mc_list_lock); 933 rcu_read_lock();
910 for (im=in_dev->mc_list; im!=NULL; im=im->next) { 934 for_each_pmc_rcu(in_dev, im) {
911 int changed; 935 int changed;
912 936
913 if (group && group != im->multiaddr) 937 if (group && group != im->multiaddr)
@@ -925,7 +949,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
925 if (changed) 949 if (changed)
926 igmp_mod_timer(im, max_delay); 950 igmp_mod_timer(im, max_delay);
927 } 951 }
928 read_unlock(&in_dev->mc_list_lock); 952 rcu_read_unlock();
929} 953}
930 954
931/* called in rcu_read_lock() section */ 955/* called in rcu_read_lock() section */
@@ -961,7 +985,7 @@ int igmp_rcv(struct sk_buff *skb)
961 case IGMP_HOST_MEMBERSHIP_REPORT: 985 case IGMP_HOST_MEMBERSHIP_REPORT:
962 case IGMPV2_HOST_MEMBERSHIP_REPORT: 986 case IGMPV2_HOST_MEMBERSHIP_REPORT:
963 /* Is it our report looped back? */ 987 /* Is it our report looped back? */
964 if (skb_rtable(skb)->fl.iif == 0) 988 if (rt_is_output_route(skb_rtable(skb)))
965 break; 989 break;
966 /* don't rely on MC router hearing unicast reports */ 990 /* don't rely on MC router hearing unicast reports */
967 if (skb->pkt_type == PACKET_MULTICAST || 991 if (skb->pkt_type == PACKET_MULTICAST ||
@@ -1110,8 +1134,8 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
1110 kfree(pmc); 1134 kfree(pmc);
1111 } 1135 }
1112 /* clear dead sources, too */ 1136 /* clear dead sources, too */
1113 read_lock(&in_dev->mc_list_lock); 1137 rcu_read_lock();
1114 for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { 1138 for_each_pmc_rcu(in_dev, pmc) {
1115 struct ip_sf_list *psf, *psf_next; 1139 struct ip_sf_list *psf, *psf_next;
1116 1140
1117 spin_lock_bh(&pmc->lock); 1141 spin_lock_bh(&pmc->lock);
@@ -1123,7 +1147,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
1123 kfree(psf); 1147 kfree(psf);
1124 } 1148 }
1125 } 1149 }
1126 read_unlock(&in_dev->mc_list_lock); 1150 rcu_read_unlock();
1127} 1151}
1128#endif 1152#endif
1129 1153
@@ -1209,7 +1233,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1209 1233
1210 ASSERT_RTNL(); 1234 ASSERT_RTNL();
1211 1235
1212 for (im=in_dev->mc_list; im; im=im->next) { 1236 for_each_pmc_rtnl(in_dev, im) {
1213 if (im->multiaddr == addr) { 1237 if (im->multiaddr == addr) {
1214 im->users++; 1238 im->users++;
1215 ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0); 1239 ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0);
@@ -1217,7 +1241,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1217 } 1241 }
1218 } 1242 }
1219 1243
1220 im = kmalloc(sizeof(*im), GFP_KERNEL); 1244 im = kzalloc(sizeof(*im), GFP_KERNEL);
1221 if (!im) 1245 if (!im)
1222 goto out; 1246 goto out;
1223 1247
@@ -1227,26 +1251,18 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1227 im->multiaddr = addr; 1251 im->multiaddr = addr;
1228 /* initial mode is (EX, empty) */ 1252 /* initial mode is (EX, empty) */
1229 im->sfmode = MCAST_EXCLUDE; 1253 im->sfmode = MCAST_EXCLUDE;
1230 im->sfcount[MCAST_INCLUDE] = 0;
1231 im->sfcount[MCAST_EXCLUDE] = 1; 1254 im->sfcount[MCAST_EXCLUDE] = 1;
1232 im->sources = NULL;
1233 im->tomb = NULL;
1234 im->crcount = 0;
1235 atomic_set(&im->refcnt, 1); 1255 atomic_set(&im->refcnt, 1);
1236 spin_lock_init(&im->lock); 1256 spin_lock_init(&im->lock);
1237#ifdef CONFIG_IP_MULTICAST 1257#ifdef CONFIG_IP_MULTICAST
1238 im->tm_running = 0;
1239 setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im); 1258 setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
1240 im->unsolicit_count = IGMP_Unsolicited_Report_Count; 1259 im->unsolicit_count = IGMP_Unsolicited_Report_Count;
1241 im->reporter = 0;
1242 im->gsquery = 0;
1243#endif 1260#endif
1244 im->loaded = 0; 1261
1245 write_lock_bh(&in_dev->mc_list_lock); 1262 im->next_rcu = in_dev->mc_list;
1246 im->next = in_dev->mc_list;
1247 in_dev->mc_list = im;
1248 in_dev->mc_count++; 1263 in_dev->mc_count++;
1249 write_unlock_bh(&in_dev->mc_list_lock); 1264 rcu_assign_pointer(in_dev->mc_list, im);
1265
1250#ifdef CONFIG_IP_MULTICAST 1266#ifdef CONFIG_IP_MULTICAST
1251 igmpv3_del_delrec(in_dev, im->multiaddr); 1267 igmpv3_del_delrec(in_dev, im->multiaddr);
1252#endif 1268#endif
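
Note the publication order in the hunk above: the entry is fully initialized (kzalloc() now guarantees the fields the old code zeroed by hand), linked through next_rcu, and only then made reachable via rcu_assign_pointer(), whose write barrier is what lets lockless readers see a consistent entry. The ordering in miniature:

	/* Sketch: initialize fully, then publish; never the other way around. */
	static void mc_publish(struct in_device *in_dev, struct ip_mc_list *im)
	{
		im->next_rcu = in_dev->mc_list;	/* writer-side link-in */
		in_dev->mc_count++;
		/* Barrier: all prior stores to *im are visible before the
		 * list head points at it. */
		rcu_assign_pointer(in_dev->mc_list, im);
	}
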
@@ -1260,26 +1276,32 @@ EXPORT_SYMBOL(ip_mc_inc_group);
1260 1276
1261/* 1277/*
1262 * Resend IGMP JOIN report; used for bonding. 1278 * Resend IGMP JOIN report; used for bonding.
1279 * Called with rcu_read_lock()
1263 */ 1280 */
1264void ip_mc_rejoin_group(struct ip_mc_list *im) 1281void ip_mc_rejoin_groups(struct in_device *in_dev)
1265{ 1282{
1266#ifdef CONFIG_IP_MULTICAST 1283#ifdef CONFIG_IP_MULTICAST
1267 struct in_device *in_dev = im->interface; 1284 struct ip_mc_list *im;
1285 int type;
1268 1286
1269 if (im->multiaddr == IGMP_ALL_HOSTS) 1287 for_each_pmc_rcu(in_dev, im) {
1270 return; 1288 if (im->multiaddr == IGMP_ALL_HOSTS)
1289 continue;
1271 1290
1272 /* a failover is happening and switches 1291 /* a failover is happening and switches
1273 * must be notified immediately */ 1292 * must be notified immediately
1274 if (IGMP_V1_SEEN(in_dev)) 1293 */
1275 igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT); 1294 if (IGMP_V1_SEEN(in_dev))
1276 else if (IGMP_V2_SEEN(in_dev)) 1295 type = IGMP_HOST_MEMBERSHIP_REPORT;
1277 igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT); 1296 else if (IGMP_V2_SEEN(in_dev))
1278 else 1297 type = IGMPV2_HOST_MEMBERSHIP_REPORT;
1279 igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT); 1298 else
1299 type = IGMPV3_HOST_MEMBERSHIP_REPORT;
1300 igmp_send_report(in_dev, im, type);
1301 }
1280#endif 1302#endif
1281} 1303}
1282EXPORT_SYMBOL(ip_mc_rejoin_group); 1304EXPORT_SYMBOL(ip_mc_rejoin_groups);
1283 1305
1284/* 1306/*
1285 * A socket has left a multicast group on device dev 1307 * A socket has left a multicast group on device dev
@@ -1287,17 +1309,18 @@ EXPORT_SYMBOL(ip_mc_rejoin_group);
1287 1309
1288void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) 1310void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
1289{ 1311{
1290 struct ip_mc_list *i, **ip; 1312 struct ip_mc_list *i;
1313 struct ip_mc_list __rcu **ip;
1291 1314
1292 ASSERT_RTNL(); 1315 ASSERT_RTNL();
1293 1316
1294 for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { 1317 for (ip = &in_dev->mc_list;
1318 (i = rtnl_dereference(*ip)) != NULL;
1319 ip = &i->next_rcu) {
1295 if (i->multiaddr == addr) { 1320 if (i->multiaddr == addr) {
1296 if (--i->users == 0) { 1321 if (--i->users == 0) {
1297 write_lock_bh(&in_dev->mc_list_lock); 1322 *ip = i->next_rcu;
1298 *ip = i->next;
1299 in_dev->mc_count--; 1323 in_dev->mc_count--;
1300 write_unlock_bh(&in_dev->mc_list_lock);
1301 igmp_group_dropped(i); 1324 igmp_group_dropped(i);
1302 1325
1303 if (!in_dev->dead) 1326 if (!in_dev->dead)
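
Deletion needs no mc_list_lock any more: RTNL serializes all writers, the __rcu-annotated indirect cursor keeps sparse honest, and *ip = i->next_rcu unlinks the entry with a single pointer store that concurrent RCU readers observe atomically (they see either the entry or its successor, never garbage). The generic form of the unlink, with illustrative names:

	/* Sketch: unlink the first match from an RCU singly-linked list.
	 * Writers are serialized externally (here: RTNL held). */
	static void mc_unlink(struct ip_mc_list __rcu **head, __be32 addr)
	{
		struct ip_mc_list *i;
		struct ip_mc_list __rcu **ip;

		for (ip = head;
		     (i = rtnl_dereference(*ip)) != NULL;
		     ip = &i->next_rcu) {
			if (i->multiaddr == addr) {
				*ip = i->next_rcu;	/* one atomic store */
				ip_ma_put(i);	/* frees via call_rcu above */
				break;
			}
		}
	}
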
@@ -1316,34 +1339,34 @@ EXPORT_SYMBOL(ip_mc_dec_group);
1316 1339
1317void ip_mc_unmap(struct in_device *in_dev) 1340void ip_mc_unmap(struct in_device *in_dev)
1318{ 1341{
1319 struct ip_mc_list *i; 1342 struct ip_mc_list *pmc;
1320 1343
1321 ASSERT_RTNL(); 1344 ASSERT_RTNL();
1322 1345
1323 for (i = in_dev->mc_list; i; i = i->next) 1346 for_each_pmc_rtnl(in_dev, pmc)
1324 igmp_group_dropped(i); 1347 igmp_group_dropped(pmc);
1325} 1348}
1326 1349
1327void ip_mc_remap(struct in_device *in_dev) 1350void ip_mc_remap(struct in_device *in_dev)
1328{ 1351{
1329 struct ip_mc_list *i; 1352 struct ip_mc_list *pmc;
1330 1353
1331 ASSERT_RTNL(); 1354 ASSERT_RTNL();
1332 1355
1333 for (i = in_dev->mc_list; i; i = i->next) 1356 for_each_pmc_rtnl(in_dev, pmc)
1334 igmp_group_added(i); 1357 igmp_group_added(pmc);
1335} 1358}
1336 1359
1337/* Device going down */ 1360/* Device going down */
1338 1361
1339void ip_mc_down(struct in_device *in_dev) 1362void ip_mc_down(struct in_device *in_dev)
1340{ 1363{
1341 struct ip_mc_list *i; 1364 struct ip_mc_list *pmc;
1342 1365
1343 ASSERT_RTNL(); 1366 ASSERT_RTNL();
1344 1367
1345 for (i=in_dev->mc_list; i; i=i->next) 1368 for_each_pmc_rtnl(in_dev, pmc)
1346 igmp_group_dropped(i); 1369 igmp_group_dropped(pmc);
1347 1370
1348#ifdef CONFIG_IP_MULTICAST 1371#ifdef CONFIG_IP_MULTICAST
1349 in_dev->mr_ifc_count = 0; 1372 in_dev->mr_ifc_count = 0;
@@ -1374,7 +1397,6 @@ void ip_mc_init_dev(struct in_device *in_dev)
1374 in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; 1397 in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
1375#endif 1398#endif
1376 1399
1377 rwlock_init(&in_dev->mc_list_lock);
1378 spin_lock_init(&in_dev->mc_tomb_lock); 1400 spin_lock_init(&in_dev->mc_tomb_lock);
1379} 1401}
1380 1402
@@ -1382,14 +1404,14 @@ void ip_mc_init_dev(struct in_device *in_dev)
1382 1404
1383void ip_mc_up(struct in_device *in_dev) 1405void ip_mc_up(struct in_device *in_dev)
1384{ 1406{
1385 struct ip_mc_list *i; 1407 struct ip_mc_list *pmc;
1386 1408
1387 ASSERT_RTNL(); 1409 ASSERT_RTNL();
1388 1410
1389 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); 1411 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
1390 1412
1391 for (i=in_dev->mc_list; i; i=i->next) 1413 for_each_pmc_rtnl(in_dev, pmc)
1392 igmp_group_added(i); 1414 igmp_group_added(pmc);
1393} 1415}
1394 1416
1395/* 1417/*
@@ -1405,24 +1427,19 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
1405 /* Deactivate timers */ 1427 /* Deactivate timers */
1406 ip_mc_down(in_dev); 1428 ip_mc_down(in_dev);
1407 1429
1408 write_lock_bh(&in_dev->mc_list_lock); 1430 while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
1409 while ((i = in_dev->mc_list) != NULL) { 1431 in_dev->mc_list = i->next_rcu;
1410 in_dev->mc_list = i->next;
1411 in_dev->mc_count--; 1432 in_dev->mc_count--;
1412 write_unlock_bh(&in_dev->mc_list_lock); 1433
1413 igmp_group_dropped(i); 1434 igmp_group_dropped(i);
1414 ip_ma_put(i); 1435 ip_ma_put(i);
1415
1416 write_lock_bh(&in_dev->mc_list_lock);
1417 } 1436 }
1418 write_unlock_bh(&in_dev->mc_list_lock);
1419} 1437}
1420 1438
1421/* RTNL is locked */ 1439/* RTNL is locked */
1422static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) 1440static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1423{ 1441{
1424 struct flowi fl = { .nl_u = { .ip4_u = 1442 struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr };
1425 { .daddr = imr->imr_multiaddr.s_addr } } };
1426 struct rtable *rt; 1443 struct rtable *rt;
1427 struct net_device *dev = NULL; 1444 struct net_device *dev = NULL;
1428 struct in_device *idev = NULL; 1445 struct in_device *idev = NULL;
@@ -1513,18 +1530,18 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
1513 1530
1514 if (!in_dev) 1531 if (!in_dev)
1515 return -ENODEV; 1532 return -ENODEV;
1516 read_lock(&in_dev->mc_list_lock); 1533 rcu_read_lock();
1517 for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { 1534 for_each_pmc_rcu(in_dev, pmc) {
1518 if (*pmca == pmc->multiaddr) 1535 if (*pmca == pmc->multiaddr)
1519 break; 1536 break;
1520 } 1537 }
1521 if (!pmc) { 1538 if (!pmc) {
1522 /* MCA not found?? bug */ 1539 /* MCA not found?? bug */
1523 read_unlock(&in_dev->mc_list_lock); 1540 rcu_read_unlock();
1524 return -ESRCH; 1541 return -ESRCH;
1525 } 1542 }
1526 spin_lock_bh(&pmc->lock); 1543 spin_lock_bh(&pmc->lock);
1527 read_unlock(&in_dev->mc_list_lock); 1544 rcu_read_unlock();
1528#ifdef CONFIG_IP_MULTICAST 1545#ifdef CONFIG_IP_MULTICAST
1529 sf_markstate(pmc); 1546 sf_markstate(pmc);
1530#endif 1547#endif
@@ -1685,18 +1702,18 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
1685 1702
1686 if (!in_dev) 1703 if (!in_dev)
1687 return -ENODEV; 1704 return -ENODEV;
1688 read_lock(&in_dev->mc_list_lock); 1705 rcu_read_lock();
1689 for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { 1706 for_each_pmc_rcu(in_dev, pmc) {
1690 if (*pmca == pmc->multiaddr) 1707 if (*pmca == pmc->multiaddr)
1691 break; 1708 break;
1692 } 1709 }
1693 if (!pmc) { 1710 if (!pmc) {
1694 /* MCA not found?? bug */ 1711 /* MCA not found?? bug */
1695 read_unlock(&in_dev->mc_list_lock); 1712 rcu_read_unlock();
1696 return -ESRCH; 1713 return -ESRCH;
1697 } 1714 }
1698 spin_lock_bh(&pmc->lock); 1715 spin_lock_bh(&pmc->lock);
1699 read_unlock(&in_dev->mc_list_lock); 1716 rcu_read_unlock();
1700 1717
1701#ifdef CONFIG_IP_MULTICAST 1718#ifdef CONFIG_IP_MULTICAST
1702 sf_markstate(pmc); 1719 sf_markstate(pmc);
@@ -1793,7 +1810,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1793 1810
1794 err = -EADDRINUSE; 1811 err = -EADDRINUSE;
1795 ifindex = imr->imr_ifindex; 1812 ifindex = imr->imr_ifindex;
1796 for (i = inet->mc_list; i; i = i->next) { 1813 for_each_pmc_rtnl(inet, i) {
1797 if (i->multi.imr_multiaddr.s_addr == addr && 1814 if (i->multi.imr_multiaddr.s_addr == addr &&
1798 i->multi.imr_ifindex == ifindex) 1815 i->multi.imr_ifindex == ifindex)
1799 goto done; 1816 goto done;
@@ -1807,7 +1824,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1807 goto done; 1824 goto done;
1808 1825
1809 memcpy(&iml->multi, imr, sizeof(*imr)); 1826 memcpy(&iml->multi, imr, sizeof(*imr));
1810 iml->next = inet->mc_list; 1827 iml->next_rcu = inet->mc_list;
1811 iml->sflist = NULL; 1828 iml->sflist = NULL;
1812 iml->sfmode = MCAST_EXCLUDE; 1829 iml->sfmode = MCAST_EXCLUDE;
1813 rcu_assign_pointer(inet->mc_list, iml); 1830 rcu_assign_pointer(inet->mc_list, iml);
@@ -1821,17 +1838,14 @@ EXPORT_SYMBOL(ip_mc_join_group);
1821 1838
1822static void ip_sf_socklist_reclaim(struct rcu_head *rp) 1839static void ip_sf_socklist_reclaim(struct rcu_head *rp)
1823{ 1840{
1824 struct ip_sf_socklist *psf; 1841 kfree(container_of(rp, struct ip_sf_socklist, rcu));
1825
1826 psf = container_of(rp, struct ip_sf_socklist, rcu);
1827 /* sk_omem_alloc should have been decreased by the caller*/ 1842 /* sk_omem_alloc should have been decreased by the caller*/
1828 kfree(psf);
1829} 1843}
1830 1844
1831static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, 1845static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
1832 struct in_device *in_dev) 1846 struct in_device *in_dev)
1833{ 1847{
1834 struct ip_sf_socklist *psf = iml->sflist; 1848 struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist);
1835 int err; 1849 int err;
1836 1850
1837 if (psf == NULL) { 1851 if (psf == NULL) {
@@ -1851,11 +1865,8 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
1851 1865
1852static void ip_mc_socklist_reclaim(struct rcu_head *rp) 1866static void ip_mc_socklist_reclaim(struct rcu_head *rp)
1853{ 1867{
1854 struct ip_mc_socklist *iml; 1868 kfree(container_of(rp, struct ip_mc_socklist, rcu));
1855
1856 iml = container_of(rp, struct ip_mc_socklist, rcu);
1857 /* sk_omem_alloc should have been decreased by the caller*/ 1869 /* sk_omem_alloc should have been decreased by the caller*/
1858 kfree(iml);
1859} 1870}
1860 1871
1861 1872
@@ -1866,7 +1877,8 @@ static void ip_mc_socklist_reclaim(struct rcu_head *rp)
1866int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) 1877int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1867{ 1878{
1868 struct inet_sock *inet = inet_sk(sk); 1879 struct inet_sock *inet = inet_sk(sk);
1869 struct ip_mc_socklist *iml, **imlp; 1880 struct ip_mc_socklist *iml;
1881 struct ip_mc_socklist __rcu **imlp;
1870 struct in_device *in_dev; 1882 struct in_device *in_dev;
1871 struct net *net = sock_net(sk); 1883 struct net *net = sock_net(sk);
1872 __be32 group = imr->imr_multiaddr.s_addr; 1884 __be32 group = imr->imr_multiaddr.s_addr;
@@ -1876,7 +1888,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1876 rtnl_lock(); 1888 rtnl_lock();
1877 in_dev = ip_mc_find_dev(net, imr); 1889 in_dev = ip_mc_find_dev(net, imr);
1878 ifindex = imr->imr_ifindex; 1890 ifindex = imr->imr_ifindex;
1879 for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { 1891 for (imlp = &inet->mc_list;
1892 (iml = rtnl_dereference(*imlp)) != NULL;
1893 imlp = &iml->next_rcu) {
1880 if (iml->multi.imr_multiaddr.s_addr != group) 1894 if (iml->multi.imr_multiaddr.s_addr != group)
1881 continue; 1895 continue;
1882 if (ifindex) { 1896 if (ifindex) {
@@ -1888,7 +1902,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1888 1902
1889 (void) ip_mc_leave_src(sk, iml, in_dev); 1903 (void) ip_mc_leave_src(sk, iml, in_dev);
1890 1904
1891 rcu_assign_pointer(*imlp, iml->next); 1905 *imlp = iml->next_rcu;
1892 1906
1893 if (in_dev) 1907 if (in_dev)
1894 ip_mc_dec_group(in_dev, group); 1908 ip_mc_dec_group(in_dev, group);
@@ -1934,7 +1948,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1934 } 1948 }
1935 err = -EADDRNOTAVAIL; 1949 err = -EADDRNOTAVAIL;
1936 1950
1937 for (pmc=inet->mc_list; pmc; pmc=pmc->next) { 1951 for_each_pmc_rtnl(inet, pmc) {
1938 if ((pmc->multi.imr_multiaddr.s_addr == 1952 if ((pmc->multi.imr_multiaddr.s_addr ==
1939 imr.imr_multiaddr.s_addr) && 1953 imr.imr_multiaddr.s_addr) &&
1940 (pmc->multi.imr_ifindex == imr.imr_ifindex)) 1954 (pmc->multi.imr_ifindex == imr.imr_ifindex))
@@ -1958,7 +1972,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1958 pmc->sfmode = omode; 1972 pmc->sfmode = omode;
1959 } 1973 }
1960 1974
1961 psl = pmc->sflist; 1975 psl = rtnl_dereference(pmc->sflist);
1962 if (!add) { 1976 if (!add) {
1963 if (!psl) 1977 if (!psl)
1964 goto done; /* err = -EADDRNOTAVAIL */ 1978 goto done; /* err = -EADDRNOTAVAIL */
@@ -2077,7 +2091,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
2077 goto done; 2091 goto done;
2078 } 2092 }
2079 2093
2080 for (pmc=inet->mc_list; pmc; pmc=pmc->next) { 2094 for_each_pmc_rtnl(inet, pmc) {
2081 if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && 2095 if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
2082 pmc->multi.imr_ifindex == imr.imr_ifindex) 2096 pmc->multi.imr_ifindex == imr.imr_ifindex)
2083 break; 2097 break;
@@ -2107,7 +2121,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
2107 (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr, 2121 (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
2108 msf->imsf_fmode, 0, NULL, 0); 2122 msf->imsf_fmode, 0, NULL, 0);
2109 } 2123 }
2110 psl = pmc->sflist; 2124 psl = rtnl_dereference(pmc->sflist);
2111 if (psl) { 2125 if (psl) {
2112 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 2126 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
2113 psl->sl_count, psl->sl_addr, 0); 2127 psl->sl_count, psl->sl_addr, 0);
@@ -2155,7 +2169,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
2155 } 2169 }
2156 err = -EADDRNOTAVAIL; 2170 err = -EADDRNOTAVAIL;
2157 2171
2158 for (pmc=inet->mc_list; pmc; pmc=pmc->next) { 2172 for_each_pmc_rtnl(inet, pmc) {
2159 if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && 2173 if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
2160 pmc->multi.imr_ifindex == imr.imr_ifindex) 2174 pmc->multi.imr_ifindex == imr.imr_ifindex)
2161 break; 2175 break;
@@ -2163,7 +2177,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
2163 if (!pmc) /* must have a prior join */ 2177 if (!pmc) /* must have a prior join */
2164 goto done; 2178 goto done;
2165 msf->imsf_fmode = pmc->sfmode; 2179 msf->imsf_fmode = pmc->sfmode;
2166 psl = pmc->sflist; 2180 psl = rtnl_dereference(pmc->sflist);
2167 rtnl_unlock(); 2181 rtnl_unlock();
2168 if (!psl) { 2182 if (!psl) {
2169 len = 0; 2183 len = 0;
@@ -2208,7 +2222,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2208 2222
2209 err = -EADDRNOTAVAIL; 2223 err = -EADDRNOTAVAIL;
2210 2224
2211 for (pmc=inet->mc_list; pmc; pmc=pmc->next) { 2225 for_each_pmc_rtnl(inet, pmc) {
2212 if (pmc->multi.imr_multiaddr.s_addr == addr && 2226 if (pmc->multi.imr_multiaddr.s_addr == addr &&
2213 pmc->multi.imr_ifindex == gsf->gf_interface) 2227 pmc->multi.imr_ifindex == gsf->gf_interface)
2214 break; 2228 break;
@@ -2216,7 +2230,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
2216 if (!pmc) /* must have a prior join */ 2230 if (!pmc) /* must have a prior join */
2217 goto done; 2231 goto done;
2218 gsf->gf_fmode = pmc->sfmode; 2232 gsf->gf_fmode = pmc->sfmode;
2219 psl = pmc->sflist; 2233 psl = rtnl_dereference(pmc->sflist);
2220 rtnl_unlock(); 2234 rtnl_unlock();
2221 count = psl ? psl->sl_count : 0; 2235 count = psl ? psl->sl_count : 0;
2222 copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; 2236 copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
@@ -2257,7 +2271,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
2257 goto out; 2271 goto out;
2258 2272
2259 rcu_read_lock(); 2273 rcu_read_lock();
2260 for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { 2274 for_each_pmc_rcu(inet, pmc) {
2261 if (pmc->multi.imr_multiaddr.s_addr == loc_addr && 2275 if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
2262 pmc->multi.imr_ifindex == dif) 2276 pmc->multi.imr_ifindex == dif)
2263 break; 2277 break;
@@ -2265,7 +2279,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
2265 ret = inet->mc_all; 2279 ret = inet->mc_all;
2266 if (!pmc) 2280 if (!pmc)
2267 goto unlock; 2281 goto unlock;
2268 psl = pmc->sflist; 2282 psl = rcu_dereference(pmc->sflist);
2269 ret = (pmc->sfmode == MCAST_EXCLUDE); 2283 ret = (pmc->sfmode == MCAST_EXCLUDE);
2270 if (!psl) 2284 if (!psl)
2271 goto unlock; 2285 goto unlock;
@@ -2300,10 +2314,10 @@ void ip_mc_drop_socket(struct sock *sk)
2300 return; 2314 return;
2301 2315
2302 rtnl_lock(); 2316 rtnl_lock();
2303 while ((iml = inet->mc_list) != NULL) { 2317 while ((iml = rtnl_dereference(inet->mc_list)) != NULL) {
2304 struct in_device *in_dev; 2318 struct in_device *in_dev;
2305 rcu_assign_pointer(inet->mc_list, iml->next);
2306 2319
2320 inet->mc_list = iml->next_rcu;
2307 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); 2321 in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
2308 (void) ip_mc_leave_src(sk, iml, in_dev); 2322 (void) ip_mc_leave_src(sk, iml, in_dev);
2309 if (in_dev != NULL) 2323 if (in_dev != NULL)
@@ -2321,8 +2335,8 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
2321 struct ip_sf_list *psf; 2335 struct ip_sf_list *psf;
2322 int rv = 0; 2336 int rv = 0;
2323 2337
2324 read_lock(&in_dev->mc_list_lock); 2338 rcu_read_lock();
2325 for (im=in_dev->mc_list; im; im=im->next) { 2339 for_each_pmc_rcu(in_dev, im) {
2326 if (im->multiaddr == mc_addr) 2340 if (im->multiaddr == mc_addr)
2327 break; 2341 break;
2328 } 2342 }
@@ -2343,7 +2357,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
2343 } else 2357 } else
2344 rv = 1; /* unspecified source; tentatively allow */ 2358 rv = 1; /* unspecified source; tentatively allow */
2345 } 2359 }
2346 read_unlock(&in_dev->mc_list_lock); 2360 rcu_read_unlock();
2347 return rv; 2361 return rv;
2348} 2362}
2349 2363
@@ -2369,13 +2383,11 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
2369 in_dev = __in_dev_get_rcu(state->dev); 2383 in_dev = __in_dev_get_rcu(state->dev);
2370 if (!in_dev) 2384 if (!in_dev)
2371 continue; 2385 continue;
2372 read_lock(&in_dev->mc_list_lock); 2386 im = rcu_dereference(in_dev->mc_list);
2373 im = in_dev->mc_list;
2374 if (im) { 2387 if (im) {
2375 state->in_dev = in_dev; 2388 state->in_dev = in_dev;
2376 break; 2389 break;
2377 } 2390 }
2378 read_unlock(&in_dev->mc_list_lock);
2379 } 2391 }
2380 return im; 2392 return im;
2381} 2393}
@@ -2383,11 +2395,9 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
2383static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im) 2395static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im)
2384{ 2396{
2385 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); 2397 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
2386 im = im->next;
2387 while (!im) {
2388 if (likely(state->in_dev != NULL))
2389 read_unlock(&state->in_dev->mc_list_lock);
2390 2398
2399 im = rcu_dereference(im->next_rcu);
2400 while (!im) {
2391 state->dev = next_net_device_rcu(state->dev); 2401 state->dev = next_net_device_rcu(state->dev);
2392 if (!state->dev) { 2402 if (!state->dev) {
2393 state->in_dev = NULL; 2403 state->in_dev = NULL;
@@ -2396,8 +2406,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li
2396 state->in_dev = __in_dev_get_rcu(state->dev); 2406 state->in_dev = __in_dev_get_rcu(state->dev);
2397 if (!state->in_dev) 2407 if (!state->in_dev)
2398 continue; 2408 continue;
2399 read_lock(&state->in_dev->mc_list_lock); 2409 im = rcu_dereference(state->in_dev->mc_list);
2400 im = state->in_dev->mc_list;
2401 } 2410 }
2402 return im; 2411 return im;
2403} 2412}
@@ -2433,10 +2442,8 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
2433 __releases(rcu) 2442 __releases(rcu)
2434{ 2443{
2435 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); 2444 struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
2436 if (likely(state->in_dev != NULL)) { 2445
2437 read_unlock(&state->in_dev->mc_list_lock); 2446 state->in_dev = NULL;
2438 state->in_dev = NULL;
2439 }
2440 state->dev = NULL; 2447 state->dev = NULL;
2441 rcu_read_unlock(); 2448 rcu_read_unlock();
2442} 2449}
@@ -2458,7 +2465,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
2458 querier = "NONE"; 2465 querier = "NONE";
2459#endif 2466#endif
2460 2467
2461 if (state->in_dev->mc_list == im) { 2468 if (rcu_dereference(state->in_dev->mc_list) == im) {
2462 seq_printf(seq, "%d\t%-10s: %5d %7s\n", 2469 seq_printf(seq, "%d\t%-10s: %5d %7s\n",
2463 state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); 2470 state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
2464 } 2471 }
@@ -2517,8 +2524,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
2517 idev = __in_dev_get_rcu(state->dev); 2524 idev = __in_dev_get_rcu(state->dev);
2518 if (unlikely(idev == NULL)) 2525 if (unlikely(idev == NULL))
2519 continue; 2526 continue;
2520 read_lock(&idev->mc_list_lock); 2527 im = rcu_dereference(idev->mc_list);
2521 im = idev->mc_list;
2522 if (likely(im != NULL)) { 2528 if (likely(im != NULL)) {
2523 spin_lock_bh(&im->lock); 2529 spin_lock_bh(&im->lock);
2524 psf = im->sources; 2530 psf = im->sources;
@@ -2529,7 +2535,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
2529 } 2535 }
2530 spin_unlock_bh(&im->lock); 2536 spin_unlock_bh(&im->lock);
2531 } 2537 }
2532 read_unlock(&idev->mc_list_lock);
2533 } 2538 }
2534 return psf; 2539 return psf;
2535} 2540}
@@ -2543,9 +2548,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
2543 spin_unlock_bh(&state->im->lock); 2548 spin_unlock_bh(&state->im->lock);
2544 state->im = state->im->next; 2549 state->im = state->im->next;
2545 while (!state->im) { 2550 while (!state->im) {
2546 if (likely(state->idev != NULL))
2547 read_unlock(&state->idev->mc_list_lock);
2548
2549 state->dev = next_net_device_rcu(state->dev); 2551 state->dev = next_net_device_rcu(state->dev);
2550 if (!state->dev) { 2552 if (!state->dev) {
2551 state->idev = NULL; 2553 state->idev = NULL;
@@ -2554,8 +2556,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
2554 state->idev = __in_dev_get_rcu(state->dev); 2556 state->idev = __in_dev_get_rcu(state->dev);
2555 if (!state->idev) 2557 if (!state->idev)
2556 continue; 2558 continue;
2557 read_lock(&state->idev->mc_list_lock); 2559 state->im = rcu_dereference(state->idev->mc_list);
2558 state->im = state->idev->mc_list;
2559 } 2560 }
2560 if (!state->im) 2561 if (!state->im)
2561 break; 2562 break;
@@ -2601,10 +2602,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v)
2601 spin_unlock_bh(&state->im->lock); 2602 spin_unlock_bh(&state->im->lock);
2602 state->im = NULL; 2603 state->im = NULL;
2603 } 2604 }
2604 if (likely(state->idev != NULL)) { 2605 state->idev = NULL;
2605 read_unlock(&state->idev->mc_list_lock);
2606 state->idev = NULL;
2607 }
2608 state->dev = NULL; 2606 state->dev = NULL;
2609 rcu_read_unlock(); 2607 rcu_read_unlock();
2610} 2608}
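
The /proc seq_file changes above remove an awkward pattern: the old code acquired each device's mc_list_lock in ->start/->next and had to remember to release it in ->next/->stop, spreading one lock across several callbacks. With mc_list under RCU, the single rcu_read_lock() taken at ->start covers every pointer chased until ->stop. The resulting pairing, sketched with the iterator-state bookkeeping elided:

	static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos)
		__acquires(rcu)
	{
		rcu_read_lock();	/* covers the whole walk */
		return *pos ? igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
	}

	static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
		__releases(rcu)
	{
		rcu_read_unlock();	/* no per-device unlock bookkeeping */
	}
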
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7174370b119..06f5f8f482f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -358,17 +358,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
358 struct ip_options *opt = inet_rsk(req)->opt; 358 struct ip_options *opt = inet_rsk(req)->opt;
359 struct flowi fl = { .oif = sk->sk_bound_dev_if, 359 struct flowi fl = { .oif = sk->sk_bound_dev_if,
360 .mark = sk->sk_mark, 360 .mark = sk->sk_mark,
361 .nl_u = { .ip4_u = 361 .fl4_dst = ((opt && opt->srr) ?
362 { .daddr = ((opt && opt->srr) ? 362 opt->faddr : ireq->rmt_addr),
363 opt->faddr : 363 .fl4_src = ireq->loc_addr,
364 ireq->rmt_addr), 364 .fl4_tos = RT_CONN_FLAGS(sk),
365 .saddr = ireq->loc_addr,
366 .tos = RT_CONN_FLAGS(sk) } },
367 .proto = sk->sk_protocol, 365 .proto = sk->sk_protocol,
368 .flags = inet_sk_flowi_flags(sk), 366 .flags = inet_sk_flowi_flags(sk),
369 .uli_u = { .ports = 367 .fl_ip_sport = inet_sk(sk)->inet_sport,
370 { .sport = inet_sk(sk)->inet_sport, 368 .fl_ip_dport = ireq->rmt_port };
371 .dport = ireq->rmt_port } } };
372 struct net *net = sock_net(sk); 369 struct net *net = sock_net(sk);
373 370
374 security_req_classify_flow(req, &fl); 371 security_req_classify_flow(req, &fl);
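
This conversion, like the matching ones in ip_gre.c, ip_output.c, ipip.c, ipmr.c, netfilter.c, raw.c, route.c and syncookies.c below, replaces nested designated initializers of the flowi nl_u/uli_u unions with the flat accessor names. Presumably these names were already defined as macros over the union members in include/net/flow.h, so the initializers change shape while the layout and generated code stay the same:

	/* presumed aliases in include/net/flow.h of this era: */
	#define fl4_dst		nl_u.ip4_u.daddr
	#define fl4_src		nl_u.ip4_u.saddr
	#define fl4_tos		nl_u.ip4_u.tos
	#define fl4_scope	nl_u.ip4_u.scope
	#define fl_ip_sport	uli_u.ports.sport
	#define fl_ip_dport	uli_u.ports.dport
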
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9e94d7cf4f8..d9bc85751c7 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -63,7 +63,7 @@
63 * refcnt: atomically against modifications on other CPU; 63 * refcnt: atomically against modifications on other CPU;
64 * usually under some other lock to prevent node disappearing 64 * usually under some other lock to prevent node disappearing
65 * dtime: unused node list lock 65 * dtime: unused node list lock
66 * v4daddr: unchangeable 66 * daddr: unchangeable
67 * ip_id_count: atomic value (no lock needed) 67 * ip_id_count: atomic value (no lock needed)
68 */ 68 */
69 69
@@ -79,15 +79,24 @@ static const struct inet_peer peer_fake_node = {
79 .avl_height = 0 79 .avl_height = 0
80}; 80};
81 81
82static struct { 82struct inet_peer_base {
83 struct inet_peer __rcu *root; 83 struct inet_peer __rcu *root;
84 spinlock_t lock; 84 spinlock_t lock;
85 int total; 85 int total;
86} peers = { 86};
87
88static struct inet_peer_base v4_peers = {
89 .root = peer_avl_empty_rcu,
90 .lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock),
91 .total = 0,
92};
93
94static struct inet_peer_base v6_peers = {
87 .root = peer_avl_empty_rcu, 95 .root = peer_avl_empty_rcu,
88 .lock = __SPIN_LOCK_UNLOCKED(peers.lock), 96 .lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock),
89 .total = 0, 97 .total = 0,
90}; 98};
99
91#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ 100#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
92 101
93/* Exported for sysctl_net_ipv4. */ 102/* Exported for sysctl_net_ipv4. */
@@ -152,28 +161,45 @@ static void unlink_from_unused(struct inet_peer *p)
152 } 161 }
153} 162}
154 163
164static int addr_compare(const struct inetpeer_addr *a,
165 const struct inetpeer_addr *b)
166{
167 int i, n = (a->family == AF_INET ? 1 : 4);
168
169 for (i = 0; i < n; i++) {
170 if (a->a6[i] == b->a6[i])
171 continue;
172 if (a->a6[i] < b->a6[i])
173 return -1;
174 return 1;
175 }
176
177 return 0;
178}
179
155/* 180/*
156 * Called with local BH disabled and the pool lock held. 181 * Called with local BH disabled and the pool lock held.
157 */ 182 */
158#define lookup(_daddr, _stack) \ 183#define lookup(_daddr, _stack, _base) \
159({ \ 184({ \
160 struct inet_peer *u; \ 185 struct inet_peer *u; \
161 struct inet_peer __rcu **v; \ 186 struct inet_peer __rcu **v; \
162 \ 187 \
163 stackptr = _stack; \ 188 stackptr = _stack; \
164 *stackptr++ = &peers.root; \ 189 *stackptr++ = &_base->root; \
165 for (u = rcu_dereference_protected(peers.root, \ 190 for (u = rcu_dereference_protected(_base->root, \
166 lockdep_is_held(&peers.lock)); \ 191 lockdep_is_held(&_base->lock)); \
167 u != peer_avl_empty; ) { \ 192 u != peer_avl_empty; ) { \
168 if (_daddr == u->v4daddr) \ 193 int cmp = addr_compare(_daddr, &u->daddr); \
194 if (cmp == 0) \
169 break; \ 195 break; \
170 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ 196 if (cmp == -1) \
171 v = &u->avl_left; \ 197 v = &u->avl_left; \
172 else \ 198 else \
173 v = &u->avl_right; \ 199 v = &u->avl_right; \
174 *stackptr++ = v; \ 200 *stackptr++ = v; \
175 u = rcu_dereference_protected(*v, \ 201 u = rcu_dereference_protected(*v, \
176 lockdep_is_held(&peers.lock)); \ 202 lockdep_is_held(&_base->lock)); \
177 } \ 203 } \
178 u; \ 204 u; \
179}) 205})
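
addr_compare() gives the AVL tree a single total order for both families: one 32-bit word for IPv4, four for IPv6, with the first differing word deciding. The words are compared as __be32 values, so the order is not numeric, merely consistent, which is all a search tree needs. A hypothetical pair of IPv4 keys, assuming a4 overlays a6[0] in the inetpeer_addr union:

	struct inetpeer_addr k1 = { .a4 = htonl(0x0a000001), .family = AF_INET }; /* 10.0.0.1 */
	struct inetpeer_addr k2 = { .a4 = htonl(0x0a000002), .family = AF_INET }; /* 10.0.0.2 */

	/* addr_compare(&k1, &k2) inspects only a6[0] and returns -1,
	 * so k1 lands in the left subtree relative to k2 */
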
@@ -185,13 +211,15 @@ static void unlink_from_unused(struct inet_peer *p)
185 * But every pointer we follow is guaranteed to be valid thanks to RCU. 211 * But every pointer we follow is guaranteed to be valid thanks to RCU.
186 * We exit from this function if number of links exceeds PEER_MAXDEPTH 212 * We exit from this function if number of links exceeds PEER_MAXDEPTH
187 */ 213 */
188static struct inet_peer *lookup_rcu_bh(__be32 daddr) 214static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
215 struct inet_peer_base *base)
189{ 216{
190 struct inet_peer *u = rcu_dereference_bh(peers.root); 217 struct inet_peer *u = rcu_dereference_bh(base->root);
191 int count = 0; 218 int count = 0;
192 219
193 while (u != peer_avl_empty) { 220 while (u != peer_avl_empty) {
194 if (daddr == u->v4daddr) { 221 int cmp = addr_compare(daddr, &u->daddr);
222 if (cmp == 0) {
195 /* Before taking a reference, check if this entry was 223 /* Before taking a reference, check if this entry was
196 * deleted, unlink_from_pool() sets refcnt=-1 to make 224 * deleted, unlink_from_pool() sets refcnt=-1 to make
197 * distinction between an unused entry (refcnt=0) and 225 * distinction between an unused entry (refcnt=0) and
@@ -201,7 +229,7 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
201 u = NULL; 229 u = NULL;
202 return u; 230 return u;
203 } 231 }
204 if ((__force __u32)daddr < (__force __u32)u->v4daddr) 232 if (cmp == -1)
205 u = rcu_dereference_bh(u->avl_left); 233 u = rcu_dereference_bh(u->avl_left);
206 else 234 else
207 u = rcu_dereference_bh(u->avl_right); 235 u = rcu_dereference_bh(u->avl_right);
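
The deletion marker referenced in the comment above (refcnt set to -1 by unlink_from_pool()) is what lets lockless readers claim a node safely: the reader bumps the refcount only if the node is not already marked dead. The context line elided by this hunk presumably does it with a conditional increment, along these lines (an assumption, since the line is outside the hunk):

	/* under rcu_read_lock_bh(); fails iff refcnt == -1 (deleted) */
	if (!atomic_add_unless(&u->refcnt, 1, -1))
		u = NULL;	/* lost the race with unlink_from_pool() */

An unused-but-cached node sits at refcnt 0, so the 0 -> 1 transition succeeds, while a dead node stays at -1 until RCU frees it.
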
@@ -212,19 +240,19 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
212} 240}
213 241
214/* Called with local BH disabled and the pool lock held. */ 242/* Called with local BH disabled and the pool lock held. */
215#define lookup_rightempty(start) \ 243#define lookup_rightempty(start, base) \
216({ \ 244({ \
217 struct inet_peer *u; \ 245 struct inet_peer *u; \
218 struct inet_peer __rcu **v; \ 246 struct inet_peer __rcu **v; \
219 *stackptr++ = &start->avl_left; \ 247 *stackptr++ = &start->avl_left; \
220 v = &start->avl_left; \ 248 v = &start->avl_left; \
221 for (u = rcu_dereference_protected(*v, \ 249 for (u = rcu_dereference_protected(*v, \
222 lockdep_is_held(&peers.lock)); \ 250 lockdep_is_held(&base->lock)); \
223 u->avl_right != peer_avl_empty_rcu; ) { \ 251 u->avl_right != peer_avl_empty_rcu; ) { \
224 v = &u->avl_right; \ 252 v = &u->avl_right; \
225 *stackptr++ = v; \ 253 *stackptr++ = v; \
226 u = rcu_dereference_protected(*v, \ 254 u = rcu_dereference_protected(*v, \
227 lockdep_is_held(&peers.lock)); \ 255 lockdep_is_held(&base->lock)); \
228 } \ 256 } \
229 u; \ 257 u; \
230}) 258})
@@ -234,7 +262,8 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
234 * Look into mm/map_avl.c for more detail description of the ideas. 262 * Look into mm/map_avl.c for more detail description of the ideas.
235 */ 263 */
236static void peer_avl_rebalance(struct inet_peer __rcu **stack[], 264static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
237 struct inet_peer __rcu ***stackend) 265 struct inet_peer __rcu ***stackend,
266 struct inet_peer_base *base)
238{ 267{
239 struct inet_peer __rcu **nodep; 268 struct inet_peer __rcu **nodep;
240 struct inet_peer *node, *l, *r; 269 struct inet_peer *node, *l, *r;
@@ -243,20 +272,20 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
243 while (stackend > stack) { 272 while (stackend > stack) {
244 nodep = *--stackend; 273 nodep = *--stackend;
245 node = rcu_dereference_protected(*nodep, 274 node = rcu_dereference_protected(*nodep,
246 lockdep_is_held(&peers.lock)); 275 lockdep_is_held(&base->lock));
247 l = rcu_dereference_protected(node->avl_left, 276 l = rcu_dereference_protected(node->avl_left,
248 lockdep_is_held(&peers.lock)); 277 lockdep_is_held(&base->lock));
249 r = rcu_dereference_protected(node->avl_right, 278 r = rcu_dereference_protected(node->avl_right,
250 lockdep_is_held(&peers.lock)); 279 lockdep_is_held(&base->lock));
251 lh = node_height(l); 280 lh = node_height(l);
252 rh = node_height(r); 281 rh = node_height(r);
253 if (lh > rh + 1) { /* l: RH+2 */ 282 if (lh > rh + 1) { /* l: RH+2 */
254 struct inet_peer *ll, *lr, *lrl, *lrr; 283 struct inet_peer *ll, *lr, *lrl, *lrr;
255 int lrh; 284 int lrh;
256 ll = rcu_dereference_protected(l->avl_left, 285 ll = rcu_dereference_protected(l->avl_left,
257 lockdep_is_held(&peers.lock)); 286 lockdep_is_held(&base->lock));
258 lr = rcu_dereference_protected(l->avl_right, 287 lr = rcu_dereference_protected(l->avl_right,
259 lockdep_is_held(&peers.lock)); 288 lockdep_is_held(&base->lock));
260 lrh = node_height(lr); 289 lrh = node_height(lr);
261 if (lrh <= node_height(ll)) { /* ll: RH+1 */ 290 if (lrh <= node_height(ll)) { /* ll: RH+1 */
262 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ 291 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
@@ -268,9 +297,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
268 RCU_INIT_POINTER(*nodep, l); 297 RCU_INIT_POINTER(*nodep, l);
269 } else { /* ll: RH, lr: RH+1 */ 298 } else { /* ll: RH, lr: RH+1 */
270 lrl = rcu_dereference_protected(lr->avl_left, 299 lrl = rcu_dereference_protected(lr->avl_left,
271 lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */ 300 lockdep_is_held(&base->lock)); /* lrl: RH or RH-1 */
272 lrr = rcu_dereference_protected(lr->avl_right, 301 lrr = rcu_dereference_protected(lr->avl_right,
273 lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */ 302 lockdep_is_held(&base->lock)); /* lrr: RH or RH-1 */
274 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ 303 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
275 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ 304 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
276 node->avl_height = rh + 1; /* node: RH+1 */ 305 node->avl_height = rh + 1; /* node: RH+1 */
@@ -286,9 +315,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
286 struct inet_peer *rr, *rl, *rlr, *rll; 315 struct inet_peer *rr, *rl, *rlr, *rll;
287 int rlh; 316 int rlh;
288 rr = rcu_dereference_protected(r->avl_right, 317 rr = rcu_dereference_protected(r->avl_right,
289 lockdep_is_held(&peers.lock)); 318 lockdep_is_held(&base->lock));
290 rl = rcu_dereference_protected(r->avl_left, 319 rl = rcu_dereference_protected(r->avl_left,
291 lockdep_is_held(&peers.lock)); 320 lockdep_is_held(&base->lock));
292 rlh = node_height(rl); 321 rlh = node_height(rl);
293 if (rlh <= node_height(rr)) { /* rr: LH+1 */ 322 if (rlh <= node_height(rr)) { /* rr: LH+1 */
294 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ 323 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
@@ -300,9 +329,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
300 RCU_INIT_POINTER(*nodep, r); 329 RCU_INIT_POINTER(*nodep, r);
301 } else { /* rr: RH, rl: RH+1 */ 330 } else { /* rr: RH, rl: RH+1 */
302 rlr = rcu_dereference_protected(rl->avl_right, 331 rlr = rcu_dereference_protected(rl->avl_right,
303 lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */ 332 lockdep_is_held(&base->lock)); /* rlr: LH or LH-1 */
304 rll = rcu_dereference_protected(rl->avl_left, 333 rll = rcu_dereference_protected(rl->avl_left,
305 lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */ 334 lockdep_is_held(&base->lock)); /* rll: LH or LH-1 */
306 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ 335 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
307 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ 336 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
308 node->avl_height = lh + 1; /* node: LH+1 */ 337 node->avl_height = lh + 1; /* node: LH+1 */
@@ -321,14 +350,14 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
321} 350}
322 351
323/* Called with local BH disabled and the pool lock held. */ 352/* Called with local BH disabled and the pool lock held. */
324#define link_to_pool(n) \ 353#define link_to_pool(n, base) \
325do { \ 354do { \
326 n->avl_height = 1; \ 355 n->avl_height = 1; \
327 n->avl_left = peer_avl_empty_rcu; \ 356 n->avl_left = peer_avl_empty_rcu; \
328 n->avl_right = peer_avl_empty_rcu; \ 357 n->avl_right = peer_avl_empty_rcu; \
329 /* lockless readers can catch us now */ \ 358 /* lockless readers can catch us now */ \
330 rcu_assign_pointer(**--stackptr, n); \ 359 rcu_assign_pointer(**--stackptr, n); \
331 peer_avl_rebalance(stack, stackptr); \ 360 peer_avl_rebalance(stack, stackptr, base); \
332} while (0) 361} while (0)
333 362
334static void inetpeer_free_rcu(struct rcu_head *head) 363static void inetpeer_free_rcu(struct rcu_head *head)
@@ -337,13 +366,13 @@ static void inetpeer_free_rcu(struct rcu_head *head)
337} 366}
338 367
339/* May be called with local BH enabled. */ 368/* May be called with local BH enabled. */
340static void unlink_from_pool(struct inet_peer *p) 369static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
341{ 370{
342 int do_free; 371 int do_free;
343 372
344 do_free = 0; 373 do_free = 0;
345 374
346 spin_lock_bh(&peers.lock); 375 spin_lock_bh(&base->lock);
347 /* Check the reference counter. It was artificially incremented by 1 376 /* Check the reference counter. It was artificially incremented by 1
348 * in cleanup() function to prevent sudden disappearing. If we can 377 * in cleanup() function to prevent sudden disappearing. If we can
349 * atomically (because of lockless readers) take this last reference, 378 * atomically (because of lockless readers) take this last reference,
@@ -353,7 +382,7 @@ static void unlink_from_pool(struct inet_peer *p)
353 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { 382 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
354 struct inet_peer __rcu **stack[PEER_MAXDEPTH]; 383 struct inet_peer __rcu **stack[PEER_MAXDEPTH];
355 struct inet_peer __rcu ***stackptr, ***delp; 384 struct inet_peer __rcu ***stackptr, ***delp;
356 if (lookup(p->v4daddr, stack) != p) 385 if (lookup(&p->daddr, stack, base) != p)
357 BUG(); 386 BUG();
358 delp = stackptr - 1; /* *delp[0] == p */ 387 delp = stackptr - 1; /* *delp[0] == p */
359 if (p->avl_left == peer_avl_empty_rcu) { 388 if (p->avl_left == peer_avl_empty_rcu) {
@@ -362,11 +391,11 @@ static void unlink_from_pool(struct inet_peer *p)
362 } else { 391 } else {
363 /* look for a node to insert instead of p */ 392 /* look for a node to insert instead of p */
364 struct inet_peer *t; 393 struct inet_peer *t;
365 t = lookup_rightempty(p); 394 t = lookup_rightempty(p, base);
366 BUG_ON(rcu_dereference_protected(*stackptr[-1], 395 BUG_ON(rcu_dereference_protected(*stackptr[-1],
367 lockdep_is_held(&peers.lock)) != t); 396 lockdep_is_held(&base->lock)) != t);
368 **--stackptr = t->avl_left; 397 **--stackptr = t->avl_left;
369 /* t is removed, t->v4daddr > x->v4daddr for any 398 /* t is removed, t->daddr > x->daddr for any
370 * x in p->avl_left subtree. 399 * x in p->avl_left subtree.
371 * Put t in the old place of p. */ 400 * Put t in the old place of p. */
372 RCU_INIT_POINTER(*delp[0], t); 401 RCU_INIT_POINTER(*delp[0], t);
@@ -376,11 +405,11 @@ static void unlink_from_pool(struct inet_peer *p)
376 BUG_ON(delp[1] != &p->avl_left); 405 BUG_ON(delp[1] != &p->avl_left);
377 delp[1] = &t->avl_left; /* was &p->avl_left */ 406 delp[1] = &t->avl_left; /* was &p->avl_left */
378 } 407 }
379 peer_avl_rebalance(stack, stackptr); 408 peer_avl_rebalance(stack, stackptr, base);
380 peers.total--; 409 base->total--;
381 do_free = 1; 410 do_free = 1;
382 } 411 }
383 spin_unlock_bh(&peers.lock); 412 spin_unlock_bh(&base->lock);
384 413
385 if (do_free) 414 if (do_free)
386 call_rcu_bh(&p->rcu, inetpeer_free_rcu); 415 call_rcu_bh(&p->rcu, inetpeer_free_rcu);
@@ -395,6 +424,16 @@ static void unlink_from_pool(struct inet_peer *p)
395 inet_putpeer(p); 424 inet_putpeer(p);
396} 425}
397 426
427static struct inet_peer_base *family_to_base(int family)
428{
429 return (family == AF_INET ? &v4_peers : &v6_peers);
430}
431
432static struct inet_peer_base *peer_to_base(struct inet_peer *p)
433{
434 return family_to_base(p->daddr.family);
435}
436
398/* May be called with local BH enabled. */ 437/* May be called with local BH enabled. */
399static int cleanup_once(unsigned long ttl) 438static int cleanup_once(unsigned long ttl)
400{ 439{
@@ -428,21 +467,22 @@ static int cleanup_once(unsigned long ttl)
428 * happen because of entry limits in route cache. */ 467 * happen because of entry limits in route cache. */
429 return -1; 468 return -1;
430 469
431 unlink_from_pool(p); 470 unlink_from_pool(p, peer_to_base(p));
432 return 0; 471 return 0;
433} 472}
434 473
435/* Called with or without local BH being disabled. */ 474/* Called with or without local BH being disabled. */
436struct inet_peer *inet_getpeer(__be32 daddr, int create) 475struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
437{ 476{
438 struct inet_peer *p;
439 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; 477 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
478 struct inet_peer_base *base = family_to_base(AF_INET);
479 struct inet_peer *p;
440 480
441 /* Look up for the address quickly, lockless. 481 /* Look up for the address quickly, lockless.
442 * Because of a concurrent writer, we might not find an existing entry. 482 * Because of a concurrent writer, we might not find an existing entry.
443 */ 483 */
444 rcu_read_lock_bh(); 484 rcu_read_lock_bh();
445 p = lookup_rcu_bh(daddr); 485 p = lookup_rcu_bh(daddr, base);
446 rcu_read_unlock_bh(); 486 rcu_read_unlock_bh();
447 487
448 if (p) { 488 if (p) {
@@ -456,50 +496,57 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
456 /* retry an exact lookup, taking the lock before. 496 /* retry an exact lookup, taking the lock before.
457 * At least, nodes should be hot in our cache. 497 * At least, nodes should be hot in our cache.
458 */ 498 */
459 spin_lock_bh(&peers.lock); 499 spin_lock_bh(&base->lock);
460 p = lookup(daddr, stack); 500 p = lookup(daddr, stack, base);
461 if (p != peer_avl_empty) { 501 if (p != peer_avl_empty) {
462 atomic_inc(&p->refcnt); 502 atomic_inc(&p->refcnt);
463 spin_unlock_bh(&peers.lock); 503 spin_unlock_bh(&base->lock);
464 /* Remove the entry from unused list if it was there. */ 504 /* Remove the entry from unused list if it was there. */
465 unlink_from_unused(p); 505 unlink_from_unused(p);
466 return p; 506 return p;
467 } 507 }
468 p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; 508 p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
469 if (p) { 509 if (p) {
470 p->v4daddr = daddr; 510 p->daddr = *daddr;
471 atomic_set(&p->refcnt, 1); 511 atomic_set(&p->refcnt, 1);
472 atomic_set(&p->rid, 0); 512 atomic_set(&p->rid, 0);
473 atomic_set(&p->ip_id_count, secure_ip_id(daddr)); 513 atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4));
474 p->tcp_ts_stamp = 0; 514 p->tcp_ts_stamp = 0;
475 INIT_LIST_HEAD(&p->unused); 515 INIT_LIST_HEAD(&p->unused);
476 516
477 517
478 /* Link the node. */ 518 /* Link the node. */
479 link_to_pool(p); 519 link_to_pool(p, base);
480 peers.total++; 520 base->total++;
481 } 521 }
482 spin_unlock_bh(&peers.lock); 522 spin_unlock_bh(&base->lock);
483 523
484 if (peers.total >= inet_peer_threshold) 524 if (base->total >= inet_peer_threshold)
485 /* Remove one less-recently-used entry. */ 525 /* Remove one less-recently-used entry. */
486 cleanup_once(0); 526 cleanup_once(0);
487 527
488 return p; 528 return p;
489} 529}
490 530
531static int compute_total(void)
532{
533 return v4_peers.total + v6_peers.total;
534}
535EXPORT_SYMBOL_GPL(inet_getpeer);
536
491/* Called with local BH disabled. */ 537/* Called with local BH disabled. */
492static void peer_check_expire(unsigned long dummy) 538static void peer_check_expire(unsigned long dummy)
493{ 539{
494 unsigned long now = jiffies; 540 unsigned long now = jiffies;
495 int ttl; 541 int ttl, total;
496 542
497 if (peers.total >= inet_peer_threshold) 543 total = compute_total();
544 if (total >= inet_peer_threshold)
498 ttl = inet_peer_minttl; 545 ttl = inet_peer_minttl;
499 else 546 else
500 ttl = inet_peer_maxttl 547 ttl = inet_peer_maxttl
501 - (inet_peer_maxttl - inet_peer_minttl) / HZ * 548 - (inet_peer_maxttl - inet_peer_minttl) / HZ *
502 peers.total / inet_peer_threshold * HZ; 549 total / inet_peer_threshold * HZ;
503 while (!cleanup_once(ttl)) { 550 while (!cleanup_once(ttl)) {
504 if (jiffies != now) 551 if (jiffies != now)
505 break; 552 break;
@@ -508,13 +555,14 @@ static void peer_check_expire(unsigned long dummy)
508 /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime 555 /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
509 * interval depending on the total number of entries (more entries, 556 * interval depending on the total number of entries (more entries,
510 * less interval). */ 557 * less interval). */
511 if (peers.total >= inet_peer_threshold) 558 total = compute_total();
559 if (total >= inet_peer_threshold)
512 peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; 560 peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
513 else 561 else
514 peer_periodic_timer.expires = jiffies 562 peer_periodic_timer.expires = jiffies
515 + inet_peer_gc_maxtime 563 + inet_peer_gc_maxtime
516 - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * 564 - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
517 peers.total / inet_peer_threshold * HZ; 565 total / inet_peer_threshold * HZ;
518 add_timer(&peer_periodic_timer); 566 add_timer(&peer_periodic_timer);
519} 567}
520 568
@@ -530,3 +578,4 @@ void inet_putpeer(struct inet_peer *p)
530 578
531 local_bh_enable(); 579 local_bh_enable();
532} 580}
581EXPORT_SYMBOL_GPL(inet_putpeer);
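
At this stage inet_getpeer() still hardcodes family_to_base(AF_INET); the v6 base exists but is unreachable until a later change threads the family through. The callers converted below (ip_fragment.c here, route.c further down) already use inet_getpeer_v4(), presumably a small wrapper in include/net/inetpeer.h along these lines (the header is outside this diff, so this is an assumption):

	static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
	{
		struct inetpeer_addr daddr;

		daddr.a4 = v4daddr;		/* pack the IPv4 key */
		daddr.family = AF_INET;
		return inet_getpeer(&daddr, create);
	}
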
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 168440834ad..e6215bdd96c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -141,7 +141,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
141 qp->daddr = arg->iph->daddr; 141 qp->daddr = arg->iph->daddr;
142 qp->user = arg->user; 142 qp->user = arg->user;
143 qp->peer = sysctl_ipfrag_max_dist ? 143 qp->peer = sysctl_ipfrag_max_dist ?
144 inet_getpeer(arg->iph->saddr, 1) : NULL; 144 inet_getpeer_v4(arg->iph->saddr, 1) : NULL;
145} 145}
146 146
147static __inline__ void ip4_frag_free(struct inet_frag_queue *q) 147static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 70ff77f02ee..258c98d5fa7 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -405,11 +405,11 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
405 if (parms->name[0]) 405 if (parms->name[0])
406 strlcpy(name, parms->name, IFNAMSIZ); 406 strlcpy(name, parms->name, IFNAMSIZ);
407 else 407 else
408 sprintf(name, "gre%%d"); 408 strcpy(name, "gre%d");
409 409
410 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); 410 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
411 if (!dev) 411 if (!dev)
412 return NULL; 412 return NULL;
413 413
414 dev_net_set(dev, net); 414 dev_net_set(dev, net);
415 415
@@ -634,7 +634,7 @@ static int ipgre_rcv(struct sk_buff *skb)
634#ifdef CONFIG_NET_IPGRE_BROADCAST 634#ifdef CONFIG_NET_IPGRE_BROADCAST
635 if (ipv4_is_multicast(iph->daddr)) { 635 if (ipv4_is_multicast(iph->daddr)) {
636 /* Looped back packet, drop it! */ 636 /* Looped back packet, drop it! */
637 if (skb_rtable(skb)->fl.iif == 0) 637 if (rt_is_output_route(skb_rtable(skb)))
638 goto drop; 638 goto drop;
639 tunnel->dev->stats.multicast++; 639 tunnel->dev->stats.multicast++;
640 skb->pkt_type = PACKET_BROADCAST; 640 skb->pkt_type = PACKET_BROADCAST;
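
rt_is_output_route() and its rt_is_input_route() counterpart, used here and in many route.c hunks below, replace open-coded fl.iif tests; presumably they are trivial inlines in include/net/route.h, roughly:

	static inline bool rt_is_input_route(struct rtable *rt)
	{
		return rt->fl.iif != 0;		/* arrived via some interface */
	}

	static inline bool rt_is_output_route(struct rtable *rt)
	{
		return rt->fl.iif == 0;		/* locally generated */
	}
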
@@ -772,16 +772,11 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
772 { 772 {
773 struct flowi fl = { 773 struct flowi fl = {
774 .oif = tunnel->parms.link, 774 .oif = tunnel->parms.link,
775 .nl_u = { 775 .fl4_dst = dst,
776 .ip4_u = { 776 .fl4_src = tiph->saddr,
777 .daddr = dst, 777 .fl4_tos = RT_TOS(tos),
778 .saddr = tiph->saddr, 778 .fl_gre_key = tunnel->parms.o_key
779 .tos = RT_TOS(tos) 779 };
780 }
781 },
782 .proto = IPPROTO_GRE
783 }
784;
785 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 780 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
786 dev->stats.tx_carrier_errors++; 781 dev->stats.tx_carrier_errors++;
787 goto tx_error; 782 goto tx_error;
@@ -951,14 +946,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
951 if (iph->daddr) { 946 if (iph->daddr) {
952 struct flowi fl = { 947 struct flowi fl = {
953 .oif = tunnel->parms.link, 948 .oif = tunnel->parms.link,
954 .nl_u = { 949 .fl4_dst = iph->daddr,
955 .ip4_u = { 950 .fl4_src = iph->saddr,
956 .daddr = iph->daddr, 951 .fl4_tos = RT_TOS(iph->tos),
957 .saddr = iph->saddr, 952 .proto = IPPROTO_GRE,
958 .tos = RT_TOS(iph->tos) 953 .fl_gre_key = tunnel->parms.o_key
959 }
960 },
961 .proto = IPPROTO_GRE
962 }; 954 };
963 struct rtable *rt; 955 struct rtable *rt;
964 956
@@ -1216,14 +1208,11 @@ static int ipgre_open(struct net_device *dev)
1216 if (ipv4_is_multicast(t->parms.iph.daddr)) { 1208 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1217 struct flowi fl = { 1209 struct flowi fl = {
1218 .oif = t->parms.link, 1210 .oif = t->parms.link,
1219 .nl_u = { 1211 .fl4_dst = t->parms.iph.daddr,
1220 .ip4_u = { 1212 .fl4_src = t->parms.iph.saddr,
1221 .daddr = t->parms.iph.daddr, 1213 .fl4_tos = RT_TOS(t->parms.iph.tos),
1222 .saddr = t->parms.iph.saddr, 1214 .proto = IPPROTO_GRE,
1223 .tos = RT_TOS(t->parms.iph.tos) 1215 .fl_gre_key = t->parms.o_key
1224 }
1225 },
1226 .proto = IPPROTO_GRE
1227 }; 1216 };
1228 struct rtable *rt; 1217 struct rtable *rt;
1229 1218
@@ -1775,3 +1764,4 @@ module_exit(ipgre_fini);
1775MODULE_LICENSE("GPL"); 1764MODULE_LICENSE("GPL");
1776MODULE_ALIAS_RTNL_LINK("gre"); 1765MODULE_ALIAS_RTNL_LINK("gre");
1777MODULE_ALIAS_RTNL_LINK("gretap"); 1766MODULE_ALIAS_RTNL_LINK("gretap");
1767MODULE_ALIAS("gre0");
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 439d2a34ee4..5090c7ff525 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -341,15 +341,13 @@ int ip_queue_xmit(struct sk_buff *skb)
341 { 341 {
342 struct flowi fl = { .oif = sk->sk_bound_dev_if, 342 struct flowi fl = { .oif = sk->sk_bound_dev_if,
343 .mark = sk->sk_mark, 343 .mark = sk->sk_mark,
344 .nl_u = { .ip4_u = 344 .fl4_dst = daddr,
345 { .daddr = daddr, 345 .fl4_src = inet->inet_saddr,
346 .saddr = inet->inet_saddr, 346 .fl4_tos = RT_CONN_FLAGS(sk),
347 .tos = RT_CONN_FLAGS(sk) } },
348 .proto = sk->sk_protocol, 347 .proto = sk->sk_protocol,
349 .flags = inet_sk_flowi_flags(sk), 348 .flags = inet_sk_flowi_flags(sk),
350 .uli_u = { .ports = 349 .fl_ip_sport = inet->inet_sport,
351 { .sport = inet->inet_sport, 350 .fl_ip_dport = inet->inet_dport };
352 .dport = inet->inet_dport } } };
353 351
354 /* If this fails, retransmit mechanism of transport layer will 352 /* If this fails, retransmit mechanism of transport layer will
355 * keep trying until route appears or the connection times 353 * keep trying until route appears or the connection times
@@ -1404,14 +1402,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1404 1402
1405 { 1403 {
1406 struct flowi fl = { .oif = arg->bound_dev_if, 1404 struct flowi fl = { .oif = arg->bound_dev_if,
1407 .nl_u = { .ip4_u = 1405 .fl4_dst = daddr,
1408 { .daddr = daddr, 1406 .fl4_src = rt->rt_spec_dst,
1409 .saddr = rt->rt_spec_dst, 1407 .fl4_tos = RT_TOS(ip_hdr(skb)->tos),
1410 .tos = RT_TOS(ip_hdr(skb)->tos) } }, 1408 .fl_ip_sport = tcp_hdr(skb)->dest,
1411 /* Not quite clean, but right. */ 1409 .fl_ip_dport = tcp_hdr(skb)->source,
1412 .uli_u = { .ports =
1413 { .sport = tcp_hdr(skb)->dest,
1414 .dport = tcp_hdr(skb)->source } },
1415 .proto = sk->sk_protocol, 1410 .proto = sk->sk_protocol,
1416 .flags = ip_reply_arg_flowi_flags(arg) }; 1411 .flags = ip_reply_arg_flowi_flags(arg) };
1417 security_skb_classify_flow(skb, &fl); 1412 security_skb_classify_flow(skb, &fl);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 3a6e1ec5e9a..2b097752426 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1191,13 +1191,13 @@ static int __init ic_dynamic(void)
1191 (ic_proto_enabled & IC_USE_DHCP) && 1191 (ic_proto_enabled & IC_USE_DHCP) &&
1192 ic_dhcp_msgtype != DHCPACK) { 1192 ic_dhcp_msgtype != DHCPACK) {
1193 ic_got_reply = 0; 1193 ic_got_reply = 0;
1194 printk(","); 1194 printk(KERN_CONT ",");
1195 continue; 1195 continue;
1196 } 1196 }
1197#endif /* IPCONFIG_DHCP */ 1197#endif /* IPCONFIG_DHCP */
1198 1198
1199 if (ic_got_reply) { 1199 if (ic_got_reply) {
1200 printk(" OK\n"); 1200 printk(KERN_CONT " OK\n");
1201 break; 1201 break;
1202 } 1202 }
1203 1203
@@ -1205,7 +1205,7 @@ static int __init ic_dynamic(void)
1205 continue; 1205 continue;
1206 1206
1207 if (! --retries) { 1207 if (! --retries) {
1208 printk(" timed out!\n"); 1208 printk(KERN_CONT " timed out!\n");
1209 break; 1209 break;
1210 } 1210 }
1211 1211
@@ -1215,7 +1215,7 @@ static int __init ic_dynamic(void)
1215 if (timeout > CONF_TIMEOUT_MAX) 1215 if (timeout > CONF_TIMEOUT_MAX)
1216 timeout = CONF_TIMEOUT_MAX; 1216 timeout = CONF_TIMEOUT_MAX;
1217 1217
1218 printk("."); 1218 printk(KERN_CONT ".");
1219 } 1219 }
1220 1220
1221#ifdef IPCONFIG_BOOTP 1221#ifdef IPCONFIG_BOOTP
@@ -1236,7 +1236,7 @@ static int __init ic_dynamic(void)
1236 ((ic_got_reply & IC_RARP) ? "RARP" 1236 ((ic_got_reply & IC_RARP) ? "RARP"
1237 : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), 1237 : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"),
1238 &ic_servaddr); 1238 &ic_servaddr);
1239 printk("my address is %pI4\n", &ic_myaddr); 1239 printk(KERN_CONT "my address is %pI4\n", &ic_myaddr);
1240 1240
1241 return 0; 1241 return 0;
1242} 1242}
@@ -1468,19 +1468,19 @@ static int __init ip_auto_config(void)
1468 /* 1468 /*
1469 * Clue in the operator. 1469 * Clue in the operator.
1470 */ 1470 */
1471 printk("IP-Config: Complete:"); 1471 printk("IP-Config: Complete:\n");
1472 printk("\n device=%s", ic_dev->name); 1472 printk(" device=%s", ic_dev->name);
1473 printk(", addr=%pI4", &ic_myaddr); 1473 printk(KERN_CONT ", addr=%pI4", &ic_myaddr);
1474 printk(", mask=%pI4", &ic_netmask); 1474 printk(KERN_CONT ", mask=%pI4", &ic_netmask);
1475 printk(", gw=%pI4", &ic_gateway); 1475 printk(KERN_CONT ", gw=%pI4", &ic_gateway);
1476 printk(",\n host=%s, domain=%s, nis-domain=%s", 1476 printk(KERN_CONT ",\n host=%s, domain=%s, nis-domain=%s",
1477 utsname()->nodename, ic_domain, utsname()->domainname); 1477 utsname()->nodename, ic_domain, utsname()->domainname);
1478 printk(",\n bootserver=%pI4", &ic_servaddr); 1478 printk(KERN_CONT ",\n bootserver=%pI4", &ic_servaddr);
1479 printk(", rootserver=%pI4", &root_server_addr); 1479 printk(KERN_CONT ", rootserver=%pI4", &root_server_addr);
1480 printk(", rootpath=%s", root_server_path); 1480 printk(KERN_CONT ", rootpath=%s", root_server_path);
1481 if (ic_dev_mtu) 1481 if (ic_dev_mtu)
1482 printk(", mtu=%d", ic_dev_mtu); 1482 printk(KERN_CONT ", mtu=%d", ic_dev_mtu);
1483 printk("\n"); 1483 printk(KERN_CONT "\n");
1484#endif /* !SILENT */ 1484#endif /* !SILENT */
1485 1485
1486 return 0; 1486 return 0;
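
The printk() churn above is about record boundaries: a printk() call that carries no level prefix starts a new log record at the default loglevel, which breaks messages assembled from several calls. KERN_CONT marks a continuation instead, mirroring the converted code:

	printk("IP-Config: Complete:\n");
	printk(" device=%s", ic_dev->name);		/* opens a record */
	printk(KERN_CONT ", addr=%pI4", &ic_myaddr);	/* appends to that line */
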
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index cd300aaee78..988f52fba54 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -463,13 +463,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
463 { 463 {
464 struct flowi fl = { 464 struct flowi fl = {
465 .oif = tunnel->parms.link, 465 .oif = tunnel->parms.link,
466 .nl_u = { 466 .fl4_dst = dst,
 467 .ip4_u = { 467 .fl4_src = tiph->saddr,
468 .daddr = dst, 468 .fl4_tos = RT_TOS(tos),
469 .saddr = tiph->saddr,
470 .tos = RT_TOS(tos)
471 }
472 },
473 .proto = IPPROTO_IPIP 469 .proto = IPPROTO_IPIP
474 }; 470 };
475 471
@@ -589,13 +585,9 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
589 if (iph->daddr) { 585 if (iph->daddr) {
590 struct flowi fl = { 586 struct flowi fl = {
591 .oif = tunnel->parms.link, 587 .oif = tunnel->parms.link,
592 .nl_u = { 588 .fl4_dst = iph->daddr,
593 .ip4_u = { 589 .fl4_src = iph->saddr,
594 .daddr = iph->daddr, 590 .fl4_tos = RT_TOS(iph->tos),
595 .saddr = iph->saddr,
596 .tos = RT_TOS(iph->tos)
597 }
598 },
599 .proto = IPPROTO_IPIP 591 .proto = IPPROTO_IPIP
600 }; 592 };
601 struct rtable *rt; 593 struct rtable *rt;
@@ -921,3 +913,4 @@ static void __exit ipip_fini(void)
921module_init(ipip_init); 913module_init(ipip_init);
922module_exit(ipip_fini); 914module_exit(ipip_fini);
923MODULE_LICENSE("GPL"); 915MODULE_LICENSE("GPL");
916MODULE_ALIAS("tunl0");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 86dd5691af4..3f3a9afd73e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1537,13 +1537,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1537 if (vif->flags & VIFF_TUNNEL) { 1537 if (vif->flags & VIFF_TUNNEL) {
1538 struct flowi fl = { 1538 struct flowi fl = {
1539 .oif = vif->link, 1539 .oif = vif->link,
1540 .nl_u = { 1540 .fl4_dst = vif->remote,
1541 .ip4_u = { 1541 .fl4_src = vif->local,
1542 .daddr = vif->remote, 1542 .fl4_tos = RT_TOS(iph->tos),
1543 .saddr = vif->local,
1544 .tos = RT_TOS(iph->tos)
1545 }
1546 },
1547 .proto = IPPROTO_IPIP 1543 .proto = IPPROTO_IPIP
1548 }; 1544 };
1549 1545
@@ -1553,12 +1549,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1553 } else { 1549 } else {
1554 struct flowi fl = { 1550 struct flowi fl = {
1555 .oif = vif->link, 1551 .oif = vif->link,
1556 .nl_u = { 1552 .fl4_dst = iph->daddr,
1557 .ip4_u = { 1553 .fl4_tos = RT_TOS(iph->tos),
1558 .daddr = iph->daddr,
1559 .tos = RT_TOS(iph->tos)
1560 }
1561 },
1562 .proto = IPPROTO_IPIP 1554 .proto = IPPROTO_IPIP
1563 }; 1555 };
1564 1556
@@ -1654,7 +1646,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1654 if (mrt->vif_table[vif].dev != skb->dev) { 1646 if (mrt->vif_table[vif].dev != skb->dev) {
1655 int true_vifi; 1647 int true_vifi;
1656 1648
1657 if (skb_rtable(skb)->fl.iif == 0) { 1649 if (rt_is_output_route(skb_rtable(skb))) {
1658 /* It is our own packet, looped back. 1650 /* It is our own packet, looped back.
1659 * Very complicated situation... 1651 * Very complicated situation...
1660 * 1652 *
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index d88a46c54fd..994a1f29ebb 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -31,10 +31,10 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
31 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. 31 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
32 */ 32 */
33 if (addr_type == RTN_LOCAL) { 33 if (addr_type == RTN_LOCAL) {
34 fl.nl_u.ip4_u.daddr = iph->daddr; 34 fl.fl4_dst = iph->daddr;
35 if (type == RTN_LOCAL) 35 if (type == RTN_LOCAL)
36 fl.nl_u.ip4_u.saddr = iph->saddr; 36 fl.fl4_src = iph->saddr;
37 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); 37 fl.fl4_tos = RT_TOS(iph->tos);
38 fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; 38 fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
39 fl.mark = skb->mark; 39 fl.mark = skb->mark;
40 fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; 40 fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
@@ -47,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
47 } else { 47 } else {
48 /* non-local src, find valid iif to satisfy 48 /* non-local src, find valid iif to satisfy
49 * rp-filter when calling ip_route_input. */ 49 * rp-filter when calling ip_route_input. */
50 fl.nl_u.ip4_u.daddr = iph->saddr; 50 fl.fl4_dst = iph->saddr;
51 if (ip_route_output_key(net, &rt, &fl) != 0) 51 if (ip_route_output_key(net, &rt, &fl) != 0)
52 return -1; 52 return -1;
53 53
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 48111594ee9..19eb59d0103 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -3,15 +3,15 @@
3# 3#
4 4
5# objects for l3 independent conntrack 5# objects for l3 independent conntrack
6nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o 6nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
7ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) 7ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
8ifeq ($(CONFIG_PROC_FS),y) 8ifeq ($(CONFIG_PROC_FS),y)
9nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o 9nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
10endif 10endif
11endif 11endif
12 12
13nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o 13nf_nat-y := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
14iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o 14iptable_nat-y := nf_nat_rule.o nf_nat_standalone.o
15 15
16# connection tracking 16# connection tracking
17obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o 17obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
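
The -objs to -y renames follow kbuild convention: both forms list the parts of a composite object, but -y composes with config symbols, so optional parts can be appended without an ifeq block like the CONFIG_NF_CONNTRACK_PROC_COMPAT one left above. Illustrative names only:

	obj-$(CONFIG_FOO)		+= foo.o
	foo-y				:= foo_core.o foo_main.o
	foo-$(CONFIG_FOO_DEBUG)		+= foo_debug.o	# no ifeq needed
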
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 1f85ef28989..a3d5ab786e8 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -549,10 +549,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
549 { 549 {
550 struct flowi fl = { .oif = ipc.oif, 550 struct flowi fl = { .oif = ipc.oif,
551 .mark = sk->sk_mark, 551 .mark = sk->sk_mark,
552 .nl_u = { .ip4_u = 552 .fl4_dst = daddr,
553 { .daddr = daddr, 553 .fl4_src = saddr,
554 .saddr = saddr, 554 .fl4_tos = tos,
555 .tos = tos } },
556 .proto = inet->hdrincl ? IPPROTO_RAW : 555 .proto = inet->hdrincl ? IPPROTO_RAW :
557 sk->sk_protocol, 556 sk->sk_protocol,
558 }; 557 };
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 987bf9adb31..3843c2dfde8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -140,13 +140,15 @@ static unsigned long expires_ljiffies;
140 140
141static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); 141static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
142static void ipv4_dst_destroy(struct dst_entry *dst); 142static void ipv4_dst_destroy(struct dst_entry *dst);
143static void ipv4_dst_ifdown(struct dst_entry *dst,
144 struct net_device *dev, int how);
145static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); 143static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
146static void ipv4_link_failure(struct sk_buff *skb); 144static void ipv4_link_failure(struct sk_buff *skb);
147static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 145static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
148static int rt_garbage_collect(struct dst_ops *ops); 146static int rt_garbage_collect(struct dst_ops *ops);
149 147
148static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
149 int how)
150{
151}
150 152
151static struct dst_ops ipv4_dst_ops = { 153static struct dst_ops ipv4_dst_ops = {
152 .family = AF_INET, 154 .family = AF_INET,
@@ -621,7 +623,7 @@ static inline int rt_fast_clean(struct rtable *rth)
621 /* Kill broadcast/multicast entries very aggresively, if they 623 /* Kill broadcast/multicast entries very aggresively, if they
622 collide in hash table with more useful entries */ 624 collide in hash table with more useful entries */
623 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && 625 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
624 rth->fl.iif && rth->dst.rt_next; 626 rt_is_input_route(rth) && rth->dst.rt_next;
625} 627}
626 628
627static inline int rt_valuable(struct rtable *rth) 629static inline int rt_valuable(struct rtable *rth)
@@ -666,7 +668,7 @@ static inline u32 rt_score(struct rtable *rt)
666 if (rt_valuable(rt)) 668 if (rt_valuable(rt))
667 score |= (1<<31); 669 score |= (1<<31);
668 670
669 if (!rt->fl.iif || 671 if (rt_is_output_route(rt) ||
670 !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL))) 672 !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
671 score |= (1<<30); 673 score |= (1<<30);
672 674
@@ -682,17 +684,17 @@ static inline bool rt_caching(const struct net *net)
682static inline bool compare_hash_inputs(const struct flowi *fl1, 684static inline bool compare_hash_inputs(const struct flowi *fl1,
683 const struct flowi *fl2) 685 const struct flowi *fl2)
684{ 686{
685 return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | 687 return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
686 ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | 688 ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
687 (fl1->iif ^ fl2->iif)) == 0); 689 (fl1->iif ^ fl2->iif)) == 0);
688} 690}
689 691
690static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) 692static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
691{ 693{
692 return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | 694 return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
693 ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | 695 ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
694 (fl1->mark ^ fl2->mark) | 696 (fl1->mark ^ fl2->mark) |
695 (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | 697 (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
696 (fl1->oif ^ fl2->oif) | 698 (fl1->oif ^ fl2->oif) |
697 (fl1->iif ^ fl2->iif)) == 0; 699 (fl1->iif ^ fl2->iif)) == 0;
698} 700}
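
The *(u16 *) cast in compare_keys() deliberately compares tos and the byte after it in one 16-bit load. The rename keeps that trick intact because fl4_tos is presumably still an alias onto the ip4_u member, where scope sits immediately after tos (layout assumed from flow.h of the era, abridged):

	struct {
		__be32	daddr;
		__be32	saddr;
		__u8	tos;
		__u8	scope;	/* picked up by the u16 load on &tos */
	} ip4_u;
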
@@ -1124,7 +1126,7 @@ restart:
1124 */ 1126 */
1125 1127
1126 rt->dst.flags |= DST_NOCACHE; 1128 rt->dst.flags |= DST_NOCACHE;
1127 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1129 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
1128 int err = arp_bind_neighbour(&rt->dst); 1130 int err = arp_bind_neighbour(&rt->dst);
1129 if (err) { 1131 if (err) {
1130 if (net_ratelimit()) 1132 if (net_ratelimit())
@@ -1222,7 +1224,7 @@ restart:
1222 /* Try to bind route to arp only if it is output 1224 /* Try to bind route to arp only if it is output
1223 route or unicast forwarding path. 1225 route or unicast forwarding path.
1224 */ 1226 */
1225 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1227 if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
1226 int err = arp_bind_neighbour(&rt->dst); 1228 int err = arp_bind_neighbour(&rt->dst);
1227 if (err) { 1229 if (err) {
1228 spin_unlock_bh(rt_hash_lock_addr(hash)); 1230 spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1287,7 +1289,7 @@ void rt_bind_peer(struct rtable *rt, int create)
1287{ 1289{
1288 struct inet_peer *peer; 1290 struct inet_peer *peer;
1289 1291
1290 peer = inet_getpeer(rt->rt_dst, create); 1292 peer = inet_getpeer_v4(rt->rt_dst, create);
1291 1293
1292 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) 1294 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1293 inet_putpeer(peer); 1295 inet_putpeer(peer);
@@ -1404,7 +1406,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1404 if (rth->fl.fl4_dst != daddr || 1406 if (rth->fl.fl4_dst != daddr ||
1405 rth->fl.fl4_src != skeys[i] || 1407 rth->fl.fl4_src != skeys[i] ||
1406 rth->fl.oif != ikeys[k] || 1408 rth->fl.oif != ikeys[k] ||
1407 rth->fl.iif != 0 || 1409 rt_is_input_route(rth) ||
1408 rt_is_expired(rth) || 1410 rt_is_expired(rth) ||
1409 !net_eq(dev_net(rth->dst.dev), net)) { 1411 !net_eq(dev_net(rth->dst.dev), net)) {
1410 rthp = &rth->dst.rt_next; 1412 rthp = &rth->dst.rt_next;
@@ -1433,8 +1435,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1433 rt->dst.child = NULL; 1435 rt->dst.child = NULL;
1434 if (rt->dst.dev) 1436 if (rt->dst.dev)
1435 dev_hold(rt->dst.dev); 1437 dev_hold(rt->dst.dev);
1436 if (rt->idev)
1437 in_dev_hold(rt->idev);
1438 rt->dst.obsolete = -1; 1438 rt->dst.obsolete = -1;
1439 rt->dst.lastuse = jiffies; 1439 rt->dst.lastuse = jiffies;
1440 rt->dst.path = &rt->dst; 1440 rt->dst.path = &rt->dst;
@@ -1666,7 +1666,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1666 rth->rt_dst != daddr || 1666 rth->rt_dst != daddr ||
1667 rth->rt_src != iph->saddr || 1667 rth->rt_src != iph->saddr ||
1668 rth->fl.oif != ikeys[k] || 1668 rth->fl.oif != ikeys[k] ||
1669 rth->fl.iif != 0 || 1669 rt_is_input_route(rth) ||
1670 dst_metric_locked(&rth->dst, RTAX_MTU) || 1670 dst_metric_locked(&rth->dst, RTAX_MTU) ||
1671 !net_eq(dev_net(rth->dst.dev), net) || 1671 !net_eq(dev_net(rth->dst.dev), net) ||
1672 rt_is_expired(rth)) 1672 rt_is_expired(rth))
@@ -1728,33 +1728,13 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
1728{ 1728{
1729 struct rtable *rt = (struct rtable *) dst; 1729 struct rtable *rt = (struct rtable *) dst;
1730 struct inet_peer *peer = rt->peer; 1730 struct inet_peer *peer = rt->peer;
1731 struct in_device *idev = rt->idev;
1732 1731
1733 if (peer) { 1732 if (peer) {
1734 rt->peer = NULL; 1733 rt->peer = NULL;
1735 inet_putpeer(peer); 1734 inet_putpeer(peer);
1736 } 1735 }
1737
1738 if (idev) {
1739 rt->idev = NULL;
1740 in_dev_put(idev);
1741 }
1742} 1736}
1743 1737
1744static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
1745 int how)
1746{
1747 struct rtable *rt = (struct rtable *) dst;
1748 struct in_device *idev = rt->idev;
1749 if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) {
1750 struct in_device *loopback_idev =
1751 in_dev_get(dev_net(dev)->loopback_dev);
1752 if (loopback_idev) {
1753 rt->idev = loopback_idev;
1754 in_dev_put(idev);
1755 }
1756 }
1757}
1758 1738
1759static void ipv4_link_failure(struct sk_buff *skb) 1739static void ipv4_link_failure(struct sk_buff *skb)
1760{ 1740{
@@ -1790,7 +1770,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1790 __be32 src; 1770 __be32 src;
1791 struct fib_result res; 1771 struct fib_result res;
1792 1772
1793 if (rt->fl.iif == 0) 1773 if (rt_is_output_route(rt))
1794 src = rt->rt_src; 1774 src = rt->rt_src;
1795 else { 1775 else {
1796 rcu_read_lock(); 1776 rcu_read_lock();
@@ -1910,7 +1890,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1910 rth->fl.iif = dev->ifindex; 1890 rth->fl.iif = dev->ifindex;
1911 rth->dst.dev = init_net.loopback_dev; 1891 rth->dst.dev = init_net.loopback_dev;
1912 dev_hold(rth->dst.dev); 1892 dev_hold(rth->dst.dev);
1913 rth->idev = in_dev_get(rth->dst.dev);
1914 rth->fl.oif = 0; 1893 rth->fl.oif = 0;
1915 rth->rt_gateway = daddr; 1894 rth->rt_gateway = daddr;
1916 rth->rt_spec_dst= spec_dst; 1895 rth->rt_spec_dst= spec_dst;
@@ -2050,7 +2029,6 @@ static int __mkroute_input(struct sk_buff *skb,
2050 rth->fl.iif = in_dev->dev->ifindex; 2029 rth->fl.iif = in_dev->dev->ifindex;
2051 rth->dst.dev = (out_dev)->dev; 2030 rth->dst.dev = (out_dev)->dev;
2052 dev_hold(rth->dst.dev); 2031 dev_hold(rth->dst.dev);
2053 rth->idev = in_dev_get(rth->dst.dev);
2054 rth->fl.oif = 0; 2032 rth->fl.oif = 0;
2055 rth->rt_spec_dst= spec_dst; 2033 rth->rt_spec_dst= spec_dst;
2056 2034
@@ -2111,12 +2089,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2111{ 2089{
2112 struct fib_result res; 2090 struct fib_result res;
2113 struct in_device *in_dev = __in_dev_get_rcu(dev); 2091 struct in_device *in_dev = __in_dev_get_rcu(dev);
2114 struct flowi fl = { .nl_u = { .ip4_u = 2092 struct flowi fl = { .fl4_dst = daddr,
2115 { .daddr = daddr, 2093 .fl4_src = saddr,
2116 .saddr = saddr, 2094 .fl4_tos = tos,
2117 .tos = tos, 2095 .fl4_scope = RT_SCOPE_UNIVERSE,
2118 .scope = RT_SCOPE_UNIVERSE,
2119 } },
2120 .mark = skb->mark, 2096 .mark = skb->mark,
2121 .iif = dev->ifindex }; 2097 .iif = dev->ifindex };
2122 unsigned flags = 0; 2098 unsigned flags = 0;
@@ -2231,7 +2207,6 @@ local_input:
2231 rth->fl.iif = dev->ifindex; 2207 rth->fl.iif = dev->ifindex;
2232 rth->dst.dev = net->loopback_dev; 2208 rth->dst.dev = net->loopback_dev;
2233 dev_hold(rth->dst.dev); 2209 dev_hold(rth->dst.dev);
2234 rth->idev = in_dev_get(rth->dst.dev);
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->dst.input= ip_local_deliver;
@@ -2417,9 +2392,6 @@ static int __mkroute_output(struct rtable **result,
 	if (!rth)
 		return -ENOBUFS;
 
-	in_dev_hold(in_dev);
-	rth->idev = in_dev;
-
 	atomic_set(&rth->dst.__refcnt, 1);
 	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
@@ -2506,14 +2478,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 			       const struct flowi *oldflp)
 {
 	u32 tos	= RT_FL_TOS(oldflp);
-	struct flowi fl = { .nl_u = { .ip4_u =
-			    { .daddr = oldflp->fl4_dst,
-			      .saddr = oldflp->fl4_src,
-			      .tos = tos & IPTOS_RT_MASK,
-			      .scope = ((tos & RTO_ONLINK) ?
-					RT_SCOPE_LINK :
-					RT_SCOPE_UNIVERSE),
-			    } },
+	struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
+			    .fl4_src = oldflp->fl4_src,
+			    .fl4_tos = tos & IPTOS_RT_MASK,
+			    .fl4_scope = ((tos & RTO_ONLINK) ?
+					  RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
 			    .mark = oldflp->mark,
 			    .iif = net->loopback_dev->ifindex,
 			    .oif = oldflp->oif };
@@ -2695,7 +2664,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 	     rth = rcu_dereference_bh(rth->dst.rt_next)) {
 		if (rth->fl.fl4_dst == flp->fl4_dst &&
 		    rth->fl.fl4_src == flp->fl4_src &&
-		    rth->fl.iif == 0 &&
+		    rt_is_output_route(rth) &&
 		    rth->fl.oif == flp->oif &&
 		    rth->fl.mark == flp->mark &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
@@ -2759,9 +2728,6 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 
 	rt->fl = ort->fl;
 
-	rt->idev = ort->idev;
-	if (rt->idev)
-		in_dev_hold(rt->idev);
 	rt->rt_genid = rt_genid(net);
 	rt->rt_flags = ort->rt_flags;
 	rt->rt_type = ort->rt_type;
@@ -2853,7 +2819,7 @@ static int rt_fill_info(struct net *net,
 	if (rt->dst.tclassid)
 		NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
-	if (rt->fl.iif)
+	if (rt_is_input_route(rt))
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
 	else if (rt->rt_src != rt->fl.fl4_src)
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
@@ -2878,7 +2844,7 @@ static int rt_fill_info(struct net *net,
 		}
 	}
 
-	if (rt->fl.iif) {
+	if (rt_is_input_route(rt)) {
 #ifdef CONFIG_IP_MROUTE
 		__be32 dst = rt->rt_dst;
 
@@ -2973,13 +2939,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 			err = -rt->dst.error;
 	} else {
 		struct flowi fl = {
-			.nl_u = {
-				.ip4_u = {
-					.daddr = dst,
-					.saddr = src,
-					.tos = rtm->rtm_tos,
-				},
-			},
+			.fl4_dst = dst,
+			.fl4_src = src,
+			.fl4_tos = rtm->rtm_tos,
			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
 			.mark = mark,
 		};
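
The rt_is_input_route()/rt_is_output_route() tests that replace the bare fl.iif comparisons are defined outside this file. A minimal sketch of what they presumably look like in include/net/route.h (names taken from this diff, bodies an assumption):

static inline bool rt_is_input_route(struct rtable *rt)
{
	return rt->fl.iif != 0;		/* route was created for a received packet */
}

static inline bool rt_is_output_route(struct rtable *rt)
{
	return rt->fl.iif == 0;		/* route is for locally generated traffic */
}

Reading the cache-lookup condition as rt_is_output_route(rth) rather than rth->fl.iif == 0 keeps the input/output distinction in one place if the flow key layout changes later.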
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 650cace2180..47519205a01 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -346,17 +346,14 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	 */
 	{
 		struct flowi fl = { .mark = sk->sk_mark,
-				    .nl_u = { .ip4_u =
-					      { .daddr = ((opt && opt->srr) ?
-							  opt->faddr :
-							  ireq->rmt_addr),
-						.saddr = ireq->loc_addr,
-						.tos = RT_CONN_FLAGS(sk) } },
+				    .fl4_dst = ((opt && opt->srr) ?
+						opt->faddr : ireq->rmt_addr),
+				    .fl4_src = ireq->loc_addr,
+				    .fl4_tos = RT_CONN_FLAGS(sk),
 				    .proto = IPPROTO_TCP,
 				    .flags = inet_sk_flowi_flags(sk),
-				    .uli_u = { .ports =
-					       { .sport = th->dest,
-						 .dport = th->source } } };
+				    .fl_ip_sport = th->dest,
+				    .fl_ip_dport = th->source };
 		security_req_classify_flow(req, &fl);
 		if (ip_route_output_key(sock_net(sk), &rt, &fl)) {
 			reqsk_free(req);
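
The flattened initializers used throughout this series (.fl4_dst, .fl4_src, .fl4_tos, .fl_ip_sport, .fl_ip_dport) do not change the struct flowi layout; the series presumably adds aliases in include/net/flow.h along these lines (a sketch, not verified against the header):

#define fl4_dst		nl_u.ip4_u.daddr
#define fl4_src		nl_u.ip4_u.saddr
#define fl4_tos		nl_u.ip4_u.tos
#define fl4_scope	nl_u.ip4_u.scope
#define fl_ip_sport	uli_u.ports.sport
#define fl_ip_dport	uli_u.ports.dport

Designated initializers can then name the leaf members directly, which is why every nested ".nl_u = { .ip4_u = { ... } }" block in this diff collapses to a flat list.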
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f15c36a706e..6c11eece262 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1193,7 +1193,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 
 		WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
-		     KERN_INFO "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
+		     "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
 		     tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
 #endif
 
@@ -1477,10 +1477,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			 * shouldn't happen.
 			 */
 			if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
-				 KERN_INFO "recvmsg bug: copied %X "
-				 "seq %X rcvnxt %X fl %X\n", *seq,
-				 TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
-				 flags))
+				 "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
+				 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
+				 flags))
 				break;
 
 			offset = *seq - TCP_SKB_CB(skb)->seq;
@@ -1490,10 +1489,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				goto found_ok_skb;
 			if (tcp_hdr(skb)->fin)
 				goto found_fin_ok;
-			WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: "
-					"copied %X seq %X rcvnxt %X fl %X\n",
-					*seq, TCP_SKB_CB(skb)->seq,
-					tp->rcv_nxt, flags);
+			WARN(!(flags & MSG_PEEK),
+			     "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
+			     *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
 		}
 
 		/* Well, if we have backlog, try to process it now yet. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e13da6de1fc..4fc3387aa99 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1210,12 +1210,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 };
 #endif
 
-static struct timewait_sock_ops tcp_timewait_sock_ops = {
-	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
-	.twsk_unique	= tcp_twsk_unique,
-	.twsk_destructor= tcp_twsk_destructor,
-};
-
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_extend_values tmp_ext;
@@ -1347,7 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		    tcp_death_row.sysctl_tw_recycle &&
 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
-		    peer->v4daddr == saddr) {
+		    peer->daddr.a4 == saddr) {
 			inet_peer_refcheck(peer);
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
@@ -1763,64 +1757,40 @@ do_time_wait:
 	goto discard_it;
 }
 
-/* VJ's idea. Save last timestamp seen from this destination
- * and hold it at least for normal timewait interval to use for duplicate
- * segment detection in subsequent connections, before they enter synchronized
- * state.
- */
-
-int tcp_v4_remember_stamp(struct sock *sk)
+struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
 {
+	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
 	struct inet_sock *inet = inet_sk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
-	struct inet_peer *peer = NULL;
-	int release_it = 0;
+	struct inet_peer *peer;
 
 	if (!rt || rt->rt_dst != inet->inet_daddr) {
-		peer = inet_getpeer(inet->inet_daddr, 1);
-		release_it = 1;
+		peer = inet_getpeer_v4(inet->inet_daddr, 1);
+		*release_it = true;
 	} else {
 		if (!rt->peer)
 			rt_bind_peer(rt, 1);
 		peer = rt->peer;
+		*release_it = false;
 	}
 
-	if (peer) {
-		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
-		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
-		     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
-			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
-			peer->tcp_ts = tp->rx_opt.ts_recent;
-		}
-		if (release_it)
-			inet_putpeer(peer);
-		return 1;
-	}
-
-	return 0;
+	return peer;
 }
-EXPORT_SYMBOL(tcp_v4_remember_stamp);
+EXPORT_SYMBOL(tcp_v4_get_peer);
 
-int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
+void *tcp_v4_tw_get_peer(struct sock *sk)
 {
-	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
-
-	if (peer) {
-		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
-
-		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
-		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
-		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
-			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
-			peer->tcp_ts	  = tcptw->tw_ts_recent;
-		}
-		inet_putpeer(peer);
-		return 1;
-	}
+	struct inet_timewait_sock *tw = inet_twsk(sk);
 
-	return 0;
+	return inet_getpeer_v4(tw->tw_daddr, 1);
 }
+EXPORT_SYMBOL(tcp_v4_tw_get_peer);
+
+static struct timewait_sock_ops tcp_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
+	.twsk_unique	= tcp_twsk_unique,
+	.twsk_destructor= tcp_twsk_destructor,
+	.twsk_getpeer	= tcp_v4_tw_get_peer,
+};
 
 const struct inet_connection_sock_af_ops ipv4_specific = {
 	.queue_xmit	   = ip_queue_xmit,
@@ -1828,7 +1798,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
 	.rebuild_header	   = inet_sk_rebuild_header,
 	.conn_request	   = tcp_v4_conn_request,
 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
-	.remember_stamp	   = tcp_v4_remember_stamp,
+	.get_peer	   = tcp_v4_get_peer,
 	.net_header_len	   = sizeof(struct iphdr),
 	.setsockopt	   = ip_setsockopt,
 	.getsockopt	   = ip_getsockopt,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a66735f7596..80b1f80759a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -49,6 +49,56 @@ struct inet_timewait_death_row tcp_death_row = {
 };
 EXPORT_SYMBOL_GPL(tcp_death_row);
 
+/* VJ's idea. Save last timestamp seen from this destination
+ * and hold it at least for normal timewait interval to use for duplicate
+ * segment detection in subsequent connections, before they enter synchronized
+ * state.
+ */
+
+static int tcp_remember_stamp(struct sock *sk)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_peer *peer;
+	bool release_it;
+
+	peer = icsk->icsk_af_ops->get_peer(sk, &release_it);
+	if (peer) {
+		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
+		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
+		     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
+			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
+			peer->tcp_ts = tp->rx_opt.ts_recent;
+		}
+		if (release_it)
+			inet_putpeer(peer);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
+{
+	struct sock *sk = (struct sock *) tw;
+	struct inet_peer *peer;
+
+	peer = twsk_getpeer(sk);
+	if (peer) {
+		const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+
+		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
+		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
+		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
+			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
+			peer->tcp_ts	  = tcptw->tw_ts_recent;
+		}
+		inet_putpeer(peer);
+		return 1;
+	}
+	return 0;
+}
+
 static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
 {
 	if (seq == s_win)
@@ -149,14 +199,9 @@ kill_with_rst:
 			tcptw->tw_ts_recent	= tmp_opt.rcv_tsval;
 		}
 
-		/* I am shamed, but failed to make it more elegant.
-		 * Yes, it is direct reference to IP, which is impossible
-		 * to generalize to IPv6. Taking into account that IPv6
-		 * do not understand recycling in any case, it not
-		 * a big problem in practice. --ANK */
-		if (tw->tw_family == AF_INET &&
-		    tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
-		    tcp_v4_tw_remember_stamp(tw))
+		if (tcp_death_row.sysctl_tw_recycle &&
+		    tcptw->tw_ts_recent_stamp &&
+		    tcp_tw_remember_stamp(tw))
 			inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
 					   TCP_TIMEWAIT_LEN);
 		else
@@ -274,7 +319,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 	int recycle_ok = 0;
 
 	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
-		recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);
+		recycle_ok = tcp_remember_stamp(sk);
 
 	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
 		tw = inet_twsk_alloc(sk, state);
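
tcp_tw_remember_stamp() above reaches the family-specific peer lookup through twsk_getpeer(). A sketch of that accessor, under the assumption that it lives in include/net/timewait_sock.h and simply dispatches through the protocol's timewait ops:

static inline void *twsk_getpeer(struct sock *sk)
{
	void *peer = NULL;

	/* tcp_timewait_sock_ops / tcp6_timewait_sock_ops supply ->twsk_getpeer */
	if (sk->sk_prot->twsk_prot->twsk_getpeer)
		peer = sk->sk_prot->twsk_prot->twsk_getpeer(sk);
	return peer;
}

This indirection is what lets the AF_INET special case (and the "--ANK" apology) disappear from tcp_timewait_state_process().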
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 61c2463e275..97041f24cd2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -55,7 +55,7 @@ int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
 int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = 512;
+int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
@@ -824,8 +824,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 							   &md5);
 	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 
-	if (tcp_packets_in_flight(tp) == 0)
+	if (tcp_packets_in_flight(tp) == 0) {
 		tcp_ca_event(sk, CA_EVENT_TX_START);
+		skb->ooo_okay = 1;
+	} else
+		skb->ooo_okay = 0;
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
@@ -2596,6 +2599,7 @@ int tcp_connect(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
+	int err;
 
 	tcp_connect_init(sk);
 
@@ -2618,7 +2622,9 @@ int tcp_connect(struct sock *sk)
 	sk->sk_wmem_queued += buff->truesize;
 	sk_mem_charge(sk, buff->truesize);
 	tp->packets_out += tcp_skb_pcount(buff);
-	tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+	err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+	if (err == -ECONNREFUSED)
+		return err;
 
 	/* We change tp->snd_nxt after the tcp_transmit_skb() call
 	 * in order to make this packet get counted in tcpOutSegs.
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 6211e211417..85ee7eb7e38 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -154,7 +154,7 @@ static int tcpprobe_sprint(char *tbuf, int n)
 	struct timespec tv
 		= ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));
 
-	return snprintf(tbuf, n,
+	return scnprintf(tbuf, n,
 			"%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n",
 			(unsigned long) tv.tv_sec,
 			(unsigned long) tv.tv_nsec,
@@ -174,7 +174,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf,
 		return -EINVAL;
 
 	while (cnt < len) {
-		char tbuf[128];
+		char tbuf[164];
 		int width;
 
 		/* Wait for data in buffer */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5e0a3a582a5..b37181da487 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -430,7 +430,7 @@ begin:
 
 	if (result) {
 exact_match:
-		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
 			result = NULL;
 		else if (unlikely(compute_score2(result, net, saddr, sport,
 				  daddr, hnum, dif) < badness)) {
@@ -500,7 +500,7 @@ begin:
 		goto begin;
 
 	if (result) {
-		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
 			result = NULL;
 		else if (unlikely(compute_score(result, net, saddr, hnum, sport,
 				  daddr, dport, dif) < badness)) {
@@ -890,15 +890,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (rt == NULL) {
 		struct flowi fl = { .oif = ipc.oif,
 				    .mark = sk->sk_mark,
-				    .nl_u = { .ip4_u =
-					      { .daddr = faddr,
-						.saddr = saddr,
-						.tos = tos } },
+				    .fl4_dst = faddr,
+				    .fl4_src = saddr,
+				    .fl4_tos = tos,
 				    .proto = sk->sk_protocol,
 				    .flags = inet_sk_flowi_flags(sk),
-				    .uli_u = { .ports =
-					       { .sport = inet->inet_sport,
-						 .dport = dport } } };
+				    .fl_ip_sport = inet->inet_sport,
+				    .fl_ip_dport = dport };
 		struct net *net = sock_net(sk);
 
 		security_sk_classify_flow(sk, &fl);
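
atomic_inc_not_zero_hint() lets the caller guess the current counter value so the first cmpxchg usually hits; 2 is a good hint here because a UDP socket sitting in the lookup hash normally holds one reference for the hash table and one for its owner. The helper is presumably implemented roughly like this (a sketch; the real one lives in the atomic headers):

static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint)
{
	int val, c = hint;

	if (!hint)			/* degenerate case: fall back */
		return atomic_inc_not_zero(v);

	do {
		val = atomic_cmpxchg(v, c, c + 1);	/* try "c -> c + 1" */
		if (val == c)
			return 1;	/* guessed right, reference taken */
		c = val;		/* retry with the value we observed */
	} while (c);

	return 0;			/* counter hit zero: socket is dying */
}

Plain atomic_inc_not_zero() typically starts with a read of the counter; seeding the loop with a correct hint saves that first access on the receive hot path.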
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 4464f3bff6a..b057d40adde 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -11,6 +11,7 @@
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/inetdevice.h>
+#include <linux/if_tunnel.h>
 #include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
@@ -22,12 +23,8 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
 					  xfrm_address_t *daddr)
 {
 	struct flowi fl = {
-		.nl_u = {
-			.ip4_u = {
-				.tos = tos,
-				.daddr = daddr->a4,
-			},
-		},
+		.fl4_dst = daddr->a4,
+		.fl4_tos = tos,
 	};
 	struct dst_entry *dst;
 	struct rtable *rt;
@@ -80,10 +77,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.dst.dev = dev;
 	dev_hold(dev);
 
-	xdst->u.rt.idev = in_dev_get(dev);
-	if (!xdst->u.rt.idev)
-		return -ENODEV;
-
 	xdst->u.rt.peer = rt->peer;
 	if (rt->peer)
 		atomic_inc(&rt->peer->refcnt);
@@ -158,6 +151,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 				fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
 			}
 			break;
+
+		case IPPROTO_GRE:
+			if (pskb_may_pull(skb, xprth + 12 - skb->data)) {
+				__be16 *greflags = (__be16 *)xprth;
+				__be32 *gre_hdr = (__be32 *)xprth;
+
+				if (greflags[0] & GRE_KEY) {
+					if (greflags[0] & GRE_CSUM)
+						gre_hdr++;
+					fl->fl_gre_key = gre_hdr[1];
+				}
+			}
+			break;
+
 		default:
 			fl->fl_ipsec_spi = 0;
 			break;
@@ -189,8 +196,6 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 
-	if (likely(xdst->u.rt.idev))
-		in_dev_put(xdst->u.rt.idev);
 	if (likely(xdst->u.rt.peer))
 		inet_putpeer(xdst->u.rt.peer);
 	xfrm_dst_destroy(xdst);
@@ -199,27 +204,9 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 			     int unregister)
 {
-	struct xfrm_dst *xdst;
-
 	if (!unregister)
 		return;
 
-	xdst = (struct xfrm_dst *)dst;
-	if (xdst->u.rt.idev->dev == dev) {
-		struct in_device *loopback_idev =
-			in_dev_get(dev_net(dev)->loopback_dev);
-		BUG_ON(!loopback_idev);
-
-		do {
-			in_dev_put(xdst->u.rt.idev);
-			xdst->u.rt.idev = loopback_idev;
-			in_dev_hold(loopback_idev);
-			xdst = (struct xfrm_dst *)xdst->u.dst.child;
-		} while (xdst->u.dst.xfrm);
-
-		__in_dev_put(loopback_idev);
-	}
-
 	xfrm_dst_ifdown(dst, dev);
 }
 
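
The GRE parsing added to _decode_session4() relies on the RFC 2784/2890 header layout: 2 bytes of flags, 2 bytes of protocol, then an optional 4-byte checksum+reserved word iff GRE_CSUM is set, then the optional 4-byte key iff GRE_KEY is set. Hence the 12-byte pskb_may_pull() (worst case: base + checksum + key), and hence gre_hdr[1] being the key unless one 32-bit word has to be skipped first. As a layout sketch (struct name illustrative only, not from the kernel headers):

struct gre_hdr_sketch {
	__be16 flags;		/* GRE_CSUM, GRE_KEY, GRE_SEQ, ... */
	__be16 protocol;
	/* __be32 csum_rsvd;	present iff flags & GRE_CSUM */
	/* __be32 key;		present iff flags & GRE_KEY  */
};

With fl_gre_key in the flow key, IPsec policies can distinguish individual keyed GRE tunnels instead of treating all GRE traffic between two hosts as one flow.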
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 23cc8e1ce8d..1023ad0d2b1 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3836,6 +3836,15 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
 }
 
+static inline size_t inet6_ifla6_size(void)
+{
+	return nla_total_size(4) /* IFLA_INET6_FLAGS */
+	     + nla_total_size(sizeof(struct ifla_cacheinfo))
+	     + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+	     + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+	     + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */
+}
+
 static inline size_t inet6_if_nlmsg_size(void)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
@@ -3843,13 +3852,7 @@ static inline size_t inet6_if_nlmsg_size(void)
 	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
 	       + nla_total_size(4) /* IFLA_MTU */
 	       + nla_total_size(4) /* IFLA_LINK */
-	       + nla_total_size( /* IFLA_PROTINFO */
-			nla_total_size(4) /* IFLA_INET6_FLAGS */
-			+ nla_total_size(sizeof(struct ifla_cacheinfo))
-			+ nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
-			+ nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
-			+ nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
-		 );
+	       + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */
 }
 
 static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
@@ -3896,15 +3899,70 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
 	}
 }
 
+static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
+{
+	struct nlattr *nla;
+	struct ifla_cacheinfo ci;
+
+	NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
+
+	ci.max_reasm_len = IPV6_MAXPLEN;
+	ci.tstamp = cstamp_delta(idev->tstamp);
+	ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
+	ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
+	NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
+
+	nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+	if (nla == NULL)
+		goto nla_put_failure;
+	ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
+
+	/* XXX - MC not implemented */
+
+	nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+	if (nla == NULL)
+		goto nla_put_failure;
+	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+	nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+	if (nla == NULL)
+		goto nla_put_failure;
+	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static size_t inet6_get_link_af_size(const struct net_device *dev)
+{
+	if (!__in6_dev_get(dev))
+		return 0;
+
+	return inet6_ifla6_size();
+}
+
+static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct inet6_dev *idev = __in6_dev_get(dev);
+
+	if (!idev)
+		return -ENODATA;
+
+	if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct net_device *dev = idev->dev;
-	struct nlattr *nla;
 	struct ifinfomsg *hdr;
 	struct nlmsghdr *nlh;
 	void *protoinfo;
-	struct ifla_cacheinfo ci;
 
 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
 	if (nlh == NULL)
@@ -3931,30 +3989,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 	if (protoinfo == NULL)
 		goto nla_put_failure;
 
-	NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
-
-	ci.max_reasm_len = IPV6_MAXPLEN;
-	ci.tstamp = cstamp_delta(idev->tstamp);
-	ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
-	ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
-	NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
-
-	nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
-	if (nla == NULL)
-		goto nla_put_failure;
-	ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
-
-	/* XXX - MC not implemented */
-
-	nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
-	if (nla == NULL)
-		goto nla_put_failure;
-	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
-
-	nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
-	if (nla == NULL)
+	if (inet6_fill_ifla6_attrs(skb, idev) < 0)
 		goto nla_put_failure;
-	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
 
 	nla_nest_end(skb, protoinfo);
 	return nlmsg_end(skb, nlh);
@@ -4625,6 +4661,12 @@ int unregister_inet6addr_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_inet6addr_notifier);
 
+static struct rtnl_af_ops inet6_ops = {
+	.family		  = AF_INET6,
+	.fill_link_af	  = inet6_fill_link_af,
+	.get_link_af_size = inet6_get_link_af_size,
+};
+
 /*
  *	Init / cleanup code
  */
@@ -4676,6 +4718,10 @@ int __init addrconf_init(void)
 
 	addrconf_verify(0);
 
+	err = rtnl_af_register(&inet6_ops);
+	if (err < 0)
+		goto errout_af;
+
 	err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
 	if (err < 0)
 		goto errout;
@@ -4691,6 +4737,8 @@ int __init addrconf_init(void)
 
 	return 0;
 errout:
+	rtnl_af_unregister(&inet6_ops);
+errout_af:
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 errlo:
 	unregister_pernet_subsys(&addrconf_ops);
@@ -4711,6 +4759,8 @@ void addrconf_cleanup(void)
 
 	rtnl_lock();
 
+	__rtnl_af_unregister(&inet6_ops);
+
 	/* clean dev list */
 	for_each_netdev(&init_net, dev) {
 		if (__in6_dev_get(dev) == NULL)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 8a1628023bd..e46305d1815 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -54,24 +54,54 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 
 EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
 
+struct dst_entry *inet6_csk_route_req(struct sock *sk,
+				      const struct request_sock *req)
+{
+	struct inet6_request_sock *treq = inet6_rsk(req);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *final_p, final;
+	struct dst_entry *dst;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = IPPROTO_TCP;
+	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
+	final_p = fl6_update_dst(&fl, np->opt, &final);
+	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
+	fl.oif = sk->sk_bound_dev_if;
+	fl.mark = sk->sk_mark;
+	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
+	security_req_classify_flow(req, &fl);
+
+	if (ip6_dst_lookup(sk, &dst, &fl))
+		return NULL;
+
+	if (final_p)
+		ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+	if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+		return NULL;
+
+	return dst;
+}
+
 /*
  * request_sock (formerly open request) hash tables.
  */
 static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
 			   const u32 rnd, const u16 synq_hsize)
 {
-	u32 a = (__force u32)raddr->s6_addr32[0];
-	u32 b = (__force u32)raddr->s6_addr32[1];
-	u32 c = (__force u32)raddr->s6_addr32[2];
-
-	a += JHASH_GOLDEN_RATIO;
-	b += JHASH_GOLDEN_RATIO;
-	c += rnd;
-	__jhash_mix(a, b, c);
-
-	a += (__force u32)raddr->s6_addr32[3];
-	b += (__force u32)rport;
-	__jhash_mix(a, b, c);
+	u32 c;
+
+	c = jhash_3words((__force u32)raddr->s6_addr32[0],
+			 (__force u32)raddr->s6_addr32[1],
+			 (__force u32)raddr->s6_addr32[2],
+			 rnd);
+
+	c = jhash_2words((__force u32)raddr->s6_addr32[3],
+			 (__force u32)rport,
+			 c);
 
 	return c & (synq_hsize - 1);
 }
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 70e891a20fb..4f4483e697b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -58,8 +58,6 @@ MODULE_AUTHOR("Ville Nuorvala");
 MODULE_DESCRIPTION("IPv6 tunneling device");
 MODULE_LICENSE("GPL");
 
-#define IPV6_TLV_TEL_DST_SIZE 8
-
 #ifdef IP6_TNL_DEBUG
 #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
 #else
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 6f32ffce702..9fab274019c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1843,9 +1843,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
 
 	fl = (struct flowi) {
 		.oif = vif->link,
-		.nl_u = { .ip6_u =
-				{ .daddr = ipv6h->daddr, }
-		}
+		.fl6_dst = ipv6h->daddr,
 	};
 
 	dst = ip6_route_output(net, NULL, &fl);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d1444b95ad7..49f986d626a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -82,7 +82,7 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
 static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
 
 /* Big mc list lock for all the sockets */
-static DEFINE_RWLOCK(ipv6_sk_mc_lock);
+static DEFINE_SPINLOCK(ipv6_sk_mc_lock);
 
 static void igmp6_join_group(struct ifmcaddr6 *ma);
 static void igmp6_leave_group(struct ifmcaddr6 *ma);
@@ -123,6 +123,11 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
  *	socket join on multicast group
  */
 
+#define for_each_pmc_rcu(np, pmc)				\
+	for (pmc = rcu_dereference(np->ipv6_mc_list);		\
+	     pmc != NULL;					\
+	     pmc = rcu_dereference(pmc->next))
+
 int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct net_device *dev = NULL;
@@ -134,15 +139,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	if (!ipv6_addr_is_multicast(addr))
 		return -EINVAL;
 
-	read_lock_bh(&ipv6_sk_mc_lock);
-	for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(np, mc_lst) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
 		    ipv6_addr_equal(&mc_lst->addr, addr)) {
-			read_unlock_bh(&ipv6_sk_mc_lock);
+			rcu_read_unlock();
 			return -EADDRINUSE;
 		}
 	}
-	read_unlock_bh(&ipv6_sk_mc_lock);
+	rcu_read_unlock();
 
 	mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
 
@@ -186,33 +191,41 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		return err;
 	}
 
-	write_lock_bh(&ipv6_sk_mc_lock);
+	spin_lock(&ipv6_sk_mc_lock);
 	mc_lst->next = np->ipv6_mc_list;
-	np->ipv6_mc_list = mc_lst;
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	rcu_assign_pointer(np->ipv6_mc_list, mc_lst);
+	spin_unlock(&ipv6_sk_mc_lock);
 
 	rcu_read_unlock();
 
 	return 0;
 }
 
+static void ipv6_mc_socklist_reclaim(struct rcu_head *head)
+{
+	kfree(container_of(head, struct ipv6_mc_socklist, rcu));
+}
 /*
  *	socket leave on multicast group
  */
 int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct ipv6_mc_socklist *mc_lst, **lnk;
+	struct ipv6_mc_socklist *mc_lst;
+	struct ipv6_mc_socklist __rcu **lnk;
 	struct net *net = sock_net(sk);
 
-	write_lock_bh(&ipv6_sk_mc_lock);
-	for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) {
+	spin_lock(&ipv6_sk_mc_lock);
+	for (lnk = &np->ipv6_mc_list;
+	     (mc_lst = rcu_dereference_protected(*lnk,
+			lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ;
+	      lnk = &mc_lst->next) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
 		    ipv6_addr_equal(&mc_lst->addr, addr)) {
 			struct net_device *dev;
 
 			*lnk = mc_lst->next;
-			write_unlock_bh(&ipv6_sk_mc_lock);
+			spin_unlock(&ipv6_sk_mc_lock);
 
 			rcu_read_lock();
 			dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
@@ -225,11 +238,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 			} else
 				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 			rcu_read_unlock();
-			sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+			atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+			call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
 			return 0;
 		}
 	}
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	spin_unlock(&ipv6_sk_mc_lock);
 
 	return -EADDRNOTAVAIL;
 }
@@ -257,7 +271,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
 		return NULL;
 	idev = __in6_dev_get(dev);
 	if (!idev)
-		return NULL;;
+		return NULL;
 	read_lock_bh(&idev->lock);
 	if (idev->dead) {
 		read_unlock_bh(&idev->lock);
@@ -272,12 +286,13 @@ void ipv6_sock_mc_close(struct sock *sk)
 	struct ipv6_mc_socklist *mc_lst;
 	struct net *net = sock_net(sk);
 
-	write_lock_bh(&ipv6_sk_mc_lock);
-	while ((mc_lst = np->ipv6_mc_list) != NULL) {
+	spin_lock(&ipv6_sk_mc_lock);
+	while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
+			lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
 		struct net_device *dev;
 
 		np->ipv6_mc_list = mc_lst->next;
-		write_unlock_bh(&ipv6_sk_mc_lock);
+		spin_unlock(&ipv6_sk_mc_lock);
 
 		rcu_read_lock();
 		dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
@@ -290,11 +305,13 @@ void ipv6_sock_mc_close(struct sock *sk)
 		} else
 			(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 		rcu_read_unlock();
-		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 
-		write_lock_bh(&ipv6_sk_mc_lock);
+		atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+		call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
+
+		spin_lock(&ipv6_sk_mc_lock);
 	}
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	spin_unlock(&ipv6_sk_mc_lock);
 }
 
 int ip6_mc_source(int add, int omode, struct sock *sk,
@@ -328,8 +345,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 
 	err = -EADDRNOTAVAIL;
 
-	read_lock(&ipv6_sk_mc_lock);
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -428,7 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 done:
 	if (pmclocked)
 		write_unlock(&pmc->sflock);
-	read_unlock(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
 	rcu_read_unlock();
 	if (leavegroup)
@@ -466,14 +481,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 	dev = idev->dev;
 
 	err = 0;
-	read_lock(&ipv6_sk_mc_lock);
 
 	if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
 		leavegroup = 1;
 		goto done;
 	}
 
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -521,7 +535,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 	write_unlock(&pmc->sflock);
 	err = 0;
 done:
-	read_unlock(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
 	rcu_read_unlock();
 	if (leavegroup)
@@ -562,7 +575,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	 * so reading the list is safe.
 	 */
 
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(group, &pmc->addr))
@@ -612,13 +625,13 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 	struct ip6_sf_socklist *psl;
 	int rv = 1;
 
-	read_lock(&ipv6_sk_mc_lock);
-	for (mc = np->ipv6_mc_list; mc; mc = mc->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(np, mc) {
 		if (ipv6_addr_equal(&mc->addr, mc_addr))
 			break;
 	}
 	if (!mc) {
-		read_unlock(&ipv6_sk_mc_lock);
+		rcu_read_unlock();
 		return 1;
 	}
 	read_lock(&mc->sflock);
@@ -638,7 +651,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 			rv = 0;
 	}
 	read_unlock(&mc->sflock);
-	read_unlock(&ipv6_sk_mc_lock);
+	rcu_read_unlock();
 
 	return rv;
 }
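
The conversion above is the standard rwlock-to-RCU recipe for a read-mostly singly linked list: readers walk the list locklessly inside an RCU read-side critical section, writers serialize against each other with a plain spinlock, publication happens via rcu_assign_pointer(), and freeing is deferred past all readers with call_rcu(). Condensed to its skeleton (use() is a placeholder):

	/* reader */
	rcu_read_lock();
	for_each_pmc_rcu(np, pmc)
		use(pmc);
	rcu_read_unlock();

	/* writer: insert at head */
	spin_lock(&ipv6_sk_mc_lock);
	new->next = np->ipv6_mc_list;
	rcu_assign_pointer(np->ipv6_mc_list, new);	/* orders init before publish */
	spin_unlock(&ipv6_sk_mc_lock);

	/* writer: unlink, then free only after a grace period */
	call_rcu(&old->rcu, ipv6_mc_socklist_reclaim);

Note the matching bookkeeping change: sock_kfree_s() can no longer be used because the socket may already be gone when the RCU callback runs, so the omem charge is released immediately with atomic_sub() and only the kfree() is deferred.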
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 998d6d27e7c..e18f8413020 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -141,18 +141,18 @@ struct neigh_table nd_tbl = {
 	.proxy_redo =	pndisc_redo,
 	.id =		"ndisc_cache",
 	.parms = {
 		.tbl =			&nd_tbl,
-		.base_reachable_time =	30 * HZ,
-		.retrans_time =		 1 * HZ,
+		.base_reachable_time =	ND_REACHABLE_TIME,
+		.retrans_time =		ND_RETRANS_TIMER,
 		.gc_staletime =		60 * HZ,
-		.reachable_time =	30 * HZ,
+		.reachable_time =	ND_REACHABLE_TIME,
 		.delay_probe_time =	5 * HZ,
 		.queue_len =		3,
 		.ucast_probes =		3,
 		.mcast_probes =		3,
 		.anycast_delay =	1 * HZ,
 		.proxy_delay =		(8 * HZ) / 10,
 		.proxy_qlen =		64,
 	},
 	.gc_interval =	  30 * HZ,
 	.gc_thresh1 =	 128,
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 7155b2451d7..35915e8617f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -18,10 +18,8 @@ int ip6_route_me_harder(struct sk_buff *skb)
 	struct flowi fl = {
 		.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
 		.mark = skb->mark,
-		.nl_u =
-		{ .ip6_u =
-		  { .daddr = iph->daddr,
-		    .saddr = iph->saddr, } },
+		.fl6_dst = iph->daddr,
+		.fl6_src = iph->saddr,
 	};
 
 	dst = ip6_route_output(net, skb->sk, &fl);
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 0a432c9b079..abfee91ce81 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,13 +11,13 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
 
 # objects for l3 independent conntrack
-nf_conntrack_ipv6-objs  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
+nf_conntrack_ipv6-y  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
 
 # l3 independent conntrack
 obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
 
 # defrag
-nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
+nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
 obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
 
 # matches
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0f276645375..07beeb06f75 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -104,26 +104,22 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
 			     const struct in6_addr *daddr, u32 rnd)
 {
-	u32 a, b, c;
-
-	a = (__force u32)saddr->s6_addr32[0];
-	b = (__force u32)saddr->s6_addr32[1];
-	c = (__force u32)saddr->s6_addr32[2];
-
-	a += JHASH_GOLDEN_RATIO;
-	b += JHASH_GOLDEN_RATIO;
-	c += rnd;
-	__jhash_mix(a, b, c);
-
-	a += (__force u32)saddr->s6_addr32[3];
-	b += (__force u32)daddr->s6_addr32[0];
-	c += (__force u32)daddr->s6_addr32[1];
-	__jhash_mix(a, b, c);
-
-	a += (__force u32)daddr->s6_addr32[2];
-	b += (__force u32)daddr->s6_addr32[3];
-	c += (__force u32)id;
-	__jhash_mix(a, b, c);
+	u32 c;
+
+	c = jhash_3words((__force u32)saddr->s6_addr32[0],
+			 (__force u32)saddr->s6_addr32[1],
+			 (__force u32)saddr->s6_addr32[2],
+			 rnd);
+
+	c = jhash_3words((__force u32)saddr->s6_addr32[3],
+			 (__force u32)daddr->s6_addr32[0],
+			 (__force u32)daddr->s6_addr32[1],
+			 c);
+
+	c = jhash_3words((__force u32)daddr->s6_addr32[2],
+			 (__force u32)daddr->s6_addr32[3],
+			 (__force u32)id,
+			 c);
 
 	return c & (INETFRAGS_HASHSZ - 1);
 }
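
jhash_3words()/jhash_2words() come from <linux/jhash.h>; each call folds up to three 32-bit words plus a running value into a new hash, which is exactly what the removed JHASH_GOLDEN_RATIO/__jhash_mix() boilerplate did by hand. Assumed signatures:

u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval);
u32 jhash_2words(u32 a, u32 b, u32 initval);

Chaining the previous result in as initval preserves the old mixing structure while dropping most of the open-coded lines at each call site.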
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 96455ffb76f..026caef0326 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -188,11 +188,29 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct inet6_dev *idev = rt->rt6i_idev;
+	struct inet_peer *peer = rt->rt6i_peer;
 
 	if (idev != NULL) {
 		rt->rt6i_idev = NULL;
 		in6_dev_put(idev);
 	}
+	if (peer) {
+		BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
+		rt->rt6i_peer = NULL;
+		inet_putpeer(peer);
+	}
+}
+
+void rt6_bind_peer(struct rt6_info *rt, int create)
+{
+	struct inet_peer *peer;
+
+	if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
+		return;
+
+	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
+	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
+		inet_putpeer(peer);
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -558,11 +576,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 {
 	struct flowi fl = {
 		.oif = oif,
-		.nl_u = {
-			.ip6_u = {
-				.daddr = *daddr,
-			},
-		},
+		.fl6_dst = *daddr,
 	};
 	struct dst_entry *dst;
 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
@@ -778,13 +792,9 @@ void ip6_route_input(struct sk_buff *skb)
 	int flags = RT6_LOOKUP_F_HAS_SADDR;
 	struct flowi fl = {
 		.iif = skb->dev->ifindex,
-		.nl_u = {
-			.ip6_u = {
-				.daddr = iph->daddr,
-				.saddr = iph->saddr,
-				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
-			},
-		},
+		.fl6_dst = iph->daddr,
+		.fl6_src = iph->saddr,
+		.fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
 		.mark = skb->mark,
 		.proto = iph->nexthdr,
 	};
@@ -1463,12 +1473,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
 	struct ip6rd_flowi rdfl = {
 		.fl = {
 			.oif = dev->ifindex,
-			.nl_u = {
-				.ip6_u = {
-					.daddr = *dest,
-					.saddr = *src,
-				},
-			},
+			.fl6_dst = *dest,
+			.fl6_src = *src,
 		},
 	};
 
@@ -2465,8 +2471,6 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 
 #ifdef CONFIG_PROC_FS
 
-#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
-
 struct rt6_proc_arg
 {
 	char *buffer;
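
rt6_bind_peer() above uses the usual install-once idiom for lazily attaching a shared object: any number of CPUs may race to bind a peer, cmpxchg() lets exactly one of them publish its pointer, and the losers drop the reference they took. Annotated (same code as the hunk, comments added):

	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);	/* takes a reference */
	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
		inet_putpeer(peer);	/* another CPU won the race; drop our copy */

No lock is needed because rt6i_peer only ever transitions NULL -> peer; ip6_dst_destroy() drops the final reference when the cached route dies.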
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8c4d00c7cd2..8ce38f10a54 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -731,10 +731,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	}
 
 	{
-		struct flowi fl = { .nl_u = { .ip4_u =
-					      { .daddr = dst,
-						.saddr = tiph->saddr,
-						.tos = RT_TOS(tos) } },
+		struct flowi fl = { .fl4_dst = dst,
+				    .fl4_src = tiph->saddr,
+				    .fl4_tos = RT_TOS(tos),
 				    .oif = tunnel->parms.link,
 				    .proto = IPPROTO_IPV6 };
 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
@@ -856,10 +855,9 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 	iph = &tunnel->parms.iph;
 
 	if (iph->daddr) {
-		struct flowi fl = { .nl_u = { .ip4_u =
-					      { .daddr = iph->daddr,
-						.saddr = iph->saddr,
-						.tos = RT_TOS(iph->tos) } },
+		struct flowi fl = { .fl4_dst = iph->daddr,
+				    .fl4_src = iph->saddr,
+				    .fl4_tos = RT_TOS(iph->tos),
 				    .oif = tunnel->parms.link,
 				    .proto = IPPROTO_IPV6 };
 		struct rtable *rt;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7e41e2cbb85..319458558df 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -130,6 +130,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
130 struct ipv6_pinfo *np = inet6_sk(sk); 130 struct ipv6_pinfo *np = inet6_sk(sk);
131 struct tcp_sock *tp = tcp_sk(sk); 131 struct tcp_sock *tp = tcp_sk(sk);
132 struct in6_addr *saddr = NULL, *final_p, final; 132 struct in6_addr *saddr = NULL, *final_p, final;
133 struct rt6_info *rt;
133 struct flowi fl; 134 struct flowi fl;
134 struct dst_entry *dst; 135 struct dst_entry *dst;
135 int addr_type; 136 int addr_type;
@@ -280,6 +281,26 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
280 sk->sk_gso_type = SKB_GSO_TCPV6; 281 sk->sk_gso_type = SKB_GSO_TCPV6;
281 __ip6_dst_store(sk, dst, NULL, NULL); 282 __ip6_dst_store(sk, dst, NULL, NULL);
282 283
284 rt = (struct rt6_info *) dst;
285 if (tcp_death_row.sysctl_tw_recycle &&
286 !tp->rx_opt.ts_recent_stamp &&
287 ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
288 struct inet_peer *peer = rt6_get_peer(rt);
289 /*
290 * VJ's idea. We save last timestamp seen from
291 * the destination in peer table, when entering state
292 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
293 * when trying new connection.
294 */
295 if (peer) {
296 inet_peer_refcheck(peer);
297 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
298 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
299 tp->rx_opt.ts_recent = peer->tcp_ts;
300 }
301 }
302 }
303
283 icsk->icsk_ext_hdr_len = 0; 304 icsk->icsk_ext_hdr_len = 0;
284 if (np->opt) 305 if (np->opt)
285 icsk->icsk_ext_hdr_len = (np->opt->opt_flen + 306 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
@@ -906,12 +927,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
906}; 927};
907#endif 928#endif
908 929
909static struct timewait_sock_ops tcp6_timewait_sock_ops = {
910 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
911 .twsk_unique = tcp_twsk_unique,
912 .twsk_destructor= tcp_twsk_destructor,
913};
914
915static void __tcp_v6_send_check(struct sk_buff *skb, 930static void __tcp_v6_send_check(struct sk_buff *skb,
916 struct in6_addr *saddr, struct in6_addr *daddr) 931 struct in6_addr *saddr, struct in6_addr *daddr)
917{ 932{
@@ -1176,6 +1191,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1176 struct ipv6_pinfo *np = inet6_sk(sk); 1191 struct ipv6_pinfo *np = inet6_sk(sk);
1177 struct tcp_sock *tp = tcp_sk(sk); 1192 struct tcp_sock *tp = tcp_sk(sk);
1178 __u32 isn = TCP_SKB_CB(skb)->when; 1193 __u32 isn = TCP_SKB_CB(skb)->when;
1194 struct dst_entry *dst = NULL;
1179#ifdef CONFIG_SYN_COOKIES 1195#ifdef CONFIG_SYN_COOKIES
1180 int want_cookie = 0; 1196 int want_cookie = 0;
1181#else 1197#else
@@ -1273,6 +1289,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1273 TCP_ECN_create_request(req, tcp_hdr(skb)); 1289 TCP_ECN_create_request(req, tcp_hdr(skb));
1274 1290
1275 if (!isn) { 1291 if (!isn) {
1292 struct inet_peer *peer = NULL;
1293
1276 if (ipv6_opt_accepted(sk, skb) || 1294 if (ipv6_opt_accepted(sk, skb) ||
1277 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || 1295 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1278 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { 1296 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1285,13 +1303,57 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1285 if (!sk->sk_bound_dev_if && 1303 if (!sk->sk_bound_dev_if &&
1286 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) 1304 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1287 treq->iif = inet6_iif(skb); 1305 treq->iif = inet6_iif(skb);
1288 if (!want_cookie) { 1306
1289 isn = tcp_v6_init_sequence(skb); 1307 if (want_cookie) {
1290 } else {
1291 isn = cookie_v6_init_sequence(sk, skb, &req->mss); 1308 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1292 req->cookie_ts = tmp_opt.tstamp_ok; 1309 req->cookie_ts = tmp_opt.tstamp_ok;
1310 goto have_isn;
1293 } 1311 }
1312
1313 /* VJ's idea. We save last timestamp seen
1314 * from the destination in peer table, when entering
1315 * state TIME-WAIT, and check against it before
1316 * accepting new connection request.
1317 *
1318 * If "isn" is not zero, this request hit alive
1319 * timewait bucket, so that all the necessary checks
1320 * are made in the function processing timewait state.
1321 */
1322 if (tmp_opt.saw_tstamp &&
1323 tcp_death_row.sysctl_tw_recycle &&
1324 (dst = inet6_csk_route_req(sk, req)) != NULL &&
1325 (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
1326 ipv6_addr_equal((struct in6_addr *)peer->daddr.a6,
1327 &treq->rmt_addr)) {
1328 inet_peer_refcheck(peer);
1329 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1330 (s32)(peer->tcp_ts - req->ts_recent) >
1331 TCP_PAWS_WINDOW) {
1332 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1333 goto drop_and_release;
1334 }
1335 }
1336 /* Kill the following clause, if you dislike this way. */
1337 else if (!sysctl_tcp_syncookies &&
1338 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1339 (sysctl_max_syn_backlog >> 2)) &&
1340 (!peer || !peer->tcp_ts_stamp) &&
1341 (!dst || !dst_metric(dst, RTAX_RTT))) {
1342 /* Without syncookies last quarter of
1343 * backlog is filled with destinations,
1344 * proven to be alive.
1345 * It means that we continue to communicate
1346 * to destinations, already remembered
1347 * to the moment of synflood.
1348 */
1349 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
1350 &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
1351 goto drop_and_release;
1352 }
1353
1354 isn = tcp_v6_init_sequence(skb);
1294 } 1355 }
1356have_isn:
1295 tcp_rsk(req)->snt_isn = isn; 1357 tcp_rsk(req)->snt_isn = isn;
1296 1358
1297 security_inet_conn_request(sk, skb, req); 1359 security_inet_conn_request(sk, skb, req);
@@ -1304,6 +1366,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1304 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 1366 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1305 return 0; 1367 return 0;
1306 1368
1369drop_and_release:
1370 dst_release(dst);
1307drop_and_free: 1371drop_and_free:
1308 reqsk_free(req); 1372 reqsk_free(req);
1309drop: 1373drop:
@@ -1382,28 +1446,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1382 if (sk_acceptq_is_full(sk)) 1446 if (sk_acceptq_is_full(sk))
1383 goto out_overflow; 1447 goto out_overflow;
1384 1448
1385 if (dst == NULL) { 1449 if (!dst) {
1386 struct in6_addr *final_p, final; 1450 dst = inet6_csk_route_req(sk, req);
1387 struct flowi fl; 1451 if (!dst)
1388
1389 memset(&fl, 0, sizeof(fl));
1390 fl.proto = IPPROTO_TCP;
1391 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1392 final_p = fl6_update_dst(&fl, opt, &final);
1393 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1394 fl.oif = sk->sk_bound_dev_if;
1395 fl.mark = sk->sk_mark;
1396 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1397 fl.fl_ip_sport = inet_rsk(req)->loc_port;
1398 security_req_classify_flow(req, &fl);
1399
1400 if (ip6_dst_lookup(sk, &dst, &fl))
1401 goto out;
1402
1403 if (final_p)
1404 ipv6_addr_copy(&fl.fl6_dst, final_p);
1405
1406 if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
1407 goto out; 1452 goto out;
1408 } 1453 }
1409 1454
@@ -1818,19 +1863,51 @@ do_time_wait:
1818 goto discard_it; 1863 goto discard_it;
1819} 1864}
1820 1865
1821static int tcp_v6_remember_stamp(struct sock *sk) 1866static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
1822{ 1867{
1823 /* Alas, not yet... */ 1868 struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
1824 return 0; 1869 struct ipv6_pinfo *np = inet6_sk(sk);
1870 struct inet_peer *peer;
1871
1872 if (!rt ||
1873 !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
1874 peer = inet_getpeer_v6(&np->daddr, 1);
1875 *release_it = true;
1876 } else {
1877 if (!rt->rt6i_peer)
1878 rt6_bind_peer(rt, 1);
1879 peer = rt->rt6i_peer;
 1880			*release_it = false;
1881 }
1882
1883 return peer;
1825} 1884}
1826 1885
1886static void *tcp_v6_tw_get_peer(struct sock *sk)
1887{
1888 struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
1889 struct inet_timewait_sock *tw = inet_twsk(sk);
1890
1891 if (tw->tw_family == AF_INET)
1892 return tcp_v4_tw_get_peer(sk);
1893
1894 return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
1895}
1896
1897static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1898 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1899 .twsk_unique = tcp_twsk_unique,
1900 .twsk_destructor= tcp_twsk_destructor,
1901 .twsk_getpeer = tcp_v6_tw_get_peer,
1902};
1903
1827static const struct inet_connection_sock_af_ops ipv6_specific = { 1904static const struct inet_connection_sock_af_ops ipv6_specific = {
1828 .queue_xmit = inet6_csk_xmit, 1905 .queue_xmit = inet6_csk_xmit,
1829 .send_check = tcp_v6_send_check, 1906 .send_check = tcp_v6_send_check,
1830 .rebuild_header = inet6_sk_rebuild_header, 1907 .rebuild_header = inet6_sk_rebuild_header,
1831 .conn_request = tcp_v6_conn_request, 1908 .conn_request = tcp_v6_conn_request,
1832 .syn_recv_sock = tcp_v6_syn_recv_sock, 1909 .syn_recv_sock = tcp_v6_syn_recv_sock,
1833 .remember_stamp = tcp_v6_remember_stamp, 1910 .get_peer = tcp_v6_get_peer,
1834 .net_header_len = sizeof(struct ipv6hdr), 1911 .net_header_len = sizeof(struct ipv6hdr),
1835 .setsockopt = ipv6_setsockopt, 1912 .setsockopt = ipv6_setsockopt,
1836 .getsockopt = ipv6_getsockopt, 1913 .getsockopt = ipv6_getsockopt,
@@ -1862,7 +1939,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
1862 .rebuild_header = inet_sk_rebuild_header, 1939 .rebuild_header = inet_sk_rebuild_header,
1863 .conn_request = tcp_v6_conn_request, 1940 .conn_request = tcp_v6_conn_request,
1864 .syn_recv_sock = tcp_v6_syn_recv_sock, 1941 .syn_recv_sock = tcp_v6_syn_recv_sock,
1865 .remember_stamp = tcp_v4_remember_stamp, 1942 .get_peer = tcp_v4_get_peer,
1866 .net_header_len = sizeof(struct iphdr), 1943 .net_header_len = sizeof(struct iphdr),
1867 .setsockopt = ipv6_setsockopt, 1944 .setsockopt = ipv6_setsockopt,
1868 .getsockopt = ipv6_getsockopt, 1945 .getsockopt = ipv6_getsockopt,
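
The conn_request changes above port VJ's TIME-WAIT recycling check to IPv6: a SYN is rejected when the peer's cached timestamp is fresher than the one the SYN echoes back. A minimal userspace sketch of that freshness test follows; TCP_PAWS_MSL and TCP_PAWS_WINDOW are copied from the kernel's values, while the struct and function names are illustrative only.

    #include <stdbool.h>
    #include <stdint.h>

    #define TCP_PAWS_MSL    60  /* seconds the cached identity stays trusted */
    #define TCP_PAWS_WINDOW  1  /* tolerated backward step in timestamps */

    struct peer_stamp {
        uint32_t ts;        /* last timestamp value seen from this peer */
        uint32_t ts_stamp;  /* wall-clock seconds when it was recorded */
    };

    /* True when the SYN's echoed timestamp is older than what the peer
     * sent before entering TIME-WAIT, i.e. the request must be dropped. */
    static bool paws_reject(const struct peer_stamp *p, uint32_t now_sec,
                            uint32_t syn_ts_recent)
    {
        return now_sec - p->ts_stamp < TCP_PAWS_MSL &&
               (int32_t)(p->ts - syn_ts_recent) > TCP_PAWS_WINDOW;
    }

The signed cast matters: timestamps wrap, so the comparison is done on the 32-bit difference rather than on the raw values.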
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 91def93bec8..b541a4e009f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -227,7 +227,7 @@ begin:
227 227
228 if (result) { 228 if (result) {
229exact_match: 229exact_match:
230 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) 230 if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
231 result = NULL; 231 result = NULL;
232 else if (unlikely(compute_score2(result, net, saddr, sport, 232 else if (unlikely(compute_score2(result, net, saddr, sport,
233 daddr, hnum, dif) < badness)) { 233 daddr, hnum, dif) < badness)) {
@@ -294,7 +294,7 @@ begin:
294 goto begin; 294 goto begin;
295 295
296 if (result) { 296 if (result) {
297 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) 297 if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
298 result = NULL; 298 result = NULL;
299 else if (unlikely(compute_score(result, net, hnum, saddr, sport, 299 else if (unlikely(compute_score(result, net, hnum, saddr, sport,
300 daddr, dport, dif) < badness)) { 300 daddr, dport, dif) < badness)) {
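
atomic_inc_not_zero_hint() is the same take-a-reference-unless-dying primitive, but seeded with the refcount value the caller expects (2 for an established, hashed socket), which lets the first compare-and-swap proceed without a prior atomic load. A sketch of the idea using GCC/Clang builtins; the kernel's implementation differs in detail.

    #include <stdbool.h>

    /* Bump *v unless it is zero; 'hint' is the value we expect to find,
     * so the common case skips the initial load entirely. */
    static bool inc_not_zero_hint(int *v, int hint)
    {
        int c = hint ? hint : *v;      /* fall back to a load if unhinted */

        while (c != 0) {
            int old = __sync_val_compare_and_swap(v, c, c + 1);
            if (old == c)
                return true;           /* reference taken */
            c = old;                   /* lost the race; retry with what we saw */
        }
        return false;                  /* hit zero: object is being freed */
    }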
diff --git a/net/irda/ircomm/Makefile b/net/irda/ircomm/Makefile
index 48689458c08..ab23b5ba7e3 100644
--- a/net/irda/ircomm/Makefile
+++ b/net/irda/ircomm/Makefile
@@ -4,5 +4,5 @@
4 4
5obj-$(CONFIG_IRCOMM) += ircomm.o ircomm-tty.o 5obj-$(CONFIG_IRCOMM) += ircomm.o ircomm-tty.o
6 6
7ircomm-objs := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o 7ircomm-y := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o
8ircomm-tty-objs := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o 8ircomm-tty-y := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o
diff --git a/net/irda/irlan/Makefile b/net/irda/irlan/Makefile
index 77549bc8641..94eefbc8e6b 100644
--- a/net/irda/irlan/Makefile
+++ b/net/irda/irlan/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_IRLAN) += irlan.o 5obj-$(CONFIG_IRLAN) += irlan.o
6 6
7irlan-objs := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o 7irlan-y := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o
diff --git a/net/irda/irnet/Makefile b/net/irda/irnet/Makefile
index b3ee01e0def..61c365c8a2a 100644
--- a/net/irda/irnet/Makefile
+++ b/net/irda/irnet/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_IRNET) += irnet.o 5obj-$(CONFIG_IRNET) += irnet.o
6 6
7irnet-objs := irnet_ppp.o irnet_irda.o 7irnet-y := irnet_ppp.o irnet_irda.o
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 522e219f355..110efb704c9 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -476,15 +476,13 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
476 476
477 { 477 {
478 struct flowi fl = { .oif = sk->sk_bound_dev_if, 478 struct flowi fl = { .oif = sk->sk_bound_dev_if,
479 .nl_u = { .ip4_u = { 479 .fl4_dst = daddr,
480 .daddr = daddr, 480 .fl4_src = inet->inet_saddr,
481 .saddr = inet->inet_saddr, 481 .fl4_tos = RT_CONN_FLAGS(sk),
482 .tos = RT_CONN_FLAGS(sk) } },
483 .proto = sk->sk_protocol, 482 .proto = sk->sk_protocol,
484 .flags = inet_sk_flowi_flags(sk), 483 .flags = inet_sk_flowi_flags(sk),
485 .uli_u = { .ports = { 484 .fl_ip_sport = inet->inet_sport,
486 .sport = inet->inet_sport, 485 .fl_ip_dport = inet->inet_dport };
487 .dport = inet->inet_dport } } };
488 486
489 /* If this fails, retransmit mechanism of transport layer will 487 /* If this fails, retransmit mechanism of transport layer will
490 * keep trying until route appears or the connection times 488 * keep trying until route appears or the connection times
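
The l2tp change is purely mechanical: the nested nl_u/uli_u unions are spelled as flat top-level fields. With C designated initializers the two forms produce identical objects, since unnamed members are zeroed either way; a toy illustration (the struct layout is invented for the example, not the kernel's):

    #include <stdint.h>

    struct flow {                 /* stand-in for struct flowi */
        int      oif;
        uint32_t dst, src;
        uint8_t  tos;
        uint16_t sport, dport;
    };

    /* Flat designated initializers: order is free and every field not
     * named is implicitly zero, so no nested-union spelling is needed. */
    static struct flow make_flow(uint32_t daddr, uint32_t saddr,
                                 uint16_t sport, uint16_t dport)
    {
        return (struct flow){ .dst = daddr, .src = saddr,
                              .sport = sport, .dport = dport };
    }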
diff --git a/net/lapb/Makefile b/net/lapb/Makefile
index 53f7c90db16..fff797dfc88 100644
--- a/net/lapb/Makefile
+++ b/net/lapb/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_LAPB) += lapb.o 5obj-$(CONFIG_LAPB) += lapb.o
6 6
7lapb-objs := lapb_in.o lapb_out.o lapb_subr.o lapb_timer.o lapb_iface.o 7lapb-y := lapb_in.o lapb_out.o lapb_subr.o lapb_timer.o lapb_iface.o
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index e35dbe55f52..dfd3a648a55 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -316,7 +316,6 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
316 if (unlikely(addr->sllc_family != AF_LLC)) 316 if (unlikely(addr->sllc_family != AF_LLC))
317 goto out; 317 goto out;
318 rc = -ENODEV; 318 rc = -ENODEV;
319 rtnl_lock();
320 rcu_read_lock(); 319 rcu_read_lock();
321 if (sk->sk_bound_dev_if) { 320 if (sk->sk_bound_dev_if) {
322 llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); 321 llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
@@ -334,10 +333,11 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
334 } 333 }
335 } 334 }
336 } else 335 } else
337 llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, 336 llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
338 addr->sllc_mac); 337 addr->sllc_mac);
338 if (llc->dev)
339 dev_hold(llc->dev);
339 rcu_read_unlock(); 340 rcu_read_unlock();
340 rtnl_unlock();
341 if (!llc->dev) 341 if (!llc->dev)
342 goto out; 342 goto out;
343 if (!addr->sllc_sap) { 343 if (!addr->sllc_sap) {
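
llc_ui_bind now performs the whole device lookup under rcu_read_lock() and pins the result with dev_hold() before the critical section ends, instead of holding the RTNL mutex across it. The shape of that pattern, reduced to a self-contained toy (the RCU primitives are stubbed; only the take-a-reference-inside-the-section ordering is the point):

    struct netdev { int refcnt; };

    static struct netdev the_dev = { .refcnt = 1 };

    static void rcu_lock(void)   { /* rcu_read_lock() in the kernel */ }
    static void rcu_unlock(void) { /* rcu_read_unlock() */ }

    static struct netdev *lookup_rcu(void)
    {
        return &the_dev;        /* pointer valid only inside the section */
    }

    static struct netdev *get_device(void)
    {
        struct netdev *dev;

        rcu_lock();
        dev = lookup_rcu();
        if (dev)
            dev->refcnt++;      /* pin before leaving the RCU section */
        rcu_unlock();
        return dev;             /* caller owns a reference now */
    }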
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index d2b03e0851e..4bd6ef0be38 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -147,6 +147,5 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[])
147 147
148void ieee80211_aes_key_free(struct crypto_cipher *tfm) 148void ieee80211_aes_key_free(struct crypto_cipher *tfm)
149{ 149{
150 if (tfm) 150 crypto_free_cipher(tfm);
151 crypto_free_cipher(tfm);
152} 151}
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index b4d66cca76d..d502b2684a6 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -128,6 +128,5 @@ struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[])
128 128
129void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm) 129void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm)
130{ 130{
131 if (tfm) 131 crypto_free_cipher(tfm);
132 crypto_free_cipher(tfm);
133} 132}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 18bd0e55060..0c544074479 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1299,6 +1299,13 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
1299 struct ieee80211_local *local = wiphy_priv(wiphy); 1299 struct ieee80211_local *local = wiphy_priv(wiphy);
1300 int err; 1300 int err;
1301 1301
1302 if (changed & WIPHY_PARAM_FRAG_THRESHOLD) {
1303 err = drv_set_frag_threshold(local, wiphy->frag_threshold);
1304
1305 if (err)
1306 return err;
1307 }
1308
1302 if (changed & WIPHY_PARAM_COVERAGE_CLASS) { 1309 if (changed & WIPHY_PARAM_COVERAGE_CLASS) {
1303 err = drv_set_coverage_class(local, wiphy->coverage_class); 1310 err = drv_set_coverage_class(local, wiphy->coverage_class);
1304 1311
@@ -1621,6 +1628,23 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
1621 ieee80211_queue_work(&local->hw, &local->reconfig_filter); 1628 ieee80211_queue_work(&local->hw, &local->reconfig_filter);
1622} 1629}
1623 1630
1631static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant)
1632{
1633 struct ieee80211_local *local = wiphy_priv(wiphy);
1634
1635 if (local->started)
1636 return -EOPNOTSUPP;
1637
1638 return drv_set_antenna(local, tx_ant, rx_ant);
1639}
1640
1641static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant)
1642{
1643 struct ieee80211_local *local = wiphy_priv(wiphy);
1644
1645 return drv_get_antenna(local, tx_ant, rx_ant);
1646}
1647
1624struct cfg80211_ops mac80211_config_ops = { 1648struct cfg80211_ops mac80211_config_ops = {
1625 .add_virtual_intf = ieee80211_add_iface, 1649 .add_virtual_intf = ieee80211_add_iface,
1626 .del_virtual_intf = ieee80211_del_iface, 1650 .del_virtual_intf = ieee80211_del_iface,
@@ -1673,4 +1697,6 @@ struct cfg80211_ops mac80211_config_ops = {
1673 .mgmt_tx = ieee80211_mgmt_tx, 1697 .mgmt_tx = ieee80211_mgmt_tx,
1674 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, 1698 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
1675 .mgmt_frame_register = ieee80211_mgmt_frame_register, 1699 .mgmt_frame_register = ieee80211_mgmt_frame_register,
1700 .set_antenna = ieee80211_set_antenna,
1701 .get_antenna = ieee80211_get_antenna,
1676}; 1702};
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 18260aa99c5..1f02e599a31 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -21,16 +21,30 @@ int mac80211_open_file_generic(struct inode *inode, struct file *file)
21 return 0; 21 return 0;
22} 22}
23 23
24#define DEBUGFS_READONLY_FILE(name, buflen, fmt, value...) \ 24#define DEBUGFS_FORMAT_BUFFER_SIZE 100
25
26int mac80211_format_buffer(char __user *userbuf, size_t count,
27 loff_t *ppos, char *fmt, ...)
28{
29 va_list args;
30 char buf[DEBUGFS_FORMAT_BUFFER_SIZE];
31 int res;
32
33 va_start(args, fmt);
34 res = vscnprintf(buf, sizeof(buf), fmt, args);
35 va_end(args);
36
37 return simple_read_from_buffer(userbuf, count, ppos, buf, res);
38}
39
40#define DEBUGFS_READONLY_FILE(name, fmt, value...) \
25static ssize_t name## _read(struct file *file, char __user *userbuf, \ 41static ssize_t name## _read(struct file *file, char __user *userbuf, \
26 size_t count, loff_t *ppos) \ 42 size_t count, loff_t *ppos) \
27{ \ 43{ \
28 struct ieee80211_local *local = file->private_data; \ 44 struct ieee80211_local *local = file->private_data; \
29 char buf[buflen]; \
30 int res; \
31 \ 45 \
32 res = scnprintf(buf, buflen, fmt "\n", ##value); \ 46 return mac80211_format_buffer(userbuf, count, ppos, \
33 return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ 47 fmt "\n", ##value); \
34} \ 48} \
35 \ 49 \
36static const struct file_operations name## _ops = { \ 50static const struct file_operations name## _ops = { \
@@ -46,13 +60,13 @@ static const struct file_operations name## _ops = { \
46 debugfs_create_file(#name, mode, phyd, local, &name## _ops); 60 debugfs_create_file(#name, mode, phyd, local, &name## _ops);
47 61
48 62
49DEBUGFS_READONLY_FILE(frequency, 20, "%d", 63DEBUGFS_READONLY_FILE(frequency, "%d",
50 local->hw.conf.channel->center_freq); 64 local->hw.conf.channel->center_freq);
51DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d", 65DEBUGFS_READONLY_FILE(total_ps_buffered, "%d",
52 local->total_ps_buffered); 66 local->total_ps_buffered);
53DEBUGFS_READONLY_FILE(wep_iv, 20, "%#08x", 67DEBUGFS_READONLY_FILE(wep_iv, "%#08x",
54 local->wep_iv & 0xffffff); 68 local->wep_iv & 0xffffff);
55DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", 69DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s",
56 local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); 70 local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver");
57 71
58static ssize_t tsf_read(struct file *file, char __user *user_buf, 72static ssize_t tsf_read(struct file *file, char __user *user_buf,
@@ -60,13 +74,11 @@ static ssize_t tsf_read(struct file *file, char __user *user_buf,
60{ 74{
61 struct ieee80211_local *local = file->private_data; 75 struct ieee80211_local *local = file->private_data;
62 u64 tsf; 76 u64 tsf;
63 char buf[100];
64 77
65 tsf = drv_get_tsf(local); 78 tsf = drv_get_tsf(local);
66 79
67 snprintf(buf, sizeof(buf), "0x%016llx\n", (unsigned long long) tsf); 80 return mac80211_format_buffer(user_buf, count, ppos, "0x%016llx\n",
68 81 (unsigned long long) tsf);
69 return simple_read_from_buffer(user_buf, count, ppos, buf, 19);
70} 82}
71 83
72static ssize_t tsf_write(struct file *file, 84static ssize_t tsf_write(struct file *file,
@@ -131,12 +143,9 @@ static ssize_t noack_read(struct file *file, char __user *user_buf,
131 size_t count, loff_t *ppos) 143 size_t count, loff_t *ppos)
132{ 144{
133 struct ieee80211_local *local = file->private_data; 145 struct ieee80211_local *local = file->private_data;
134 int res;
135 char buf[10];
136 146
137 res = scnprintf(buf, sizeof(buf), "%d\n", local->wifi_wme_noack_test); 147 return mac80211_format_buffer(user_buf, count, ppos, "%d\n",
138 148 local->wifi_wme_noack_test);
139 return simple_read_from_buffer(user_buf, count, ppos, buf, res);
140} 149}
141 150
142static ssize_t noack_write(struct file *file, 151static ssize_t noack_write(struct file *file,
@@ -168,12 +177,8 @@ static ssize_t uapsd_queues_read(struct file *file, char __user *user_buf,
168 size_t count, loff_t *ppos) 177 size_t count, loff_t *ppos)
169{ 178{
170 struct ieee80211_local *local = file->private_data; 179 struct ieee80211_local *local = file->private_data;
171 int res; 180 return mac80211_format_buffer(user_buf, count, ppos, "0x%x\n",
172 char buf[10]; 181 local->uapsd_queues);
173
174 res = scnprintf(buf, sizeof(buf), "0x%x\n", local->uapsd_queues);
175
176 return simple_read_from_buffer(user_buf, count, ppos, buf, res);
177} 182}
178 183
179static ssize_t uapsd_queues_write(struct file *file, 184static ssize_t uapsd_queues_write(struct file *file,
@@ -215,12 +220,9 @@ static ssize_t uapsd_max_sp_len_read(struct file *file, char __user *user_buf,
215 size_t count, loff_t *ppos) 220 size_t count, loff_t *ppos)
216{ 221{
217 struct ieee80211_local *local = file->private_data; 222 struct ieee80211_local *local = file->private_data;
218 int res;
219 char buf[10];
220 223
221 res = scnprintf(buf, sizeof(buf), "0x%x\n", local->uapsd_max_sp_len); 224 return mac80211_format_buffer(user_buf, count, ppos, "0x%x\n",
222 225 local->uapsd_max_sp_len);
223 return simple_read_from_buffer(user_buf, count, ppos, buf, res);
224} 226}
225 227
226static ssize_t uapsd_max_sp_len_write(struct file *file, 228static ssize_t uapsd_max_sp_len_write(struct file *file,
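
mac80211_format_buffer() folds the repeated stack-buffer/scnprintf/simple_read_from_buffer dance into one varargs helper, which is what lets every converted reader above shrink to a single return. A userspace model of the consolidation, with the copy-to-user step replaced by a bounded memcpy:

    #include <stdarg.h>
    #include <stdio.h>
    #include <string.h>

    #define FORMAT_BUFFER_SIZE 100

    /* Format into a fixed scratch buffer, then hand back at most
     * 'count' bytes -- one helper instead of a buffer per reader. */
    static int format_buffer(char *out, size_t count, const char *fmt, ...)
    {
        char buf[FORMAT_BUFFER_SIZE];
        va_list args;
        int res;

        va_start(args, fmt);
        res = vsnprintf(buf, sizeof(buf), fmt, args);
        va_end(args);

        if (res < 0)
            return res;
        if ((size_t)res > count)
            res = (int)count;   /* truncate like a short read would */
        memcpy(out, buf, res);
        return res;
    }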
diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
index 09cc9be3479..7c87529630f 100644
--- a/net/mac80211/debugfs.h
+++ b/net/mac80211/debugfs.h
@@ -4,6 +4,8 @@
4#ifdef CONFIG_MAC80211_DEBUGFS 4#ifdef CONFIG_MAC80211_DEBUGFS
5extern void debugfs_hw_add(struct ieee80211_local *local); 5extern void debugfs_hw_add(struct ieee80211_local *local);
6extern int mac80211_open_file_generic(struct inode *inode, struct file *file); 6extern int mac80211_open_file_generic(struct inode *inode, struct file *file);
7extern int mac80211_format_buffer(char __user *userbuf, size_t count,
8 loff_t *ppos, char *fmt, ...);
7#else 9#else
8static inline void debugfs_hw_add(struct ieee80211_local *local) 10static inline void debugfs_hw_add(struct ieee80211_local *local)
9{ 11{
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 1243d1db5c5..5822a6ce767 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -15,18 +15,17 @@
15#include "debugfs.h" 15#include "debugfs.h"
16#include "debugfs_key.h" 16#include "debugfs_key.h"
17 17
18#define KEY_READ(name, prop, buflen, format_string) \ 18#define KEY_READ(name, prop, format_string) \
19static ssize_t key_##name##_read(struct file *file, \ 19static ssize_t key_##name##_read(struct file *file, \
20 char __user *userbuf, \ 20 char __user *userbuf, \
21 size_t count, loff_t *ppos) \ 21 size_t count, loff_t *ppos) \
22{ \ 22{ \
23 char buf[buflen]; \
24 struct ieee80211_key *key = file->private_data; \ 23 struct ieee80211_key *key = file->private_data; \
25 int res = scnprintf(buf, buflen, format_string, key->prop); \ 24 return mac80211_format_buffer(userbuf, count, ppos, \
26 return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ 25 format_string, key->prop); \
27} 26}
28#define KEY_READ_D(name) KEY_READ(name, name, 20, "%d\n") 27#define KEY_READ_D(name) KEY_READ(name, name, "%d\n")
29#define KEY_READ_X(name) KEY_READ(name, name, 20, "0x%x\n") 28#define KEY_READ_X(name) KEY_READ(name, name, "0x%x\n")
30 29
31#define KEY_OPS(name) \ 30#define KEY_OPS(name) \
32static const struct file_operations key_ ##name## _ops = { \ 31static const struct file_operations key_ ##name## _ops = { \
@@ -39,9 +38,9 @@ static const struct file_operations key_ ##name## _ops = { \
39 KEY_READ_##format(name) \ 38 KEY_READ_##format(name) \
40 KEY_OPS(name) 39 KEY_OPS(name)
41 40
42#define KEY_CONF_READ(name, buflen, format_string) \ 41#define KEY_CONF_READ(name, format_string) \
43 KEY_READ(conf_##name, conf.name, buflen, format_string) 42 KEY_READ(conf_##name, conf.name, format_string)
44#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, 20, "%d\n") 43#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, "%d\n")
45 44
46#define KEY_CONF_OPS(name) \ 45#define KEY_CONF_OPS(name) \
47static const struct file_operations key_ ##name## _ops = { \ 46static const struct file_operations key_ ##name## _ops = { \
@@ -59,7 +58,7 @@ KEY_CONF_FILE(keyidx, D);
59KEY_CONF_FILE(hw_key_idx, D); 58KEY_CONF_FILE(hw_key_idx, D);
60KEY_FILE(flags, X); 59KEY_FILE(flags, X);
61KEY_FILE(tx_rx_count, D); 60KEY_FILE(tx_rx_count, D);
62KEY_READ(ifindex, sdata->name, IFNAMSIZ + 2, "%s\n"); 61KEY_READ(ifindex, sdata->name, "%s\n");
63KEY_OPS(ifindex); 62KEY_OPS(ifindex);
64 63
65static ssize_t key_algorithm_read(struct file *file, 64static ssize_t key_algorithm_read(struct file *file,
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 4601fea1784..f0fce37f406 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -17,20 +17,18 @@
17 17
 18/* sta attributes */ 18/* sta attributes */
19 19
20#define STA_READ(name, buflen, field, format_string) \ 20#define STA_READ(name, field, format_string) \
21static ssize_t sta_ ##name## _read(struct file *file, \ 21static ssize_t sta_ ##name## _read(struct file *file, \
22 char __user *userbuf, \ 22 char __user *userbuf, \
23 size_t count, loff_t *ppos) \ 23 size_t count, loff_t *ppos) \
24{ \ 24{ \
25 int res; \
26 struct sta_info *sta = file->private_data; \ 25 struct sta_info *sta = file->private_data; \
27 char buf[buflen]; \ 26 return mac80211_format_buffer(userbuf, count, ppos, \
28 res = scnprintf(buf, buflen, format_string, sta->field); \ 27 format_string, sta->field); \
29 return simple_read_from_buffer(userbuf, count, ppos, buf, res); \
30} 28}
31#define STA_READ_D(name, field) STA_READ(name, 20, field, "%d\n") 29#define STA_READ_D(name, field) STA_READ(name, field, "%d\n")
32#define STA_READ_U(name, field) STA_READ(name, 20, field, "%u\n") 30#define STA_READ_U(name, field) STA_READ(name, field, "%u\n")
33#define STA_READ_S(name, field) STA_READ(name, 20, field, "%s\n") 31#define STA_READ_S(name, field) STA_READ(name, field, "%s\n")
34 32
35#define STA_OPS(name) \ 33#define STA_OPS(name) \
36static const struct file_operations sta_ ##name## _ops = { \ 34static const struct file_operations sta_ ##name## _ops = { \
@@ -79,22 +77,18 @@ static ssize_t sta_num_ps_buf_frames_read(struct file *file,
79 char __user *userbuf, 77 char __user *userbuf,
80 size_t count, loff_t *ppos) 78 size_t count, loff_t *ppos)
81{ 79{
82 char buf[20];
83 struct sta_info *sta = file->private_data; 80 struct sta_info *sta = file->private_data;
84 int res = scnprintf(buf, sizeof(buf), "%u\n", 81 return mac80211_format_buffer(userbuf, count, ppos, "%u\n",
85 skb_queue_len(&sta->ps_tx_buf)); 82 skb_queue_len(&sta->ps_tx_buf));
86 return simple_read_from_buffer(userbuf, count, ppos, buf, res);
87} 83}
88STA_OPS(num_ps_buf_frames); 84STA_OPS(num_ps_buf_frames);
89 85
90static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf, 86static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf,
91 size_t count, loff_t *ppos) 87 size_t count, loff_t *ppos)
92{ 88{
93 char buf[20];
94 struct sta_info *sta = file->private_data; 89 struct sta_info *sta = file->private_data;
95 int res = scnprintf(buf, sizeof(buf), "%d\n", 90 return mac80211_format_buffer(userbuf, count, ppos, "%d\n",
96 jiffies_to_msecs(jiffies - sta->last_rx)); 91 jiffies_to_msecs(jiffies - sta->last_rx));
97 return simple_read_from_buffer(userbuf, count, ppos, buf, res);
98} 92}
99STA_OPS(inactive_ms); 93STA_OPS(inactive_ms);
100 94
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 16983825f8e..4244554d218 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -233,6 +233,20 @@ static inline void drv_get_tkip_seq(struct ieee80211_local *local,
233 trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16); 233 trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16);
234} 234}
235 235
236static inline int drv_set_frag_threshold(struct ieee80211_local *local,
237 u32 value)
238{
239 int ret = 0;
240
241 might_sleep();
242
243 trace_drv_set_frag_threshold(local, value);
244 if (local->ops->set_frag_threshold)
245 ret = local->ops->set_frag_threshold(&local->hw, value);
246 trace_drv_return_int(local, ret);
247 return ret;
248}
249
236static inline int drv_set_rts_threshold(struct ieee80211_local *local, 250static inline int drv_set_rts_threshold(struct ieee80211_local *local,
237 u32 value) 251 u32 value)
238{ 252{
@@ -428,4 +442,27 @@ static inline void drv_channel_switch(struct ieee80211_local *local,
428 trace_drv_return_void(local); 442 trace_drv_return_void(local);
429} 443}
430 444
445
446static inline int drv_set_antenna(struct ieee80211_local *local,
447 u32 tx_ant, u32 rx_ant)
448{
449 int ret = -EOPNOTSUPP;
450 might_sleep();
451 if (local->ops->set_antenna)
452 ret = local->ops->set_antenna(&local->hw, tx_ant, rx_ant);
453 trace_drv_set_antenna(local, tx_ant, rx_ant, ret);
454 return ret;
455}
456
457static inline int drv_get_antenna(struct ieee80211_local *local,
458 u32 *tx_ant, u32 *rx_ant)
459{
460 int ret = -EOPNOTSUPP;
461 might_sleep();
462 if (local->ops->get_antenna)
463 ret = local->ops->get_antenna(&local->hw, tx_ant, rx_ant);
464 trace_drv_get_antenna(local, *tx_ant, *rx_ant, ret);
465 return ret;
466}
467
431#endif /* __MAC80211_DRIVER_OPS */ 468#endif /* __MAC80211_DRIVER_OPS */
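
Both new wrappers follow the standard drv_*() template: might_sleep(), a default of -EOPNOTSUPP for drivers that leave the hook NULL, and a trace of the outcome. The template in isolation, with the kernel calls reduced to stand-ins:

    #include <stdio.h>

    #define EOPNOTSUPP 95

    struct ops {
        int (*set_antenna)(unsigned tx, unsigned rx);  /* optional hook */
    };

    /* Default to -EOPNOTSUPP when the driver doesn't implement the
     * hook, and record the result either way. */
    static int drv_set_antenna(const struct ops *o, unsigned tx, unsigned rx)
    {
        int ret = -EOPNOTSUPP;

        if (o->set_antenna)
            ret = o->set_antenna(tx, rx);
        printf("trace: set_antenna tx=%u rx=%u ret=%d\n", tx, rx, ret);
        return ret;
    }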
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 6831fb1641c..c2772f23ac9 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -531,6 +531,27 @@ TRACE_EVENT(drv_get_tkip_seq,
531 ) 531 )
532); 532);
533 533
534TRACE_EVENT(drv_set_frag_threshold,
535 TP_PROTO(struct ieee80211_local *local, u32 value),
536
537 TP_ARGS(local, value),
538
539 TP_STRUCT__entry(
540 LOCAL_ENTRY
541 __field(u32, value)
542 ),
543
544 TP_fast_assign(
545 LOCAL_ASSIGN;
546 __entry->value = value;
547 ),
548
549 TP_printk(
550 LOCAL_PR_FMT " value:%d",
551 LOCAL_PR_ARG, __entry->value
552 )
553);
554
534TRACE_EVENT(drv_set_rts_threshold, 555TRACE_EVENT(drv_set_rts_threshold,
535 TP_PROTO(struct ieee80211_local *local, u32 value), 556 TP_PROTO(struct ieee80211_local *local, u32 value),
536 557
@@ -862,6 +883,56 @@ TRACE_EVENT(drv_channel_switch,
862 ) 883 )
863); 884);
864 885
886TRACE_EVENT(drv_set_antenna,
887 TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret),
888
889 TP_ARGS(local, tx_ant, rx_ant, ret),
890
891 TP_STRUCT__entry(
892 LOCAL_ENTRY
893 __field(u32, tx_ant)
894 __field(u32, rx_ant)
895 __field(int, ret)
896 ),
897
898 TP_fast_assign(
899 LOCAL_ASSIGN;
900 __entry->tx_ant = tx_ant;
901 __entry->rx_ant = rx_ant;
902 __entry->ret = ret;
903 ),
904
905 TP_printk(
906 LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d",
907 LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret
908 )
909);
910
911TRACE_EVENT(drv_get_antenna,
912 TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret),
913
914 TP_ARGS(local, tx_ant, rx_ant, ret),
915
916 TP_STRUCT__entry(
917 LOCAL_ENTRY
918 __field(u32, tx_ant)
919 __field(u32, rx_ant)
920 __field(int, ret)
921 ),
922
923 TP_fast_assign(
924 LOCAL_ASSIGN;
925 __entry->tx_ant = tx_ant;
926 __entry->rx_ant = rx_ant;
927 __entry->ret = ret;
928 ),
929
930 TP_printk(
931 LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d",
932 LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret
933 )
934);
935
865/* 936/*
866 * Tracing for API calls that drivers call. 937 * Tracing for API calls that drivers call.
867 */ 938 */
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 239c4836a94..410d104b134 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -915,6 +915,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
915 915
916 sdata->u.ibss.privacy = params->privacy; 916 sdata->u.ibss.privacy = params->privacy;
917 sdata->u.ibss.basic_rates = params->basic_rates; 917 sdata->u.ibss.basic_rates = params->basic_rates;
918 memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate,
919 sizeof(params->mcast_rate));
918 920
919 sdata->vif.bss_conf.beacon_int = params->beacon_interval; 921 sdata->vif.bss_conf.beacon_int = params->beacon_interval;
920 922
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b80c3868992..5bc0745368f 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -349,6 +349,7 @@ struct ieee80211_if_managed {
349 struct work_struct chswitch_work; 349 struct work_struct chswitch_work;
350 struct work_struct beacon_connection_loss_work; 350 struct work_struct beacon_connection_loss_work;
351 351
352 unsigned long beacon_timeout;
352 unsigned long probe_timeout; 353 unsigned long probe_timeout;
353 int probe_send_count; 354 int probe_send_count;
354 355
@@ -1264,6 +1265,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
1264 int powersave); 1265 int powersave);
1265void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, 1266void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1266 struct ieee80211_hdr *hdr); 1267 struct ieee80211_hdr *hdr);
1268void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
1269 struct ieee80211_hdr *hdr);
1267void ieee80211_beacon_connection_loss_work(struct work_struct *work); 1270void ieee80211_beacon_connection_loss_work(struct work_struct *work);
1268 1271
1269void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, 1272void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
@@ -1278,6 +1281,9 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local,
1278 struct sk_buff *skb); 1281 struct sk_buff *skb);
1279int ieee80211_add_pending_skbs(struct ieee80211_local *local, 1282int ieee80211_add_pending_skbs(struct ieee80211_local *local,
1280 struct sk_buff_head *skbs); 1283 struct sk_buff_head *skbs);
1284int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local,
1285 struct sk_buff_head *skbs,
1286 void (*fn)(void *data), void *data);
1281 1287
1282void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, 1288void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
1283 u16 transaction, u16 auth_alg, 1289 u16 transaction, u16 auth_alg,
@@ -1287,6 +1293,10 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1287 const u8 *ie, size_t ie_len, 1293 const u8 *ie, size_t ie_len,
1288 enum ieee80211_band band, u32 rate_mask, 1294 enum ieee80211_band band, u32 rate_mask,
1289 u8 channel); 1295 u8 channel);
1296struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
1297 u8 *dst,
1298 const u8 *ssid, size_t ssid_len,
1299 const u8 *ie, size_t ie_len);
1290void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, 1300void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1291 const u8 *ssid, size_t ssid_len, 1301 const u8 *ssid, size_t ssid_len,
1292 const u8 *ie, size_t ie_len); 1302 const u8 *ie, size_t ie_len);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index ccd676b2f59..72df1ca7299 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -84,10 +84,17 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
84 goto out_unsupported; 84 goto out_unsupported;
85 85
86 sdata = key->sdata; 86 sdata = key->sdata;
87 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 87 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
88 /*
89 * The driver doesn't know anything about VLAN interfaces.
90 * Hence, don't send GTKs for VLAN interfaces to the driver.
91 */
92 if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE))
93 goto out_unsupported;
88 sdata = container_of(sdata->bss, 94 sdata = container_of(sdata->bss,
89 struct ieee80211_sub_if_data, 95 struct ieee80211_sub_if_data,
90 u.ap); 96 u.ap);
97 }
91 98
92 ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); 99 ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf);
93 100
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a3a9421555a..79480791494 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -28,13 +28,19 @@
28#include "rate.h" 28#include "rate.h"
29#include "led.h" 29#include "led.h"
30 30
31#define IEEE80211_MAX_NULLFUNC_TRIES 2
31#define IEEE80211_MAX_PROBE_TRIES 5 32#define IEEE80211_MAX_PROBE_TRIES 5
32 33
33/* 34/*
34 * beacon loss detection timeout 35 * Beacon loss timeout is calculated as N frames times the
35 * XXX: should depend on beacon interval 36 * advertised beacon interval. This may need to be somewhat
37 * higher than what hardware might detect to account for
38 * delays in the host processing frames. But since we also
 39 * probe on beacon miss before declaring the connection lost,
40 * default to what we want.
36 */ 41 */
37#define IEEE80211_BEACON_LOSS_TIME (2 * HZ) 42#define IEEE80211_BEACON_LOSS_COUNT 7
43
38/* 44/*
39 * Time the connection can be idle before we probe 45 * Time the connection can be idle before we probe
40 * it to see if we can still talk to the AP. 46 * it to see if we can still talk to the AP.
@@ -121,7 +127,7 @@ void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata)
121 return; 127 return;
122 128
123 mod_timer(&sdata->u.mgd.bcn_mon_timer, 129 mod_timer(&sdata->u.mgd.bcn_mon_timer,
124 round_jiffies_up(jiffies + IEEE80211_BEACON_LOSS_TIME)); 130 round_jiffies_up(jiffies + sdata->u.mgd.beacon_timeout));
125} 131}
126 132
127void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata) 133void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
@@ -871,6 +877,9 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
871 bss_info_changed |= ieee80211_handle_bss_capability(sdata, 877 bss_info_changed |= ieee80211_handle_bss_capability(sdata,
872 cbss->capability, bss->has_erp_value, bss->erp_value); 878 cbss->capability, bss->has_erp_value, bss->erp_value);
873 879
880 sdata->u.mgd.beacon_timeout = usecs_to_jiffies(ieee80211_tu_to_usec(
881 IEEE80211_BEACON_LOSS_COUNT * bss_conf->beacon_int));
882
874 sdata->u.mgd.associated = cbss; 883 sdata->u.mgd.associated = cbss;
875 memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN); 884 memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN);
876 885
@@ -1026,6 +1035,51 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1026 ieee80211_sta_reset_conn_monitor(sdata); 1035 ieee80211_sta_reset_conn_monitor(sdata);
1027} 1036}
1028 1037
1038static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
1039{
1040 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1041
1042 if (!(ifmgd->flags & (IEEE80211_STA_BEACON_POLL |
1043 IEEE80211_STA_CONNECTION_POLL)))
1044 return;
1045
1046 ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL |
1047 IEEE80211_STA_BEACON_POLL);
1048 mutex_lock(&sdata->local->iflist_mtx);
1049 ieee80211_recalc_ps(sdata->local, -1);
1050 mutex_unlock(&sdata->local->iflist_mtx);
1051
1052 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
1053 return;
1054
1055 /*
1056 * We've received a probe response, but are not sure whether
1057 * we have or will be receiving any beacons or data, so let's
1058 * schedule the timers again, just in case.
1059 */
1060 ieee80211_sta_reset_beacon_monitor(sdata);
1061
1062 mod_timer(&ifmgd->conn_mon_timer,
1063 round_jiffies_up(jiffies +
1064 IEEE80211_CONNECTION_IDLE_TIME));
1065}
1066
1067void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
1068 struct ieee80211_hdr *hdr)
1069{
1070 if (!ieee80211_is_data(hdr->frame_control) &&
1071 !ieee80211_is_nullfunc(hdr->frame_control))
1072 return;
1073
1074 ieee80211_sta_reset_conn_monitor(sdata);
1075
1076 if (ieee80211_is_nullfunc(hdr->frame_control) &&
1077 sdata->u.mgd.probe_send_count > 0) {
1078 sdata->u.mgd.probe_send_count = 0;
1079 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
1080 }
1081}
1082
1029static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) 1083static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
1030{ 1084{
1031 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 1085 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -1041,8 +1095,19 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
1041 if (ifmgd->probe_send_count >= unicast_limit) 1095 if (ifmgd->probe_send_count >= unicast_limit)
1042 dst = NULL; 1096 dst = NULL;
1043 1097
1044 ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); 1098 /*
1045 ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0); 1099 * When the hardware reports an accurate Tx ACK status, it's
1100 * better to send a nullfunc frame instead of a probe request,
1101 * as it will kick us off the AP quickly if we aren't associated
1102 * anymore. The timeout will be reset if the frame is ACKed by
1103 * the AP.
1104 */
1105 if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
1106 ieee80211_send_nullfunc(sdata->local, sdata, 0);
1107 else {
1108 ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID);
1109 ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0);
1110 }
1046 1111
1047 ifmgd->probe_send_count++; 1112 ifmgd->probe_send_count++;
1048 ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT; 1113 ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT;
@@ -1108,6 +1173,30 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
1108 mutex_unlock(&ifmgd->mtx); 1173 mutex_unlock(&ifmgd->mtx);
1109} 1174}
1110 1175
1176struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
1177 struct ieee80211_vif *vif)
1178{
1179 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
1180 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1181 struct sk_buff *skb;
1182 const u8 *ssid;
1183
1184 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
1185 return NULL;
1186
1187 ASSERT_MGD_MTX(ifmgd);
1188
1189 if (!ifmgd->associated)
1190 return NULL;
1191
1192 ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID);
1193 skb = ieee80211_build_probe_req(sdata, ifmgd->associated->bssid,
1194 ssid + 2, ssid[1], NULL, 0);
1195
1196 return skb;
1197}
1198EXPORT_SYMBOL(ieee80211_ap_probereq_get);
1199
1111static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) 1200static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
1112{ 1201{
1113 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 1202 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -1485,29 +1574,8 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
1485 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); 1574 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false);
1486 1575
1487 if (ifmgd->associated && 1576 if (ifmgd->associated &&
1488 memcmp(mgmt->bssid, ifmgd->associated->bssid, ETH_ALEN) == 0 && 1577 memcmp(mgmt->bssid, ifmgd->associated->bssid, ETH_ALEN) == 0)
1489 ifmgd->flags & (IEEE80211_STA_BEACON_POLL | 1578 ieee80211_reset_ap_probe(sdata);
1490 IEEE80211_STA_CONNECTION_POLL)) {
1491 ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL |
1492 IEEE80211_STA_BEACON_POLL);
1493 mutex_lock(&sdata->local->iflist_mtx);
1494 ieee80211_recalc_ps(sdata->local, -1);
1495 mutex_unlock(&sdata->local->iflist_mtx);
1496
1497 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
1498 return;
1499
1500 /*
1501 * We've received a probe response, but are not sure whether
1502 * we have or will be receiving any beacons or data, so let's
1503 * schedule the timers again, just in case.
1504 */
1505 ieee80211_sta_reset_beacon_monitor(sdata);
1506
1507 mod_timer(&ifmgd->conn_mon_timer,
1508 round_jiffies_up(jiffies +
1509 IEEE80211_CONNECTION_IDLE_TIME));
1510 }
1511} 1579}
1512 1580
1513/* 1581/*
@@ -1857,12 +1925,23 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1857 IEEE80211_STA_CONNECTION_POLL) && 1925 IEEE80211_STA_CONNECTION_POLL) &&
1858 ifmgd->associated) { 1926 ifmgd->associated) {
1859 u8 bssid[ETH_ALEN]; 1927 u8 bssid[ETH_ALEN];
1928 int max_tries;
1860 1929
1861 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); 1930 memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
1862 if (time_is_after_jiffies(ifmgd->probe_timeout)) 1931
1932 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
1933 max_tries = IEEE80211_MAX_NULLFUNC_TRIES;
1934 else
1935 max_tries = IEEE80211_MAX_PROBE_TRIES;
1936
1937 /* ACK received for nullfunc probing frame */
1938 if (!ifmgd->probe_send_count)
1939 ieee80211_reset_ap_probe(sdata);
1940
1941 else if (time_is_after_jiffies(ifmgd->probe_timeout))
1863 run_again(ifmgd, ifmgd->probe_timeout); 1942 run_again(ifmgd, ifmgd->probe_timeout);
1864 1943
1865 else if (ifmgd->probe_send_count < IEEE80211_MAX_PROBE_TRIES) { 1944 else if (ifmgd->probe_send_count < max_tries) {
1866#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1945#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1867 wiphy_debug(local->hw.wiphy, 1946 wiphy_debug(local->hw.wiphy,
1868 "%s: No probe response from AP %pM" 1947 "%s: No probe response from AP %pM"
@@ -1988,6 +2067,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
1988 add_timer(&ifmgd->timer); 2067 add_timer(&ifmgd->timer);
1989 if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running)) 2068 if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running))
1990 add_timer(&ifmgd->chswitch_timer); 2069 add_timer(&ifmgd->chswitch_timer);
2070 ieee80211_sta_reset_beacon_monitor(sdata);
2071 ieee80211_restart_sta_timer(sdata);
1991} 2072}
1992#endif 2073#endif
1993 2074
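
The new beacon_timeout scales with the advertised interval: seven beacon periods, converted from TUs (1 TU = 1024 us) to jiffies. A quick check of the arithmetic for the common 100 TU interval:

    #include <stdio.h>

    #define BEACON_LOSS_COUNT 7
    #define USEC_PER_TU       1024   /* one 802.11 time unit in microseconds */

    int main(void)
    {
        unsigned beacon_int_tu = 100;   /* typical AP default */
        unsigned long us = (unsigned long)BEACON_LOSS_COUNT *
                           beacon_int_tu * USEC_PER_TU;

        /* 7 * 100 * 1024 = 716800 us, i.e. ~717 ms before the monitor
         * fires, versus the old fixed 2 * HZ regardless of interval. */
        printf("beacon loss timeout: %lu us (~%lu ms)\n", us, us / 1000);
        return 0;
    }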
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 33f76993da0..3d5a2cb835c 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -211,7 +211,8 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc)
211 return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc); 211 return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc);
212} 212}
213 213
214static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) 214static void rc_send_low_broadcast(s8 *idx, u32 basic_rates,
215 struct ieee80211_supported_band *sband)
215{ 216{
216 u8 i; 217 u8 i;
217 218
@@ -222,7 +223,7 @@ static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx)
222 if (basic_rates & (1 << *idx)) 223 if (basic_rates & (1 << *idx))
223 return; /* selected rate is a basic rate */ 224 return; /* selected rate is a basic rate */
224 225
225 for (i = *idx + 1; i <= max_rate_idx; i++) { 226 for (i = *idx + 1; i <= sband->n_bitrates; i++) {
226 if (basic_rates & (1 << i)) { 227 if (basic_rates & (1 << i)) {
227 *idx = i; 228 *idx = i;
228 return; 229 return;
@@ -237,16 +238,25 @@ bool rate_control_send_low(struct ieee80211_sta *sta,
237 struct ieee80211_tx_rate_control *txrc) 238 struct ieee80211_tx_rate_control *txrc)
238{ 239{
239 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); 240 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
241 struct ieee80211_supported_band *sband = txrc->sband;
242 int mcast_rate;
240 243
241 if (!sta || !priv_sta || rc_no_data_or_no_ack(txrc)) { 244 if (!sta || !priv_sta || rc_no_data_or_no_ack(txrc)) {
242 info->control.rates[0].idx = rate_lowest_index(txrc->sband, sta); 245 info->control.rates[0].idx = rate_lowest_index(txrc->sband, sta);
243 info->control.rates[0].count = 246 info->control.rates[0].count =
244 (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 247 (info->flags & IEEE80211_TX_CTL_NO_ACK) ?
245 1 : txrc->hw->max_rate_tries; 248 1 : txrc->hw->max_rate_tries;
246 if (!sta && txrc->ap) 249 if (!sta && txrc->bss) {
250 mcast_rate = txrc->bss_conf->mcast_rate[sband->band];
251 if (mcast_rate > 0) {
252 info->control.rates[0].idx = mcast_rate - 1;
253 return true;
254 }
255
247 rc_send_low_broadcast(&info->control.rates[0].idx, 256 rc_send_low_broadcast(&info->control.rates[0].idx,
248 txrc->bss_conf->basic_rates, 257 txrc->bss_conf->basic_rates,
249 txrc->sband->n_bitrates); 258 sband);
259 }
250 return true; 260 return true;
251 } 261 }
252 return false; 262 return false;
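
For no-ack/broadcast frames the selection order is now: configured per-band mcast_rate first (stored as rate index + 1, with 0 meaning unset), then the lowest rate bumped up to the next basic rate. That decision chain, isolated into one illustrative function:

    /* Pick the multicast rate index: a configured rate wins, otherwise
     * raise the lowest index to the first basic rate at or above it. */
    static int pick_mcast_index(int mcast_rate, unsigned basic_rates,
                                int lowest, int n_bitrates)
    {
        int i;

        if (mcast_rate > 0)
            return mcast_rate - 1;      /* stored as index + 1 */

        if (basic_rates == 0 || (basic_rates & (1u << lowest)))
            return lowest;              /* unknown, or already basic */

        for (i = lowest + 1; i < n_bitrates; i++)
            if (basic_rates & (1u << i))
                return i;

        return lowest;                  /* no basic rate above it */
    }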
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 2a18d6602d4..4ad7a362fcc 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -371,6 +371,9 @@ minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, stru
371 if (likely(sta->ampdu_mlme.tid_tx[tid])) 371 if (likely(sta->ampdu_mlme.tid_tx[tid]))
372 return; 372 return;
373 373
374 if (skb_get_queue_mapping(skb) == IEEE80211_AC_VO)
375 return;
376
374 ieee80211_start_tx_ba_session(pubsta, tid); 377 ieee80211_start_tx_ba_session(pubsta, tid);
375} 378}
376 379
@@ -407,8 +410,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
407 mi->ampdu_len += info->status.ampdu_len; 410 mi->ampdu_len += info->status.ampdu_len;
408 411
409 if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) { 412 if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) {
410 mi->sample_wait = 4 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); 413 mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len);
411 mi->sample_tries = 3; 414 mi->sample_tries = 2;
412 mi->sample_count--; 415 mi->sample_count--;
413 } 416 }
414 417
@@ -506,7 +509,9 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
506 if (!mr->retry_updated) 509 if (!mr->retry_updated)
507 minstrel_calc_retransmit(mp, mi, index); 510 minstrel_calc_retransmit(mp, mi, index);
508 511
509 if (mr->probability < MINSTREL_FRAC(20, 100)) 512 if (sample)
513 rate->count = 1;
514 else if (mr->probability < MINSTREL_FRAC(20, 100))
510 rate->count = 2; 515 rate->count = 2;
511 else if (rtscts) 516 else if (rtscts)
512 rate->count = mr->retry_count_rtscts; 517 rate->count = mr->retry_count_rtscts;
@@ -562,7 +567,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
562 */ 567 */
563 if (minstrel_get_duration(sample_idx) > 568 if (minstrel_get_duration(sample_idx) >
564 minstrel_get_duration(mi->max_tp_rate)) { 569 minstrel_get_duration(mi->max_tp_rate)) {
565 if (mr->sample_skipped < 10) 570 if (mr->sample_skipped < 20)
566 goto next; 571 goto next;
567 572
568 if (mi->sample_slow++ > 2) 573 if (mi->sample_slow++ > 2)
@@ -586,6 +591,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
586 struct minstrel_ht_sta *mi = &msp->ht; 591 struct minstrel_ht_sta *mi = &msp->ht;
587 struct minstrel_priv *mp = priv; 592 struct minstrel_priv *mp = priv;
588 int sample_idx; 593 int sample_idx;
594 bool sample = false;
589 595
590 if (rate_control_send_low(sta, priv_sta, txrc)) 596 if (rate_control_send_low(sta, priv_sta, txrc))
591 return; 597 return;
@@ -596,10 +602,11 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
596 info->flags |= mi->tx_flags; 602 info->flags |= mi->tx_flags;
597 sample_idx = minstrel_get_sample_rate(mp, mi); 603 sample_idx = minstrel_get_sample_rate(mp, mi);
598 if (sample_idx >= 0) { 604 if (sample_idx >= 0) {
605 sample = true;
599 minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, 606 minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx,
600 txrc, true, false); 607 txrc, true, false);
601 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, 608 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate,
602 txrc, false, true); 609 txrc, false, false);
603 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; 610 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
604 } else { 611 } else {
605 minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, 612 minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate,
@@ -607,7 +614,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
607 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, 614 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2,
608 txrc, false, true); 615 txrc, false, true);
609 } 616 }
610 minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, true); 617 minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, !sample);
611 618
612 ar[3].count = 0; 619 ar[3].count = 0;
613 ar[3].idx = -1; 620 ar[3].idx = -1;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 54fb4a0e76f..55337709de4 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1102,8 +1102,6 @@ static void ap_sta_ps_end(struct sta_info *sta)
1102 1102
1103 atomic_dec(&sdata->bss->num_sta_ps); 1103 atomic_dec(&sdata->bss->num_sta_ps);
1104 1104
1105 clear_sta_flags(sta, WLAN_STA_PS_STA);
1106
1107#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG 1105#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
1108 printk(KERN_DEBUG "%s: STA %pM aid %d exits power save mode\n", 1106 printk(KERN_DEBUG "%s: STA %pM aid %d exits power save mode\n",
1109 sdata->name, sta->sta.addr, sta->sta.aid); 1107 sdata->name, sta->sta.addr, sta->sta.aid);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 6d8f897d876..eff58571fd7 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -199,8 +199,11 @@ static void sta_unblock(struct work_struct *wk)
199 199
200 if (!test_sta_flags(sta, WLAN_STA_PS_STA)) 200 if (!test_sta_flags(sta, WLAN_STA_PS_STA))
201 ieee80211_sta_ps_deliver_wakeup(sta); 201 ieee80211_sta_ps_deliver_wakeup(sta);
202 else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) 202 else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) {
203 clear_sta_flags(sta, WLAN_STA_PS_DRIVER);
203 ieee80211_sta_ps_deliver_poll_response(sta); 204 ieee80211_sta_ps_deliver_poll_response(sta);
205 } else
206 clear_sta_flags(sta, WLAN_STA_PS_DRIVER);
204} 207}
205 208
206static int sta_prepare_rate_control(struct ieee80211_local *local, 209static int sta_prepare_rate_control(struct ieee80211_local *local,
@@ -880,6 +883,13 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif,
880} 883}
881EXPORT_SYMBOL(ieee80211_find_sta); 884EXPORT_SYMBOL(ieee80211_find_sta);
882 885
886static void clear_sta_ps_flags(void *_sta)
887{
888 struct sta_info *sta = _sta;
889
890 clear_sta_flags(sta, WLAN_STA_PS_DRIVER | WLAN_STA_PS_STA);
891}
892
883/* powersave support code */ 893/* powersave support code */
884void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) 894void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
885{ 895{
@@ -894,7 +904,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
894 904
895 /* Send all buffered frames to the station */ 905 /* Send all buffered frames to the station */
896 sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered); 906 sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered);
897 buffered = ieee80211_add_pending_skbs(local, &sta->ps_tx_buf); 907 buffered = ieee80211_add_pending_skbs_fn(local, &sta->ps_tx_buf,
908 clear_sta_ps_flags, sta);
898 sent += buffered; 909 sent += buffered;
899 local->total_ps_buffered -= buffered; 910 local->total_ps_buffered -= buffered;
900 911
@@ -973,7 +984,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
973 984
974 if (block) 985 if (block)
975 set_sta_flags(sta, WLAN_STA_PS_DRIVER); 986 set_sta_flags(sta, WLAN_STA_PS_DRIVER);
976 else 987 else if (test_sta_flags(sta, WLAN_STA_PS_DRIVER))
977 ieee80211_queue_work(hw, &sta->drv_unblock_wk); 988 ieee80211_queue_work(hw, &sta->drv_unblock_wk);
978} 989}
979EXPORT_SYMBOL(ieee80211_sta_block_awake); 990EXPORT_SYMBOL(ieee80211_sta_block_awake);
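
Clearing WLAN_STA_PS_DRIVER and WLAN_STA_PS_STA is now done by a callback that ieee80211_add_pending_skbs_fn() runs while the queues are still stopped, so no frame can be re-buffered against stale flags between the flush and the flag clear. The ordering, reduced to a toy:

    #include <stddef.h>

    struct queue { int pending; };

    /* Requeue buffered frames and run the caller's hook before the
     * queues are woken, inside the same critical section. */
    static int flush_with_hook(struct queue *q, int n,
                               void (*fn)(void *), void *data)
    {
        /* spin_lock(...) here in the real code */
        q->pending += n;        /* move the station's buffered frames */
        if (fn)
            fn(data);           /* e.g. clear_sta_ps_flags(sta) */
        /* spin_unlock(...); then wake the queues */
        return n;
    }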
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 9265acadef3..b562d9b6a70 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -248,6 +248,7 @@ enum plink_state {
248 * @sta: station information we share with the driver 248 * @sta: station information we share with the driver
249 * @dead: set to true when sta is unlinked 249 * @dead: set to true when sta is unlinked
250 * @uploaded: set to true when sta is uploaded to the driver 250 * @uploaded: set to true when sta is uploaded to the driver
251 * @lost_packets: number of consecutive lost packets
251 */ 252 */
252struct sta_info { 253struct sta_info {
253 /* General information, mostly static */ 254 /* General information, mostly static */
@@ -335,6 +336,8 @@ struct sta_info {
335 } debugfs; 336 } debugfs;
336#endif 337#endif
337 338
339 unsigned int lost_packets;
340
338 /* keep last! */ 341 /* keep last! */
339 struct ieee80211_sta sta; 342 struct ieee80211_sta sta;
340}; 343};
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 3153c19893b..bed7e32ed90 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -155,8 +155,21 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
155 155
156 ieee80211_queue_work(&local->hw, &local->recalc_smps); 156 ieee80211_queue_work(&local->hw, &local->recalc_smps);
157 } 157 }
158
159 if ((sdata->vif.type == NL80211_IFTYPE_STATION) &&
160 (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS))
161 ieee80211_sta_tx_notify(sdata, (void *) skb->data);
158} 162}
159 163
164/*
165 * Use a static threshold for now, best value to be determined
166 * by testing ...
167 * Should it depend on:
168 * - on # of retransmissions
169 * - current throughput (higher value for higher tpt)?
170 */
171#define STA_LOST_PKT_THRESHOLD 50
172
160void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) 173void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
161{ 174{
162 struct sk_buff *skb2; 175 struct sk_buff *skb2;
@@ -243,6 +256,19 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
243 if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && 256 if (!(info->flags & IEEE80211_TX_CTL_INJECTED) &&
244 (info->flags & IEEE80211_TX_STAT_ACK)) 257 (info->flags & IEEE80211_TX_STAT_ACK))
245 ieee80211_frame_acked(sta, skb); 258 ieee80211_frame_acked(sta, skb);
259
260 if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
261 if (info->flags & IEEE80211_TX_STAT_ACK) {
262 if (sta->lost_packets)
263 sta->lost_packets = 0;
264 } else if (++sta->lost_packets >= STA_LOST_PKT_THRESHOLD) {
265 cfg80211_cqm_pktloss_notify(sta->sdata->dev,
266 sta->sta.addr,
267 sta->lost_packets,
268 GFP_ATOMIC);
269 sta->lost_packets = 0;
270 }
271 }
246 } 272 }
247 273
248 rcu_read_unlock(); 274 rcu_read_unlock();
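
The packet-loss notifier is a plain run-length counter: any ACK resets it, and a run of 50 consecutive un-ACKed frames raises one cfg80211 event and starts over. The counter in isolation:

    #include <stdbool.h>

    #define LOST_PKT_THRESHOLD 50   /* static for now, per the comment above */

    struct sta_state { unsigned lost; };

    /* Feed one tx-status report; returns true when a loss event
     * should be reported to userspace. */
    static bool tx_status_update(struct sta_state *s, bool acked)
    {
        if (acked) {
            s->lost = 0;            /* any ACK ends the losing streak */
            return false;
        }
        if (++s->lost >= LOST_PKT_THRESHOLD) {
            s->lost = 0;            /* report, then count afresh */
            return true;
        }
        return false;
    }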
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index df6aac52353..2ba74265682 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -622,7 +622,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
622 txrc.max_rate_idx = -1; 622 txrc.max_rate_idx = -1;
623 else 623 else
624 txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; 624 txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
625 txrc.ap = tx->sdata->vif.type == NL80211_IFTYPE_AP; 625 txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP ||
626 tx->sdata->vif.type == NL80211_IFTYPE_ADHOC);
626 627
627 /* set up RTS protection if desired */ 628 /* set up RTS protection if desired */
628 if (len > tx->local->hw.wiphy->rts_threshold) { 629 if (len > tx->local->hw.wiphy->rts_threshold) {
@@ -1033,6 +1034,7 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
1033 struct ieee80211_radiotap_header *rthdr = 1034 struct ieee80211_radiotap_header *rthdr =
1034 (struct ieee80211_radiotap_header *) skb->data; 1035 (struct ieee80211_radiotap_header *) skb->data;
1035 struct ieee80211_supported_band *sband; 1036 struct ieee80211_supported_band *sband;
1037 bool hw_frag;
1036 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 1038 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
1037 int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, 1039 int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len,
1038 NULL); 1040 NULL);
@@ -1042,6 +1044,9 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
1042 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; 1044 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
1043 tx->flags &= ~IEEE80211_TX_FRAGMENTED; 1045 tx->flags &= ~IEEE80211_TX_FRAGMENTED;
1044 1046
1047 /* packet is fragmented in HW if we have a non-NULL driver callback */
1048 hw_frag = (tx->local->ops->set_frag_threshold != NULL);
1049
1045 /* 1050 /*
1046 * for every radiotap entry that is present 1051 * for every radiotap entry that is present
1047 * (ieee80211_radiotap_iterator_next returns -ENOENT when no more 1052 * (ieee80211_radiotap_iterator_next returns -ENOENT when no more
@@ -1078,7 +1083,8 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
1078 } 1083 }
1079 if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) 1084 if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP)
1080 info->flags &= ~IEEE80211_TX_INTFL_DONT_ENCRYPT; 1085 info->flags &= ~IEEE80211_TX_INTFL_DONT_ENCRYPT;
1081 if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) 1086 if ((*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) &&
1087 !hw_frag)
1082 tx->flags |= IEEE80211_TX_FRAGMENTED; 1088 tx->flags |= IEEE80211_TX_FRAGMENTED;
1083 break; 1089 break;
1084 1090
@@ -1181,8 +1187,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
1181 /* 1187 /*
1182 * Set this flag (used below to indicate "automatic fragmentation"), 1188 * Set this flag (used below to indicate "automatic fragmentation"),
1183 * it will be cleared/left by radiotap as desired. 1189 * it will be cleared/left by radiotap as desired.
1190 * Only valid when fragmentation is done by the stack.
1184 */ 1191 */
1185 tx->flags |= IEEE80211_TX_FRAGMENTED; 1192 if (!local->ops->set_frag_threshold)
1193 tx->flags |= IEEE80211_TX_FRAGMENTED;
1186 1194
1187 /* process and remove the injection radiotap header */ 1195 /* process and remove the injection radiotap header */
1188 if (unlikely(info->flags & IEEE80211_TX_INTFL_HAS_RADIOTAP)) { 1196 if (unlikely(info->flags & IEEE80211_TX_INTFL_HAS_RADIOTAP)) {
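Both tx.c hunks hinge on one test: a driver that provides the new set_frag_threshold callback fragments in hardware, so the stack must not set IEEE80211_TX_FRAGMENTED, neither as the default in ieee80211_tx_prepare() nor in response to the radiotap F_FRAG flag on injected frames. A condensed sketch of that decision, with the ops structure cut down to the one member that matters here:

#include <stdbool.h>
#include <stddef.h>

#define IEEE80211_TX_FRAGMENTED	0x1

struct ops_lite {
	int (*set_frag_threshold)(int val);	/* non-NULL => HW fragments */
};

static void decide_frag(const struct ops_lite *ops, unsigned int *tx_flags,
			bool radiotap_requests_frag)
{
	bool hw_frag = ops->set_frag_threshold != NULL;

	/* default: software fragmentation, unless the hardware does it */
	if (!hw_frag)
		*tx_flags |= IEEE80211_TX_FRAGMENTED;

	/* injected frames may re-request it via radiotap; same guard */
	if (radiotap_requests_frag && !hw_frag)
		*tx_flags |= IEEE80211_TX_FRAGMENTED;
}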
@@ -2306,7 +2314,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
2306 txrc.max_rate_idx = -1; 2314 txrc.max_rate_idx = -1;
2307 else 2315 else
2308 txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; 2316 txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
2309 txrc.ap = true; 2317 txrc.bss = true;
2310 rate_control_get_rate(sdata, NULL, &txrc); 2318 rate_control_get_rate(sdata, NULL, &txrc);
2311 2319
2312 info->control.vif = vif; 2320 info->control.vif = vif;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 0b6fc92bc0d..e497476174c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -368,8 +368,9 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local,
368 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 368 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
369} 369}
370 370
371int ieee80211_add_pending_skbs(struct ieee80211_local *local, 371int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local,
372 struct sk_buff_head *skbs) 372 struct sk_buff_head *skbs,
373 void (*fn)(void *data), void *data)
373{ 374{
374 struct ieee80211_hw *hw = &local->hw; 375 struct ieee80211_hw *hw = &local->hw;
375 struct sk_buff *skb; 376 struct sk_buff *skb;
@@ -394,6 +395,9 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local,
394 __skb_queue_tail(&local->pending[queue], skb); 395 __skb_queue_tail(&local->pending[queue], skb);
395 } 396 }
396 397
398 if (fn)
399 fn(data);
400
397 for (i = 0; i < hw->queues; i++) 401 for (i = 0; i < hw->queues; i++)
398 __ieee80211_wake_queue(hw, i, 402 __ieee80211_wake_queue(hw, i,
399 IEEE80211_QUEUE_STOP_REASON_SKB_ADD); 403 IEEE80211_QUEUE_STOP_REASON_SKB_ADD);
@@ -402,6 +406,12 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local,
402 return ret; 406 return ret;
403} 407}
404 408
409int ieee80211_add_pending_skbs(struct ieee80211_local *local,
410 struct sk_buff_head *skbs)
411{
412 return ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL);
413}
414
405void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, 415void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
406 enum queue_stop_reason reason) 416 enum queue_stop_reason reason)
407{ 417{
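The util.c change turns ieee80211_add_pending_skbs() into a wrapper around a _fn variant whose optional callback runs after the frames sit on the pending queues but before the queues are woken, giving callers one spot to update their own state while transmission is still stopped. The wrapper shape, reduced to its essentials (types are stand-ins):

struct skb_queue;	/* stand-in for struct sk_buff_head */

static int add_pending_fn(struct skb_queue *pending, struct skb_queue *skbs,
			  void (*fn)(void *data), void *data)
{
	/* ... splice *skbs onto *pending under the queue lock ... */
	if (fn)
		fn(data);	/* runs before the queues are re-enabled */
	/* ... wake the transmit queues ... */
	return 0;
}

static int add_pending(struct skb_queue *pending, struct skb_queue *skbs)
{
	return add_pending_fn(pending, skbs, NULL, NULL);	/* old behaviour */
}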
@@ -1011,9 +1021,10 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1011 return pos - buffer; 1021 return pos - buffer;
1012} 1022}
1013 1023
1014void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, 1024struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
1015 const u8 *ssid, size_t ssid_len, 1025 u8 *dst,
1016 const u8 *ie, size_t ie_len) 1026 const u8 *ssid, size_t ssid_len,
1027 const u8 *ie, size_t ie_len)
1017{ 1028{
1018 struct ieee80211_local *local = sdata->local; 1029 struct ieee80211_local *local = sdata->local;
1019 struct sk_buff *skb; 1030 struct sk_buff *skb;
@@ -1027,7 +1038,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1027 if (!buf) { 1038 if (!buf) {
1028 printk(KERN_DEBUG "%s: failed to allocate temporary IE " 1039 printk(KERN_DEBUG "%s: failed to allocate temporary IE "
1029 "buffer\n", sdata->name); 1040 "buffer\n", sdata->name);
1030 return; 1041 return NULL;
1031 } 1042 }
1032 1043
1033 chan = ieee80211_frequency_to_channel( 1044 chan = ieee80211_frequency_to_channel(
@@ -1050,8 +1061,20 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1050 } 1061 }
1051 1062
1052 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; 1063 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
1053 ieee80211_tx_skb(sdata, skb);
1054 kfree(buf); 1064 kfree(buf);
1065
1066 return skb;
1067}
1068
1069void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1070 const u8 *ssid, size_t ssid_len,
1071 const u8 *ie, size_t ie_len)
1072{
1073 struct sk_buff *skb;
1074
1075 skb = ieee80211_build_probe_req(sdata, dst, ssid, ssid_len, ie, ie_len);
1076 if (skb)
1077 ieee80211_tx_skb(sdata, skb);
1055} 1078}
1056 1079
1057u32 ieee80211_sta_get_rates(struct ieee80211_local *local, 1080u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
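Splitting ieee80211_send_probe_req() into a build step and a send step lets other code obtain a fully formed probe request skb without transmitting it, for instance to hold it for later or to tag its tx info first; the builder returns NULL when the temporary IE buffer cannot be allocated, and the legacy entry point becomes build-then-send. The resulting shape, as a sketch with types reduced to void *:

void *build_probe_req(const unsigned char *ssid, unsigned long ssid_len);
void tx_skb(void *skb);

static void send_probe_req(const unsigned char *ssid, unsigned long ssid_len)
{
	void *skb = build_probe_req(ssid, ssid_len);

	if (skb)	/* allocation failure already logged by the builder */
		tx_skb(skb);
}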
@@ -1152,6 +1175,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1152 } 1175 }
1153 mutex_unlock(&local->sta_mtx); 1176 mutex_unlock(&local->sta_mtx);
1154 1177
1178 /* setup fragmentation threshold */
1179 drv_set_frag_threshold(local, hw->wiphy->frag_threshold);
1180
1155 /* setup RTS threshold */ 1181 /* setup RTS threshold */
1156 drv_set_rts_threshold(local, hw->wiphy->rts_threshold); 1182 drv_set_rts_threshold(local, hw->wiphy->rts_threshold);
1157 1183
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 34e6d02da77..58e75bbc1f9 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -21,7 +21,16 @@
21/* Default mapping in classifier to work with default 21/* Default mapping in classifier to work with default
22 * queue setup. 22 * queue setup.
23 */ 23 */
24const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 }; 24const int ieee802_1d_to_ac[8] = {
25 IEEE80211_AC_BE,
26 IEEE80211_AC_BK,
27 IEEE80211_AC_BK,
28 IEEE80211_AC_BE,
29 IEEE80211_AC_VI,
30 IEEE80211_AC_VI,
31 IEEE80211_AC_VO,
32 IEEE80211_AC_VO
33};
25 34
26static int wme_downgrade_ac(struct sk_buff *skb) 35static int wme_downgrade_ac(struct sk_buff *skb)
27{ 36{
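Spelling the wme.c table with IEEE80211_AC_* names documents the 802.1d user-priority to access-category mapping without changing a single value: in mac80211's numbering VO=0, VI=1, BE=2, BK=3, which reproduces the old {2, 3, 3, 2, 1, 1, 0, 0}. A standalone rendition of the table and lookup:

enum ac { AC_VO = 0, AC_VI = 1, AC_BE = 2, AC_BK = 3 };

/* 802.1d user priority (0..7) -> WMM access category */
static const enum ac up_to_ac[8] = {
	AC_BE,	/* 0: best effort      */
	AC_BK,	/* 1: background       */
	AC_BK,	/* 2: spare            */
	AC_BE,	/* 3: excellent effort */
	AC_VI,	/* 4: controlled load  */
	AC_VI,	/* 5: video            */
	AC_VO,	/* 6: voice            */
	AC_VO,	/* 7: network control  */
};

static enum ac classify_up(unsigned int priority)
{
	return up_to_ac[priority & 7];
}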
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 85dabb86be6..32fcbe290c0 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -173,9 +173,11 @@ next_hook:
173 outdev, &elem, okfn, hook_thresh); 173 outdev, &elem, okfn, hook_thresh);
174 if (verdict == NF_ACCEPT || verdict == NF_STOP) { 174 if (verdict == NF_ACCEPT || verdict == NF_STOP) {
175 ret = 1; 175 ret = 1;
176 } else if (verdict == NF_DROP) { 176 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
177 kfree_skb(skb); 177 kfree_skb(skb);
178 ret = -EPERM; 178 ret = -(verdict >> NF_VERDICT_BITS);
179 if (ret == 0)
180 ret = -EPERM;
179 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { 181 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
180 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, 182 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
181 verdict >> NF_VERDICT_BITS)) 183 verdict >> NF_VERDICT_BITS))
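The netfilter/core.c change lets a hook encode a specific errno in the upper bits of an NF_DROP verdict, which nf_hook_slow() now propagates instead of the blanket -EPERM; a plain NF_DROP, whose upper bits are zero, keeps the old behaviour. Assuming the usual 16-bit verdict split, the encode/decode pair looks like this (a sketch; the encode side later gains a helper macro):

#include <errno.h>

#define NF_DROP		0
#define NF_VERDICT_BITS	16

/* hook side: drop and carry a positive errno in the upper bits */
static unsigned int drop_with_err(int err)
{
	return ((unsigned int)err << NF_VERDICT_BITS) | NF_DROP;
}

/* nf_hook_slow() side, matching the hunk above */
static int verdict_to_ret(unsigned int verdict)
{
	int ret = -(int)(verdict >> NF_VERDICT_BITS);

	return ret ? ret : -EPERM;	/* plain NF_DROP stays -EPERM */
}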
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5f5daa30b0a..c6f29363922 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -110,10 +110,8 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
110 struct rt6_info *rt; 110 struct rt6_info *rt;
111 struct flowi fl = { 111 struct flowi fl = {
112 .oif = 0, 112 .oif = 0,
113 .nl_u = { 113 .fl6_dst = *addr,
114 .ip6_u = { 114 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
115 .daddr = *addr,
116 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
117 }; 115 };
118 116
119 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); 117 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index de04ea39cde..5325a3fbe4a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -96,12 +96,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
96 if (!(rt = (struct rtable *) 96 if (!(rt = (struct rtable *)
97 __ip_vs_dst_check(dest, rtos))) { 97 __ip_vs_dst_check(dest, rtos))) {
98 struct flowi fl = { 98 struct flowi fl = {
99 .oif = 0, 99 .fl4_dst = dest->addr.ip,
100 .nl_u = { 100 .fl4_tos = rtos,
101 .ip4_u = {
102 .daddr = dest->addr.ip,
103 .saddr = 0,
104 .tos = rtos, } },
105 }; 101 };
106 102
107 if (ip_route_output_key(net, &rt, &fl)) { 103 if (ip_route_output_key(net, &rt, &fl)) {
@@ -118,12 +114,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
118 spin_unlock(&dest->dst_lock); 114 spin_unlock(&dest->dst_lock);
119 } else { 115 } else {
120 struct flowi fl = { 116 struct flowi fl = {
121 .oif = 0, 117 .fl4_dst = daddr,
122 .nl_u = { 118 .fl4_tos = rtos,
123 .ip4_u = {
124 .daddr = daddr,
125 .saddr = 0,
126 .tos = rtos, } },
127 }; 119 };
128 120
129 if (ip_route_output_key(net, &rt, &fl)) { 121 if (ip_route_output_key(net, &rt, &fl)) {
@@ -169,7 +161,7 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
169 struct net *net = dev_net(dev); 161 struct net *net = dev_net(dev);
170 struct iphdr *iph = ip_hdr(skb); 162 struct iphdr *iph = ip_hdr(skb);
171 163
172 if (rt->fl.iif) { 164 if (rt_is_input_route(rt)) {
173 unsigned long orefdst = skb->_skb_refdst; 165 unsigned long orefdst = skb->_skb_refdst;
174 166
175 if (ip_route_input(skb, iph->daddr, iph->saddr, 167 if (ip_route_input(skb, iph->daddr, iph->saddr,
@@ -178,14 +170,9 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
178 refdst_drop(orefdst); 170 refdst_drop(orefdst);
179 } else { 171 } else {
180 struct flowi fl = { 172 struct flowi fl = {
181 .oif = 0, 173 .fl4_dst = iph->daddr,
182 .nl_u = { 174 .fl4_src = iph->saddr,
183 .ip4_u = { 175 .fl4_tos = RT_TOS(iph->tos),
184 .daddr = iph->daddr,
185 .saddr = iph->saddr,
186 .tos = RT_TOS(iph->tos),
187 }
188 },
189 .mark = skb->mark, 176 .mark = skb->mark,
190 }; 177 };
191 struct rtable *rt; 178 struct rtable *rt;
@@ -216,12 +203,7 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
216{ 203{
217 struct dst_entry *dst; 204 struct dst_entry *dst;
218 struct flowi fl = { 205 struct flowi fl = {
219 .oif = 0, 206 .fl6_dst = *daddr,
220 .nl_u = {
221 .ip6_u = {
222 .daddr = *daddr,
223 },
224 },
225 }; 207 };
226 208
227 dst = ip6_route_output(net, NULL, &fl); 209 dst = ip6_route_output(net, NULL, &fl);
@@ -552,7 +534,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
552#endif 534#endif
553 535
554 /* From world but DNAT to loopback address? */ 536 /* From world but DNAT to loopback address? */
555 if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) { 537 if (local && ipv4_is_loopback(rt->rt_dst) &&
538 rt_is_input_route(skb_rtable(skb))) {
556 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " 539 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
557 "stopping DNAT to loopback address"); 540 "stopping DNAT to loopback address");
558 goto tx_error_put; 541 goto tx_error_put;
@@ -1165,7 +1148,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1165#endif 1148#endif
1166 1149
1167 /* From world but DNAT to loopback address? */ 1150 /* From world but DNAT to loopback address? */
1168 if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) { 1151 if (local && ipv4_is_loopback(rt->rt_dst) &&
1152 rt_is_input_route(skb_rtable(skb))) {
1169 IP_VS_DBG(1, "%s(): " 1153 IP_VS_DBG(1, "%s(): "
1170 "stopping DNAT to loopback %pI4\n", 1154 "stopping DNAT to loopback %pI4\n",
1171 __func__, &cp->daddr.ip); 1155 __func__, &cp->daddr.ip);
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 22a2d421e7e..5128a6c4cb2 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -70,9 +70,9 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
70 return false; 70 return false;
71 fl.oif = info->priv->oif; 71 fl.oif = info->priv->oif;
72 } 72 }
73 fl.nl_u.ip4_u.daddr = info->gw.ip; 73 fl.fl4_dst = info->gw.ip;
74 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); 74 fl.fl4_tos = RT_TOS(iph->tos);
75 fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE; 75 fl.fl4_scope = RT_SCOPE_UNIVERSE;
76 if (ip_route_output_key(net, &rt, &fl) != 0) 76 if (ip_route_output_key(net, &rt, &fl) != 0)
77 return false; 77 return false;
78 78
@@ -150,9 +150,9 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
150 return false; 150 return false;
151 fl.oif = info->priv->oif; 151 fl.oif = info->priv->oif;
152 } 152 }
153 fl.nl_u.ip6_u.daddr = info->gw.in6; 153 fl.fl6_dst = info->gw.in6;
154 fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | 154 fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
155 (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; 155 (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
156 dst = ip6_route_output(net, NULL, &fl); 156 dst = ip6_route_output(net, NULL, &fl);
157 if (dst == NULL) 157 if (dst == NULL)
158 return false; 158 return false;
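Every flowi hunk in this series is the same mechanical rewrite: the nested fl.nl_u.ip4_u.* / ip6_u.* initializers become the flat fl4_* / fl6_* names, which at this point are plain #define aliases onto the old union members, so fields explicitly set to zero (.oif = 0, .saddr = 0) can simply be omitted from the designated initializer. A compilable sketch of how such aliases work (not the exact kernel header):

#include <stdint.h>

struct flowi_sketch {
	int oif;
	union {
		struct { uint32_t daddr, saddr; uint8_t tos; } ip4_u;
	} nl_u;
};

/* flat aliases in the spirit of include/net/flow.h */
#define fl4_dst	nl_u.ip4_u.daddr
#define fl4_src	nl_u.ip4_u.saddr
#define fl4_tos	nl_u.ip4_u.tos

static struct flowi_sketch fl = {
	.fl4_dst = 0x0a000001,	/* 10.0.0.1, host order for the demo */
	.fl4_tos = 0,
};	/* .oif and .fl4_src default to zero, as C guarantees */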
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8298e676f5a..246a04a1323 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -61,6 +61,7 @@
61#include <linux/kernel.h> 61#include <linux/kernel.h>
62#include <linux/kmod.h> 62#include <linux/kmod.h>
63#include <linux/slab.h> 63#include <linux/slab.h>
64#include <linux/vmalloc.h>
64#include <net/net_namespace.h> 65#include <net/net_namespace.h>
65#include <net/ip.h> 66#include <net/ip.h>
66#include <net/protocol.h> 67#include <net/protocol.h>
@@ -163,8 +164,13 @@ struct packet_mreq_max {
163static int packet_set_ring(struct sock *sk, struct tpacket_req *req, 164static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
164 int closing, int tx_ring); 165 int closing, int tx_ring);
165 166
167#define PGV_FROM_VMALLOC 1
168struct pgv {
169 char *buffer;
170};
171
166struct packet_ring_buffer { 172struct packet_ring_buffer {
167 char **pg_vec; 173 struct pgv *pg_vec;
168 unsigned int head; 174 unsigned int head;
169 unsigned int frames_per_block; 175 unsigned int frames_per_block;
170 unsigned int frame_size; 176 unsigned int frame_size;
@@ -217,6 +223,13 @@ struct packet_skb_cb {
217 223
218#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) 224#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
219 225
226static inline __pure struct page *pgv_to_page(void *addr)
227{
228 if (is_vmalloc_addr(addr))
229 return vmalloc_to_page(addr);
230 return virt_to_page(addr);
231}
232
220static void __packet_set_status(struct packet_sock *po, void *frame, int status) 233static void __packet_set_status(struct packet_sock *po, void *frame, int status)
221{ 234{
222 union { 235 union {
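pgv_to_page() is the pivot of the af_packet series: ring blocks may now come from the linear map (__get_free_pages) or from vmalloc, and virt_to_page() is only valid for the former, so every former virt_to_page() caller must translate through the helper. It also retires the old trick of walking struct page pointers with page++: vmalloc pages are physically scattered, so the walkers below advance the virtual address instead and re-translate per page, as in this sketch of the flush loop:

#include <stddef.h>

#define PAGE_SIZE 4096UL

struct page;
struct page *pgv_to_page(void *addr);		/* helper from the hunk above */
void flush_dcache_page(struct page *pg);

static void flush_block(char *block, size_t len)
{
	char *kaddr;

	/* one translation per page; no struct page arithmetic */
	for (kaddr = block; kaddr < block + len; kaddr += PAGE_SIZE)
		flush_dcache_page(pgv_to_page(kaddr));
}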
@@ -229,11 +242,11 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
229 switch (po->tp_version) { 242 switch (po->tp_version) {
230 case TPACKET_V1: 243 case TPACKET_V1:
231 h.h1->tp_status = status; 244 h.h1->tp_status = status;
232 flush_dcache_page(virt_to_page(&h.h1->tp_status)); 245 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
233 break; 246 break;
234 case TPACKET_V2: 247 case TPACKET_V2:
235 h.h2->tp_status = status; 248 h.h2->tp_status = status;
236 flush_dcache_page(virt_to_page(&h.h2->tp_status)); 249 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
237 break; 250 break;
238 default: 251 default:
239 pr_err("TPACKET version not supported\n"); 252 pr_err("TPACKET version not supported\n");
@@ -256,10 +269,10 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
256 h.raw = frame; 269 h.raw = frame;
257 switch (po->tp_version) { 270 switch (po->tp_version) {
258 case TPACKET_V1: 271 case TPACKET_V1:
259 flush_dcache_page(virt_to_page(&h.h1->tp_status)); 272 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
260 return h.h1->tp_status; 273 return h.h1->tp_status;
261 case TPACKET_V2: 274 case TPACKET_V2:
262 flush_dcache_page(virt_to_page(&h.h2->tp_status)); 275 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
263 return h.h2->tp_status; 276 return h.h2->tp_status;
264 default: 277 default:
265 pr_err("TPACKET version not supported\n"); 278 pr_err("TPACKET version not supported\n");
@@ -283,7 +296,8 @@ static void *packet_lookup_frame(struct packet_sock *po,
283 pg_vec_pos = position / rb->frames_per_block; 296 pg_vec_pos = position / rb->frames_per_block;
284 frame_offset = position % rb->frames_per_block; 297 frame_offset = position % rb->frames_per_block;
285 298
286 h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size); 299 h.raw = rb->pg_vec[pg_vec_pos].buffer +
300 (frame_offset * rb->frame_size);
287 301
288 if (status != __packet_get_status(po, h.raw)) 302 if (status != __packet_get_status(po, h.raw))
289 return NULL; 303 return NULL;
@@ -503,7 +517,8 @@ out_free:
503 return err; 517 return err;
504} 518}
505 519
506static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, 520static inline unsigned int run_filter(const struct sk_buff *skb,
521 const struct sock *sk,
507 unsigned int res) 522 unsigned int res)
508{ 523{
509 struct sk_filter *filter; 524 struct sk_filter *filter;
@@ -511,22 +526,22 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
511 rcu_read_lock_bh(); 526 rcu_read_lock_bh();
512 filter = rcu_dereference_bh(sk->sk_filter); 527 filter = rcu_dereference_bh(sk->sk_filter);
513 if (filter != NULL) 528 if (filter != NULL)
514 res = sk_run_filter(skb, filter->insns, filter->len); 529 res = sk_run_filter(skb, filter->insns);
515 rcu_read_unlock_bh(); 530 rcu_read_unlock_bh();
516 531
517 return res; 532 return res;
518} 533}
519 534
520/* 535/*
521 This function makes lazy skb cloning in hope that most of packets 536 * This function makes lazy skb cloning in hope that most of packets
522 are discarded by BPF. 537 * are discarded by BPF.
523 538 *
524 Note tricky part: we DO mangle shared skb! skb->data, skb->len 539 * Note tricky part: we DO mangle shared skb! skb->data, skb->len
525 and skb->cb are mangled. It works because (and until) packets 540 * and skb->cb are mangled. It works because (and until) packets
526 falling here are owned by current CPU. Output packets are cloned 541 * falling here are owned by current CPU. Output packets are cloned
527 by dev_queue_xmit_nit(), input packets are processed by net_bh 542 * by dev_queue_xmit_nit(), input packets are processed by net_bh
528 sequentially, so that if we return skb to original state on exit, 543 * sequentially, so that if we return skb to original state on exit,
529 we will not harm anyone. 544 * we will not harm anyone.
530 */ 545 */
531 546
532static int packet_rcv(struct sk_buff *skb, struct net_device *dev, 547static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -552,11 +567,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
552 567
553 if (dev->header_ops) { 568 if (dev->header_ops) {
554 /* The device has an explicit notion of ll header, 569 /* The device has an explicit notion of ll header,
555 exported to higher levels. 570 * exported to higher levels.
556 571 *
557 Otherwise, the device hides datails of it frame 572 * Otherwise, the device hides details of its frame
558 structure, so that corresponding packet head 573 * structure, so that corresponding packet head is
559 never delivered to user. 574 * never delivered to user.
560 */ 575 */
561 if (sk->sk_type != SOCK_DGRAM) 576 if (sk->sk_type != SOCK_DGRAM)
562 skb_push(skb, skb->data - skb_mac_header(skb)); 577 skb_push(skb, skb->data - skb_mac_header(skb));
@@ -791,17 +806,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
791 806
792 __packet_set_status(po, h.raw, status); 807 __packet_set_status(po, h.raw, status);
793 smp_mb(); 808 smp_mb();
809#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
794 { 810 {
795 struct page *p_start, *p_end; 811 u8 *start, *end;
796 u8 *h_end = h.raw + macoff + snaplen - 1; 812
797 813 end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
798 p_start = virt_to_page(h.raw); 814 for (start = h.raw; start < end; start += PAGE_SIZE)
799 p_end = virt_to_page(h_end); 815 flush_dcache_page(pgv_to_page(start));
800 while (p_start <= p_end) {
801 flush_dcache_page(p_start);
802 p_start++;
803 }
804 } 816 }
817#endif
805 818
806 sk->sk_data_ready(sk, 0); 819 sk->sk_data_ready(sk, 0);
807 820
@@ -907,7 +920,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
907 } 920 }
908 921
909 err = -EFAULT; 922 err = -EFAULT;
910 page = virt_to_page(data);
911 offset = offset_in_page(data); 923 offset = offset_in_page(data);
912 len_max = PAGE_SIZE - offset; 924 len_max = PAGE_SIZE - offset;
913 len = ((to_write > len_max) ? len_max : to_write); 925 len = ((to_write > len_max) ? len_max : to_write);
@@ -926,11 +938,11 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
926 return -EFAULT; 938 return -EFAULT;
927 } 939 }
928 940
941 page = pgv_to_page(data);
942 data += len;
929 flush_dcache_page(page); 943 flush_dcache_page(page);
930 get_page(page); 944 get_page(page);
931 skb_fill_page_desc(skb, 945 skb_fill_page_desc(skb, nr_frags, page, offset, len);
932 nr_frags,
933 page++, offset, len);
934 to_write -= len; 946 to_write -= len;
935 offset = 0; 947 offset = 0;
936 len_max = PAGE_SIZE; 948 len_max = PAGE_SIZE;
@@ -2325,37 +2337,70 @@ static const struct vm_operations_struct packet_mmap_ops = {
2325 .close = packet_mm_close, 2337 .close = packet_mm_close,
2326}; 2338};
2327 2339
2328static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) 2340static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
2341 unsigned int len)
2329{ 2342{
2330 int i; 2343 int i;
2331 2344
2332 for (i = 0; i < len; i++) { 2345 for (i = 0; i < len; i++) {
2333 if (likely(pg_vec[i])) 2346 if (likely(pg_vec[i].buffer)) {
2334 free_pages((unsigned long) pg_vec[i], order); 2347 if (is_vmalloc_addr(pg_vec[i].buffer))
2348 vfree(pg_vec[i].buffer);
2349 else
2350 free_pages((unsigned long)pg_vec[i].buffer,
2351 order);
2352 pg_vec[i].buffer = NULL;
2353 }
2335 } 2354 }
2336 kfree(pg_vec); 2355 kfree(pg_vec);
2337} 2356}
2338 2357
2339static inline char *alloc_one_pg_vec_page(unsigned long order) 2358static inline char *alloc_one_pg_vec_page(unsigned long order)
2340{ 2359{
2341 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN; 2360 char *buffer = NULL;
2361 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
2362 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
2342 2363
2343 return (char *) __get_free_pages(gfp_flags, order); 2364 buffer = (char *) __get_free_pages(gfp_flags, order);
2365
2366 if (buffer)
2367 return buffer;
2368
2369 /*
2370 * __get_free_pages failed, fall back to vmalloc
2371 */
2372 buffer = vzalloc((1 << order) * PAGE_SIZE);
2373
2374 if (buffer)
2375 return buffer;
2376
2377 /*
 2378 * vmalloc failed, let's dig into swap here
2379 */
2380 gfp_flags &= ~__GFP_NORETRY;
2381 buffer = (char *)__get_free_pages(gfp_flags, order);
2382 if (buffer)
2383 return buffer;
2384
2385 /*
2386 * complete and utter failure
2387 */
2388 return NULL;
2344} 2389}
2345 2390
2346static char **alloc_pg_vec(struct tpacket_req *req, int order) 2391static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
2347{ 2392{
2348 unsigned int block_nr = req->tp_block_nr; 2393 unsigned int block_nr = req->tp_block_nr;
2349 char **pg_vec; 2394 struct pgv *pg_vec;
2350 int i; 2395 int i;
2351 2396
2352 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL); 2397 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
2353 if (unlikely(!pg_vec)) 2398 if (unlikely(!pg_vec))
2354 goto out; 2399 goto out;
2355 2400
2356 for (i = 0; i < block_nr; i++) { 2401 for (i = 0; i < block_nr; i++) {
2357 pg_vec[i] = alloc_one_pg_vec_page(order); 2402 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
2358 if (unlikely(!pg_vec[i])) 2403 if (unlikely(!pg_vec[i].buffer))
2359 goto out_free_pgvec; 2404 goto out_free_pgvec;
2360 } 2405 }
2361 2406
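alloc_one_pg_vec_page() now degrades gracefully through three strategies: a cheap page allocation that refuses to retry or warn, then vzalloc(), which only needs virtually contiguous memory and so survives fragmentation, and finally the page allocator again with __GFP_NORETRY cleared so reclaim and swap may run. The control flow, reduced to stand-ins:

#include <stddef.h>

/* stand-ins for __get_free_pages() and vzalloc(); order = log2(pages) */
char *get_pages(unsigned long order, int may_reclaim);
char *vzalloc_bytes(size_t bytes);

static char *alloc_block(unsigned long order)
{
	char *buf;

	buf = get_pages(order, 0);	/* fast path: no reclaim, no warning */
	if (buf)
		return buf;

	buf = vzalloc_bytes((1UL << order) * 4096);	/* fragmentation-proof */
	if (buf)
		return buf;

	return get_pages(order, 1);	/* last resort: allow reclaim/swap */
}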
@@ -2371,7 +2416,7 @@ out_free_pgvec:
2371static int packet_set_ring(struct sock *sk, struct tpacket_req *req, 2416static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2372 int closing, int tx_ring) 2417 int closing, int tx_ring)
2373{ 2418{
2374 char **pg_vec = NULL; 2419 struct pgv *pg_vec = NULL;
2375 struct packet_sock *po = pkt_sk(sk); 2420 struct packet_sock *po = pkt_sk(sk);
2376 int was_running, order = 0; 2421 int was_running, order = 0;
2377 struct packet_ring_buffer *rb; 2422 struct packet_ring_buffer *rb;
@@ -2533,15 +2578,17 @@ static int packet_mmap(struct file *file, struct socket *sock,
2533 continue; 2578 continue;
2534 2579
2535 for (i = 0; i < rb->pg_vec_len; i++) { 2580 for (i = 0; i < rb->pg_vec_len; i++) {
2536 struct page *page = virt_to_page(rb->pg_vec[i]); 2581 struct page *page;
2582 void *kaddr = rb->pg_vec[i].buffer;
2537 int pg_num; 2583 int pg_num;
2538 2584
2539 for (pg_num = 0; pg_num < rb->pg_vec_pages; 2585 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
2540 pg_num++, page++) { 2586 page = pgv_to_page(kaddr);
2541 err = vm_insert_page(vma, start, page); 2587 err = vm_insert_page(vma, start, page);
2542 if (unlikely(err)) 2588 if (unlikely(err))
2543 goto out; 2589 goto out;
2544 start += PAGE_SIZE; 2590 start += PAGE_SIZE;
2591 kaddr += PAGE_SIZE;
2545 } 2592 }
2546 } 2593 }
2547 } 2594 }
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index d62bbba649b..e10b1b182ce 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -1,6 +1,6 @@
1obj-$(CONFIG_PHONET) += phonet.o pn_pep.o 1obj-$(CONFIG_PHONET) += phonet.o pn_pep.o
2 2
3phonet-objs := \ 3phonet-y := \
4 pn_dev.o \ 4 pn_dev.o \
5 pn_netlink.o \ 5 pn_netlink.o \
6 socket.o \ 6 socket.o \
@@ -8,4 +8,4 @@ phonet-objs := \
8 sysctl.o \ 8 sysctl.o \
9 af_phonet.o 9 af_phonet.o
10 10
11pn_pep-objs := pep.o pep-gprs.o 11pn_pep-y := pep.o pep-gprs.o
diff --git a/net/rds/Makefile b/net/rds/Makefile
index b46eca10968..56d3f6023ce 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -4,7 +4,7 @@ rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \
4 loop.o page.o rdma.o 4 loop.o page.o rdma.o
5 5
6obj-$(CONFIG_RDS_RDMA) += rds_rdma.o 6obj-$(CONFIG_RDS_RDMA) += rds_rdma.o
7rds_rdma-objs := rdma_transport.o \ 7rds_rdma-y := rdma_transport.o \
8 ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \ 8 ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \
9 ib_sysctl.o ib_rdma.o \ 9 ib_sysctl.o ib_rdma.o \
10 iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \ 10 iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \
@@ -12,10 +12,8 @@ rds_rdma-objs := rdma_transport.o \
12 12
13 13
14obj-$(CONFIG_RDS_TCP) += rds_tcp.o 14obj-$(CONFIG_RDS_TCP) += rds_tcp.o
15rds_tcp-objs := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \ 15rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \
16 tcp_send.o tcp_stats.o 16 tcp_send.o tcp_stats.o
17 17
18ifeq ($(CONFIG_RDS_DEBUG), y) 18ccflags-$(CONFIG_RDS_DEBUG) := -DDEBUG
19EXTRA_CFLAGS += -DDEBUG
20endif
21 19
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 04f599089e6..0198191b756 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -149,20 +149,6 @@ static void rfkill_led_trigger_activate(struct led_classdev *led)
149 rfkill_led_trigger_event(rfkill); 149 rfkill_led_trigger_event(rfkill);
150} 150}
151 151
152const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
153{
154 return rfkill->led_trigger.name;
155}
156EXPORT_SYMBOL(rfkill_get_led_trigger_name);
157
158void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name)
159{
160 BUG_ON(!rfkill);
161
162 rfkill->ledtrigname = name;
163}
164EXPORT_SYMBOL(rfkill_set_led_trigger_name);
165
166static int rfkill_led_trigger_register(struct rfkill *rfkill) 152static int rfkill_led_trigger_register(struct rfkill *rfkill)
167{ 153{
168 rfkill->led_trigger.name = rfkill->ledtrigname 154 rfkill->led_trigger.name = rfkill->ledtrigname
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index c46867c61c9..d1c3429b69e 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -2,7 +2,7 @@
2# Makefile for Linux kernel RxRPC 2# Makefile for Linux kernel RxRPC
3# 3#
4 4
5af-rxrpc-objs := \ 5af-rxrpc-y := \
6 af_rxrpc.o \ 6 af_rxrpc.o \
7 ar-accept.o \ 7 ar-accept.o \
8 ar-ack.o \ 8 ar-ack.o \
@@ -21,7 +21,7 @@ af-rxrpc-objs := \
21 ar-transport.o 21 ar-transport.o
22 22
23ifeq ($(CONFIG_PROC_FS),y) 23ifeq ($(CONFIG_PROC_FS),y)
24af-rxrpc-objs += ar-proc.o 24af-rxrpc-y += ar-proc.o
25endif 25endif
26 26
27obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o 27obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index 9f1729bd60d..a53fb25a64e 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -47,12 +47,12 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
47 case AF_INET: 47 case AF_INET:
48 fl.oif = 0; 48 fl.oif = 0;
49 fl.proto = IPPROTO_UDP, 49 fl.proto = IPPROTO_UDP,
50 fl.nl_u.ip4_u.saddr = 0; 50 fl.fl4_dst = peer->srx.transport.sin.sin_addr.s_addr;
51 fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr; 51 fl.fl4_src = 0;
52 fl.nl_u.ip4_u.tos = 0; 52 fl.fl4_tos = 0;
53 /* assume AFS.CM talking to AFS.FS */ 53 /* assume AFS.CM talking to AFS.FS */
54 fl.uli_u.ports.sport = htons(7001); 54 fl.fl_ip_sport = htons(7001);
55 fl.uli_u.ports.dport = htons(7000); 55 fl.fl_ip_dport = htons(7000);
56 break; 56 break;
57 default: 57 default:
58 BUG(); 58 BUG();
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5dbb3cd96e5..0918834ee4a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,8 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
60 60
61 /* check the reason of requeuing without tx lock first */ 61 /* check the reason of requeuing without tx lock first */
62 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); 62 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
63 if (!netif_tx_queue_stopped(txq) && 63 if (!netif_tx_queue_frozen_or_stopped(txq)) {
64 !netif_tx_queue_frozen(txq)) {
65 q->gso_skb = NULL; 64 q->gso_skb = NULL;
66 q->q.qlen--; 65 q->q.qlen--;
67 } else 66 } else
@@ -122,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
122 spin_unlock(root_lock); 121 spin_unlock(root_lock);
123 122
124 HARD_TX_LOCK(dev, txq, smp_processor_id()); 123 HARD_TX_LOCK(dev, txq, smp_processor_id());
125 if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) 124 if (!netif_tx_queue_frozen_or_stopped(txq))
126 ret = dev_hard_start_xmit(skb, dev, txq); 125 ret = dev_hard_start_xmit(skb, dev, txq);
127 126
128 HARD_TX_UNLOCK(dev, txq); 127 HARD_TX_UNLOCK(dev, txq);
@@ -144,8 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
144 ret = dev_requeue_skb(skb, q); 143 ret = dev_requeue_skb(skb, q);
145 } 144 }
146 145
147 if (ret && (netif_tx_queue_stopped(txq) || 146 if (ret && netif_tx_queue_frozen_or_stopped(txq))
148 netif_tx_queue_frozen(txq)))
149 ret = 0; 147 ret = 0;
150 148
151 return ret; 149 return ret;
@@ -555,7 +553,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
555 size = QDISC_ALIGN(sizeof(*sch)); 553 size = QDISC_ALIGN(sizeof(*sch));
556 size += ops->priv_size + (QDISC_ALIGNTO - 1); 554 size += ops->priv_size + (QDISC_ALIGNTO - 1);
557 555
558 p = kzalloc(size, GFP_KERNEL); 556 p = kzalloc_node(size, GFP_KERNEL,
557 netdev_queue_numa_node_read(dev_queue));
558
559 if (!p) 559 if (!p)
560 goto errout; 560 goto errout;
561 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); 561 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 401af959670..106479a7c94 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -309,8 +309,7 @@ restart:
309 if (__netif_tx_trylock(slave_txq)) { 309 if (__netif_tx_trylock(slave_txq)) {
310 unsigned int length = qdisc_pkt_len(skb); 310 unsigned int length = qdisc_pkt_len(skb);
311 311
312 if (!netif_tx_queue_stopped(slave_txq) && 312 if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
313 !netif_tx_queue_frozen(slave_txq) &&
314 slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { 313 slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
315 txq_trans_update(slave_txq); 314 txq_trans_update(slave_txq);
316 __netif_tx_unlock(slave_txq); 315 __netif_tx_unlock(slave_txq);
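sch_generic.c (twice) and sch_teql.c all open-coded "stopped || frozen"; the new netif_tx_queue_frozen_or_stopped() folds that into a single read of the queue's state word, testing both bits with one mask. A sketch of the consolidated predicate (bit values illustrative):

#define QUEUE_STATE_XOFF	(1UL << 0)	/* stopped by the driver */
#define QUEUE_STATE_FROZEN	(1UL << 1)	/* frozen for tx locking */

struct txq_sketch {
	unsigned long state;
};

static int txq_frozen_or_stopped(const struct txq_sketch *txq)
{
	return txq->state & (QUEUE_STATE_XOFF | QUEUE_STATE_FROZEN);
}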
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6bd554323a3..842c7f3650b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6047,7 +6047,7 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
6047 * will suddenly eat the receive_queue. 6047 * will suddenly eat the receive_queue.
6048 * 6048 *
6049 * Look at current nfs client by the way... 6049 * Look at current nfs client by the way...
6050 * However, this function was corrent in any case. 8) 6050 * However, this function was correct in any case. 8)
6051 */ 6051 */
6052 if (flags & MSG_PEEK) { 6052 if (flags & MSG_PEEK) {
6053 spin_lock_bh(&sk->sk_receive_queue.lock); 6053 spin_lock_bh(&sk->sk_receive_queue.lock);
diff --git a/net/socket.c b/net/socket.c
index 3ca2fd9e372..c898df76e92 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -156,7 +156,7 @@ static const struct file_operations socket_file_ops = {
156 */ 156 */
157 157
158static DEFINE_SPINLOCK(net_family_lock); 158static DEFINE_SPINLOCK(net_family_lock);
159static const struct net_proto_family *net_families[NPROTO] __read_mostly; 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
160 160
161/* 161/*
162 * Statistics counters of the socket lists 162 * Statistics counters of the socket lists
@@ -1200,7 +1200,7 @@ int __sock_create(struct net *net, int family, int type, int protocol,
1200 * requested real, full-featured networking support upon configuration. 1200 * requested real, full-featured networking support upon configuration.
1201 * Otherwise module support will break! 1201 * Otherwise module support will break!
1202 */ 1202 */
1203 if (net_families[family] == NULL) 1203 if (rcu_access_pointer(net_families[family]) == NULL)
1204 request_module("net-pf-%d", family); 1204 request_module("net-pf-%d", family);
1205#endif 1205#endif
1206 1206
@@ -2332,10 +2332,11 @@ int sock_register(const struct net_proto_family *ops)
2332 } 2332 }
2333 2333
2334 spin_lock(&net_family_lock); 2334 spin_lock(&net_family_lock);
2335 if (net_families[ops->family]) 2335 if (rcu_dereference_protected(net_families[ops->family],
2336 lockdep_is_held(&net_family_lock)))
2336 err = -EEXIST; 2337 err = -EEXIST;
2337 else { 2338 else {
2338 net_families[ops->family] = ops; 2339 rcu_assign_pointer(net_families[ops->family], ops);
2339 err = 0; 2340 err = 0;
2340 } 2341 }
2341 spin_unlock(&net_family_lock); 2342 spin_unlock(&net_family_lock);
@@ -2363,7 +2364,7 @@ void sock_unregister(int family)
2363 BUG_ON(family < 0 || family >= NPROTO); 2364 BUG_ON(family < 0 || family >= NPROTO);
2364 2365
2365 spin_lock(&net_family_lock); 2366 spin_lock(&net_family_lock);
2366 net_families[family] = NULL; 2367 rcu_assign_pointer(net_families[family], NULL);
2367 spin_unlock(&net_family_lock); 2368 spin_unlock(&net_family_lock);
2368 2369
2369 synchronize_rcu(); 2370 synchronize_rcu();
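socket.c annotates net_families[] as an array of __rcu pointers: writers keep serializing on net_family_lock and publish with rcu_assign_pointer(), fast-path readers use rcu_access_pointer()/rcu_dereference(), and lockdep_is_held() documents the locked-read case for sparse. The registration half of that pattern, as a user-space sketch with the RCU primitives stubbed out:

struct family_ops { int family; };

/* stubs: the real primitives add memory barriers and annotations */
#define rcu_assign_pointer(p, v)	((p) = (v))
#define rcu_dereference_locked(p)	(p)

#define NPROTO 46	/* illustrative bound */
static const struct family_ops *families[NPROTO];

/* caller holds the registration lock, as sock_register() does */
static int register_family(const struct family_ops *ops)
{
	if (rcu_dereference_locked(families[ops->family]))
		return -1;	/* -EEXIST */
	rcu_assign_pointer(families[ops->family], ops);
	return 0;
}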
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 7350d86a32e..9e4cb59ef9f 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -4,10 +4,10 @@
4 4
5obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o 5obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
6 6
7auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ 7auth_rpcgss-y := auth_gss.o gss_generic_token.o \
8 gss_mech_switch.o svcauth_gss.o 8 gss_mech_switch.o svcauth_gss.o
9 9
10obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o 10obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
11 11
12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ 12rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o 13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 8a2e89bffde..886715a7525 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -35,11 +35,9 @@
35 */ 35 */
36 36
37#include "core.h" 37#include "core.h"
38#include "dbg.h"
39#include "addr.h" 38#include "addr.h"
40#include "zone.h" 39#include "zone.h"
41#include "cluster.h" 40#include "cluster.h"
42#include "net.h"
43 41
44/** 42/**
45 * tipc_addr_domain_valid - validates a network domain address 43 * tipc_addr_domain_valid - validates a network domain address
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 22a60fc9839..6d828d9eda4 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -36,17 +36,9 @@
36 */ 36 */
37 37
38#include "core.h" 38#include "core.h"
39#include "msg.h"
40#include "dbg.h"
41#include "link.h" 39#include "link.h"
42#include "net.h"
43#include "node.h"
44#include "port.h" 40#include "port.h"
45#include "addr.h"
46#include "node_subscr.h"
47#include "name_distr.h" 41#include "name_distr.h"
48#include "bearer.h"
49#include "name_table.h"
50#include "bcast.h" 42#include "bcast.h"
51 43
52#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ 44#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 9927d1d56c4..885da94be4a 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -36,12 +36,9 @@
36 36
37#include "core.h" 37#include "core.h"
38#include "config.h" 38#include "config.h"
39#include "dbg.h"
40#include "bearer.h" 39#include "bearer.h"
41#include "link.h"
42#include "port.h" 40#include "port.h"
43#include "discover.h" 41#include "discover.h"
44#include "bcast.h"
45 42
46#define MAX_ADDR_STR 32 43#define MAX_ADDR_STR 32
47 44
@@ -625,7 +622,7 @@ int tipc_block_bearer(const char *name)
625 * Note: This routine assumes caller holds tipc_net_lock. 622 * Note: This routine assumes caller holds tipc_net_lock.
626 */ 623 */
627 624
628static int bearer_disable(struct bearer *b_ptr) 625static void bearer_disable(struct bearer *b_ptr)
629{ 626{
630 struct link *l_ptr; 627 struct link *l_ptr;
631 struct link *temp_l_ptr; 628 struct link *temp_l_ptr;
@@ -641,7 +638,6 @@ static int bearer_disable(struct bearer *b_ptr)
641 } 638 }
642 spin_unlock_bh(&b_ptr->publ.lock); 639 spin_unlock_bh(&b_ptr->publ.lock);
643 memset(b_ptr, 0, sizeof(struct bearer)); 640 memset(b_ptr, 0, sizeof(struct bearer));
644 return 0;
645} 641}
646 642
647int tipc_disable_bearer(const char *name) 643int tipc_disable_bearer(const char *name)
@@ -654,8 +650,10 @@ int tipc_disable_bearer(const char *name)
654 if (b_ptr == NULL) { 650 if (b_ptr == NULL) {
655 warn("Attempt to disable unknown bearer <%s>\n", name); 651 warn("Attempt to disable unknown bearer <%s>\n", name);
656 res = -EINVAL; 652 res = -EINVAL;
657 } else 653 } else {
658 res = bearer_disable(b_ptr); 654 bearer_disable(b_ptr);
655 res = 0;
656 }
659 write_unlock_bh(&tipc_net_lock); 657 write_unlock_bh(&tipc_net_lock);
660 return res; 658 return res;
661} 659}
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index a850b389663..85f451d5aac 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -37,12 +37,50 @@
37#ifndef _TIPC_BEARER_H 37#ifndef _TIPC_BEARER_H
38#define _TIPC_BEARER_H 38#define _TIPC_BEARER_H
39 39
40#include "core.h"
41#include "bcast.h" 40#include "bcast.h"
42 41
43#define MAX_BEARERS 8 42#define MAX_BEARERS 8
44#define MAX_MEDIA 4 43#define MAX_MEDIA 4
45 44
45/*
46 * Identifiers of supported TIPC media types
47 */
48#define TIPC_MEDIA_TYPE_ETH 1
49
50/*
51 * Destination address structure used by TIPC bearers when sending messages
52 *
53 * IMPORTANT: The fields of this structure MUST be stored using the specified
54 * byte order indicated below, as the structure is exchanged between nodes
55 * as part of a link setup process.
56 */
57struct tipc_media_addr {
58 __be32 type; /* bearer type (network byte order) */
59 union {
60 __u8 eth_addr[6]; /* 48 bit Ethernet addr (byte array) */
61 } dev_addr;
62};
63
64/**
65 * struct tipc_bearer - TIPC bearer info available to media code
66 * @usr_handle: pointer to additional media-specific information about bearer
67 * @mtu: max packet size bearer can support
68 * @blocked: non-zero if bearer is blocked
69 * @lock: spinlock for controlling access to bearer
70 * @addr: media-specific address associated with bearer
71 * @name: bearer name (format = media:interface)
72 *
73 * Note: TIPC initializes "name" and "lock" fields; media code is responsible
 74 * for initializing all other fields when a bearer is enabled.
75 */
76struct tipc_bearer {
77 void *usr_handle;
78 u32 mtu;
79 int blocked;
80 spinlock_t lock;
81 struct tipc_media_addr addr;
82 char name[TIPC_MAX_BEARER_NAME];
83};
46 84
47/** 85/**
48 * struct media - TIPC media information available to internal users 86 * struct media - TIPC media information available to internal users
@@ -55,7 +93,7 @@
55 * @priority: default link (and bearer) priority 93 * @priority: default link (and bearer) priority
56 * @tolerance: default time (in ms) before declaring link failure 94 * @tolerance: default time (in ms) before declaring link failure
57 * @window: default window (in packets) before declaring link congestion 95 * @window: default window (in packets) before declaring link congestion
58 * @type_id: TIPC media identifier [defined in tipc_bearer.h] 96 * @type_id: TIPC media identifier
59 * @name: media name 97 * @name: media name
60 */ 98 */
61 99
@@ -116,6 +154,34 @@ struct link;
116 154
117extern struct bearer tipc_bearers[]; 155extern struct bearer tipc_bearers[];
118 156
157/*
158 * TIPC routines available to supported media types
159 */
160int tipc_register_media(u32 media_type,
161 char *media_name, int (*enable)(struct tipc_bearer *),
162 void (*disable)(struct tipc_bearer *),
163 int (*send_msg)(struct sk_buff *,
164 struct tipc_bearer *, struct tipc_media_addr *),
165 char *(*addr2str)(struct tipc_media_addr *a,
166 char *str_buf, int str_size),
167 struct tipc_media_addr *bcast_addr, const u32 bearer_priority,
168 const u32 link_tolerance, /* [ms] */
169 const u32 send_window_limit);
170
171void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr);
172
173int tipc_block_bearer(const char *name);
174void tipc_continue(struct tipc_bearer *tb_ptr);
175
176int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority);
177int tipc_disable_bearer(const char *name);
178
179/*
180 * Routines made available to TIPC by supported media types
181 */
182int tipc_eth_media_start(void);
183void tipc_eth_media_stop(void);
184
119void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); 185void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a);
120struct sk_buff *tipc_media_get_names(void); 186struct sk_buff *tipc_media_get_names(void);
121 187
@@ -126,7 +192,6 @@ void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr);
126struct bearer *tipc_bearer_find_interface(const char *if_name); 192struct bearer *tipc_bearer_find_interface(const char *if_name);
127int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr); 193int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr);
128int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr); 194int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr);
129int tipc_bearer_init(void);
130void tipc_bearer_stop(void); 195void tipc_bearer_stop(void);
131void tipc_bearer_lock_push(struct bearer *b_ptr); 196void tipc_bearer_lock_push(struct bearer *b_ptr);
132 197
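With the native API withdrawn from the exported headers, these prototypes become the whole contract between TIPC and a media driver; in-tree Ethernet is the only implementer. A sketch of how such a driver plugs in, with the callbacks declared but elided and the default link constants from linux/tipc_config.h assumed:

/* callbacks a media driver must supply (bodies elided) */
static int  enable_bearer(struct tipc_bearer *tb);
static void disable_bearer(struct tipc_bearer *tb);
static int  send_msg(struct sk_buff *buf, struct tipc_bearer *tb,
		     struct tipc_media_addr *dest);
static char *eth_addr2str(struct tipc_media_addr *a, char *buf, int sz);

static struct tipc_media_addr eth_bcast = {
	/* .type is stored in network byte order, per the comment above */
	.dev_addr.eth_addr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
};

int tipc_eth_media_start(void)
{
	return tipc_register_media(TIPC_MEDIA_TYPE_ETH, "eth",
				   enable_bearer, disable_bearer,
				   send_msg, eth_addr2str, &eth_bcast,
				   TIPC_DEF_LINK_PRI, TIPC_DEF_LINK_TOL,
				   TIPC_DEF_LINK_WIN);
}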
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 7fea14b98b9..405be87157b 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -36,17 +36,10 @@
36 36
37#include "core.h" 37#include "core.h"
38#include "cluster.h" 38#include "cluster.h"
39#include "addr.h"
40#include "node_subscr.h"
41#include "link.h" 39#include "link.h"
42#include "node.h"
43#include "net.h"
44#include "msg.h"
45#include "bearer.h"
46 40
47static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf, 41static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
48 u32 lower, u32 upper); 42 u32 lower, u32 upper);
49static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest);
50 43
51struct tipc_node **tipc_local_nodes = NULL; 44struct tipc_node **tipc_local_nodes = NULL;
52struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}}; 45struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}};
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 50a6133a366..bdde39f0436 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -35,23 +35,11 @@
35 */ 35 */
36 36
37#include "core.h" 37#include "core.h"
38#include "dbg.h"
39#include "bearer.h"
40#include "port.h" 38#include "port.h"
41#include "link.h" 39#include "link.h"
42#include "zone.h"
43#include "addr.h"
44#include "name_table.h" 40#include "name_table.h"
45#include "node.h" 41#include "user_reg.h"
46#include "config.h" 42#include "config.h"
47#include "discover.h"
48
49struct subscr_data {
50 char usr_handle[8];
51 u32 domain;
52 u32 port_ref;
53 struct list_head subd_list;
54};
55 43
56struct manager { 44struct manager {
57 u32 user_ref; 45 u32 user_ref;
@@ -572,7 +560,7 @@ int tipc_cfg_init(void)
572 struct tipc_name_seq seq; 560 struct tipc_name_seq seq;
573 int res; 561 int res;
574 562
575 res = tipc_attach(&mng.user_ref, NULL, NULL); 563 res = tipc_attach(&mng.user_ref);
576 if (res) 564 if (res)
577 goto failed; 565 goto failed;
578 566
diff --git a/net/tipc/config.h b/net/tipc/config.h
index 481e12ece71..443159a166f 100644
--- a/net/tipc/config.h
+++ b/net/tipc/config.h
@@ -39,7 +39,6 @@
39 39
40/* ---------------------------------------------------------------------- */ 40/* ---------------------------------------------------------------------- */
41 41
42#include "core.h"
43#include "link.h" 42#include "link.h"
44 43
45struct sk_buff *tipc_cfg_reply_alloc(int payload_size); 44struct sk_buff *tipc_cfg_reply_alloc(int payload_size);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index e2a09eb8efd..f5d62c174de 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -40,7 +40,6 @@
40#include <linux/random.h> 40#include <linux/random.h>
41 41
42#include "core.h" 42#include "core.h"
43#include "dbg.h"
44#include "ref.h" 43#include "ref.h"
45#include "net.h" 44#include "net.h"
46#include "user_reg.h" 45#include "user_reg.h"
@@ -236,43 +235,3 @@ module_exit(tipc_exit);
236MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication"); 235MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication");
237MODULE_LICENSE("Dual BSD/GPL"); 236MODULE_LICENSE("Dual BSD/GPL");
238MODULE_VERSION(TIPC_MOD_VER); 237MODULE_VERSION(TIPC_MOD_VER);
239
240/* Native TIPC API for kernel-space applications (see tipc.h) */
241
242EXPORT_SYMBOL(tipc_attach);
243EXPORT_SYMBOL(tipc_detach);
244EXPORT_SYMBOL(tipc_createport);
245EXPORT_SYMBOL(tipc_deleteport);
246EXPORT_SYMBOL(tipc_ownidentity);
247EXPORT_SYMBOL(tipc_portimportance);
248EXPORT_SYMBOL(tipc_set_portimportance);
249EXPORT_SYMBOL(tipc_portunreliable);
250EXPORT_SYMBOL(tipc_set_portunreliable);
251EXPORT_SYMBOL(tipc_portunreturnable);
252EXPORT_SYMBOL(tipc_set_portunreturnable);
253EXPORT_SYMBOL(tipc_publish);
254EXPORT_SYMBOL(tipc_withdraw);
255EXPORT_SYMBOL(tipc_connect2port);
256EXPORT_SYMBOL(tipc_disconnect);
257EXPORT_SYMBOL(tipc_shutdown);
258EXPORT_SYMBOL(tipc_send);
259EXPORT_SYMBOL(tipc_send2name);
260EXPORT_SYMBOL(tipc_send2port);
261EXPORT_SYMBOL(tipc_multicast);
262
263/* TIPC API for external bearers (see tipc_bearer.h) */
264
265EXPORT_SYMBOL(tipc_block_bearer);
266EXPORT_SYMBOL(tipc_continue);
267EXPORT_SYMBOL(tipc_disable_bearer);
268EXPORT_SYMBOL(tipc_enable_bearer);
269EXPORT_SYMBOL(tipc_recv_msg);
270EXPORT_SYMBOL(tipc_register_media);
271
272/* TIPC API for external APIs (see tipc_port.h) */
273
274EXPORT_SYMBOL(tipc_createport_raw);
275EXPORT_SYMBOL(tipc_reject_msg);
276EXPORT_SYMBOL(tipc_send_buf_fast);
277EXPORT_SYMBOL(tipc_acknowledge);
278
diff --git a/net/tipc/core.h b/net/tipc/core.h
index e19389e5722..ca7e171c104 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -39,10 +39,6 @@
39 39
40#include <linux/tipc.h> 40#include <linux/tipc.h>
41#include <linux/tipc_config.h> 41#include <linux/tipc_config.h>
42#include <net/tipc/tipc_msg.h>
43#include <net/tipc/tipc_port.h>
44#include <net/tipc/tipc_bearer.h>
45#include <net/tipc/tipc.h>
46#include <linux/types.h> 42#include <linux/types.h>
47#include <linux/kernel.h> 43#include <linux/kernel.h>
48#include <linux/errno.h> 44#include <linux/errno.h>
@@ -62,6 +58,9 @@
62 58
63#define TIPC_MOD_VER "2.0.0" 59#define TIPC_MOD_VER "2.0.0"
64 60
61struct tipc_msg; /* msg.h */
62struct print_buf; /* dbg.h */
63
65/* 64/*
66 * TIPC sanity test macros 65 * TIPC sanity test macros
67 */ 66 */
@@ -174,6 +173,13 @@ void tipc_dump_dbg(struct print_buf *, const char *fmt, ...);
174#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ 173#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */
175 174
176/* 175/*
176 * TIPC operating mode routines
177 */
178#define TIPC_NOT_RUNNING 0
179#define TIPC_NODE_MODE 1
180#define TIPC_NET_MODE 2
181
182/*
177 * Global configuration variables 183 * Global configuration variables
178 */ 184 */
179 185
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 4a7cd3719b7..f2ce36baf42 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -35,9 +35,7 @@
35 */ 35 */
36 36
37#include "core.h" 37#include "core.h"
38#include "dbg.h"
39#include "link.h" 38#include "link.h"
40#include "zone.h"
41#include "discover.h" 39#include "discover.h"
42#include "port.h" 40#include "port.h"
43#include "name_table.h" 41#include "name_table.h"
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index f8e75063612..d2c3cffb79f 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -37,8 +37,6 @@
37#ifndef _TIPC_DISCOVER_H 37#ifndef _TIPC_DISCOVER_H
38#define _TIPC_DISCOVER_H 38#define _TIPC_DISCOVER_H
39 39
40#include "core.h"
41
42struct link_req; 40struct link_req;
43 41
44struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, 42struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr,
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 6e988ba485f..ee683cc8f4b 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -34,13 +34,13 @@
34 * POSSIBILITY OF SUCH DAMAGE. 34 * POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36 36
37#include <net/tipc/tipc.h>
38#include <net/tipc/tipc_bearer.h>
39#include <net/tipc/tipc_msg.h>
40#include <linux/netdevice.h> 37#include <linux/netdevice.h>
41#include <linux/slab.h> 38#include <linux/slab.h>
42#include <net/net_namespace.h> 39#include <net/net_namespace.h>
43 40
41#include "core.h"
42#include "bearer.h"
43
44#define MAX_ETH_BEARERS 2 44#define MAX_ETH_BEARERS 2
45#define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI 45#define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI
46#define ETH_LINK_TOLERANCE TIPC_DEF_LINK_TOL 46#define ETH_LINK_TOLERANCE TIPC_DEF_LINK_TOL
diff --git a/net/tipc/link.c b/net/tipc/link.c
index b31992ccd5d..cf414cf05e7 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -35,19 +35,11 @@
35 */ 35 */
36 36
37#include "core.h" 37#include "core.h"
38#include "dbg.h"
39#include "link.h" 38#include "link.h"
40#include "net.h"
41#include "node.h"
42#include "port.h" 39#include "port.h"
43#include "addr.h"
44#include "node_subscr.h"
45#include "name_distr.h" 40#include "name_distr.h"
46#include "bearer.h"
47#include "name_table.h"
48#include "discover.h" 41#include "discover.h"
49#include "config.h" 42#include "config.h"
50#include "bcast.h"
51 43
52 44
53/* 45/*
@@ -57,12 +49,6 @@
57#define INVALID_SESSION 0x10000 49#define INVALID_SESSION 0x10000
58 50
59/* 51/*
60 * Limit for deferred reception queue:
61 */
62
63#define DEF_QUEUE_LIMIT 256u
64
65/*
66 * Link state events: 52 * Link state events:
67 */ 53 */
68 54
diff --git a/net/tipc/link.h b/net/tipc/link.h
index f98bc613de6..c562888d25d 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -39,7 +39,6 @@
39 39
40#include "dbg.h" 40#include "dbg.h"
41#include "msg.h" 41#include "msg.h"
42#include "bearer.h"
43#include "node.h" 42#include "node.h"
44 43
45#define PUSH_FAILED 1 44#define PUSH_FAILED 1
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ecb532fb035..ee6b4c68d4a 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -36,9 +36,7 @@
36 36
37#include "core.h" 37#include "core.h"
38#include "addr.h" 38#include "addr.h"
39#include "dbg.h"
40#include "msg.h" 39#include "msg.h"
41#include "bearer.h"
42 40
43u32 tipc_msg_tot_importance(struct tipc_msg *m) 41u32 tipc_msg_tot_importance(struct tipc_msg *m)
44{ 42{
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 031aad18efc..aee53864d7a 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -37,10 +37,51 @@
37#ifndef _TIPC_MSG_H 37#ifndef _TIPC_MSG_H
38#define _TIPC_MSG_H 38#define _TIPC_MSG_H
39 39
40#include "core.h" 40#include "bearer.h"
41 41
42#define TIPC_VERSION 2 42#define TIPC_VERSION 2
43 43
44/*
45 * TIPC user data message header format, version 2:
46 *
47 *
48 * 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0
49 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
50 * w0:|vers | user |hdr sz |n|d|s|-| message size |
51 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
52 * w1:|mstyp| error |rer cnt|lsc|opt p| broadcast ack no |
53 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
54 * w2:| link level ack no | broadcast/link level seq no |
55 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
56 * w3:| previous node |
57 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
58 * w4:| originating port |
59 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
60 * w5:| destination port |
61 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
62 * w6:| originating node |
63 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
64 * w7:| destination node |
65 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
66 * w8:| name type / transport sequence number |
67 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
68 * w9:| name instance/multicast lower bound |
69 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
70 * wA:| multicast upper bound |
71 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
72 * / /
73 * \ options \
74 * / /
75 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
76 *
77 */
78
79#define TIPC_CONN_MSG 0
80#define TIPC_MCAST_MSG 1
81#define TIPC_NAMED_MSG 2
82#define TIPC_DIRECT_MSG 3
83
84
44#define SHORT_H_SIZE 24 /* Connected, in-cluster messages */ 85#define SHORT_H_SIZE 24 /* Connected, in-cluster messages */
45#define DIR_MSG_H_SIZE 32 /* Directly addressed messages */ 86#define DIR_MSG_H_SIZE 32 /* Directly addressed messages */
46#define LONG_H_SIZE 40 /* Named messages */ 87#define LONG_H_SIZE 40 /* Named messages */
@@ -52,20 +93,26 @@
52#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) 93#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
53 94
54 95
55/* 96struct tipc_msg {
56 TIPC user data message header format, version 2 97 __be32 hdr[15];
98};
57 99
58 - Fundamental definitions available to privileged TIPC users
59 are located in tipc_msg.h.
60 - Remaining definitions available to TIPC internal users appear below.
61*/
62 100
101static inline u32 msg_word(struct tipc_msg *m, u32 pos)
102{
103 return ntohl(m->hdr[pos]);
104}
63 105
64static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val) 106static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val)
65{ 107{
66 m->hdr[w] = htonl(val); 108 m->hdr[w] = htonl(val);
67} 109}
68 110
111static inline u32 msg_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask)
112{
113 return (msg_word(m, w) >> pos) & mask;
114}
115
69static inline void msg_set_bits(struct tipc_msg *m, u32 w, 116static inline void msg_set_bits(struct tipc_msg *m, u32 w,
70 u32 pos, u32 mask, u32 val) 117 u32 pos, u32 mask, u32 val)
71{ 118{
@@ -112,16 +159,36 @@ static inline void msg_set_user(struct tipc_msg *m, u32 n)
112 msg_set_bits(m, 0, 25, 0xf, n); 159 msg_set_bits(m, 0, 25, 0xf, n);
113} 160}
114 161
162static inline u32 msg_importance(struct tipc_msg *m)
163{
164 return msg_bits(m, 0, 25, 0xf);
165}
166
115static inline void msg_set_importance(struct tipc_msg *m, u32 i) 167static inline void msg_set_importance(struct tipc_msg *m, u32 i)
116{ 168{
117 msg_set_user(m, i); 169 msg_set_user(m, i);
118} 170}
119 171
172static inline u32 msg_hdr_sz(struct tipc_msg *m)
173{
174 return msg_bits(m, 0, 21, 0xf) << 2;
175}
176
120static inline void msg_set_hdr_sz(struct tipc_msg *m,u32 n) 177static inline void msg_set_hdr_sz(struct tipc_msg *m,u32 n)
121{ 178{
122 msg_set_bits(m, 0, 21, 0xf, n>>2); 179 msg_set_bits(m, 0, 21, 0xf, n>>2);
123} 180}
124 181
182static inline u32 msg_size(struct tipc_msg *m)
183{
184 return msg_bits(m, 0, 0, 0x1ffff);
185}
186
187static inline u32 msg_data_sz(struct tipc_msg *m)
188{
189 return msg_size(m) - msg_hdr_sz(m);
190}
191
125static inline int msg_non_seq(struct tipc_msg *m) 192static inline int msg_non_seq(struct tipc_msg *m)
126{ 193{
127 return msg_bits(m, 0, 20, 1); 194 return msg_bits(m, 0, 20, 1);
@@ -162,11 +229,36 @@ static inline void msg_set_size(struct tipc_msg *m, u32 sz)
162 * Word 1 229 * Word 1
163 */ 230 */
164 231
232static inline u32 msg_type(struct tipc_msg *m)
233{
234 return msg_bits(m, 1, 29, 0x7);
235}
236
165static inline void msg_set_type(struct tipc_msg *m, u32 n) 237static inline void msg_set_type(struct tipc_msg *m, u32 n)
166{ 238{
167 msg_set_bits(m, 1, 29, 0x7, n); 239 msg_set_bits(m, 1, 29, 0x7, n);
168} 240}
169 241
242static inline u32 msg_named(struct tipc_msg *m)
243{
244 return msg_type(m) == TIPC_NAMED_MSG;
245}
246
247static inline u32 msg_mcast(struct tipc_msg *m)
248{
249 return msg_type(m) == TIPC_MCAST_MSG;
250}
251
252static inline u32 msg_connected(struct tipc_msg *m)
253{
254 return msg_type(m) == TIPC_CONN_MSG;
255}
256
257static inline u32 msg_errcode(struct tipc_msg *m)
258{
259 return msg_bits(m, 1, 25, 0xf);
260}
261
170static inline void msg_set_errcode(struct tipc_msg *m, u32 err) 262static inline void msg_set_errcode(struct tipc_msg *m, u32 err)
171{ 263{
172 msg_set_bits(m, 1, 25, 0xf, err); 264 msg_set_bits(m, 1, 25, 0xf, err);
@@ -257,31 +349,68 @@ static inline void msg_set_destnode_cache(struct tipc_msg *m, u32 dnode)
257 */ 349 */
258 350
259 351
352static inline u32 msg_prevnode(struct tipc_msg *m)
353{
354 return msg_word(m, 3);
355}
356
260static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) 357static inline void msg_set_prevnode(struct tipc_msg *m, u32 a)
261{ 358{
262 msg_set_word(m, 3, a); 359 msg_set_word(m, 3, a);
263} 360}
264 361
362static inline u32 msg_origport(struct tipc_msg *m)
363{
364 return msg_word(m, 4);
365}
366
265static inline void msg_set_origport(struct tipc_msg *m, u32 p) 367static inline void msg_set_origport(struct tipc_msg *m, u32 p)
266{ 368{
267 msg_set_word(m, 4, p); 369 msg_set_word(m, 4, p);
268} 370}
269 371
372static inline u32 msg_destport(struct tipc_msg *m)
373{
374 return msg_word(m, 5);
375}
376
270static inline void msg_set_destport(struct tipc_msg *m, u32 p) 377static inline void msg_set_destport(struct tipc_msg *m, u32 p)
271{ 378{
272 msg_set_word(m, 5, p); 379 msg_set_word(m, 5, p);
273} 380}
274 381
382static inline u32 msg_mc_netid(struct tipc_msg *m)
383{
384 return msg_word(m, 5);
385}
386
275static inline void msg_set_mc_netid(struct tipc_msg *m, u32 p) 387static inline void msg_set_mc_netid(struct tipc_msg *m, u32 p)
276{ 388{
277 msg_set_word(m, 5, p); 389 msg_set_word(m, 5, p);
278} 390}
279 391
392static inline int msg_short(struct tipc_msg *m)
393{
394 return msg_hdr_sz(m) == 24;
395}
396
397static inline u32 msg_orignode(struct tipc_msg *m)
398{
399 if (likely(msg_short(m)))
400 return msg_prevnode(m);
401 return msg_word(m, 6);
402}
403
280static inline void msg_set_orignode(struct tipc_msg *m, u32 a) 404static inline void msg_set_orignode(struct tipc_msg *m, u32 a)
281{ 405{
282 msg_set_word(m, 6, a); 406 msg_set_word(m, 6, a);
283} 407}
284 408
409static inline u32 msg_destnode(struct tipc_msg *m)
410{
411 return msg_word(m, 7);
412}
413
285static inline void msg_set_destnode(struct tipc_msg *m, u32 a) 414static inline void msg_set_destnode(struct tipc_msg *m, u32 a)
286{ 415{
287 msg_set_word(m, 7, a); 416 msg_set_word(m, 7, a);
@@ -299,6 +428,11 @@ static inline u32 msg_routed(struct tipc_msg *m)
299 return(msg_destnode(m) ^ msg_orignode(m)) >> 11; 428 return(msg_destnode(m) ^ msg_orignode(m)) >> 11;
300} 429}
301 430
431static inline u32 msg_nametype(struct tipc_msg *m)
432{
433 return msg_word(m, 8);
434}
435
302static inline void msg_set_nametype(struct tipc_msg *m, u32 n) 436static inline void msg_set_nametype(struct tipc_msg *m, u32 n)
303{ 437{
304 msg_set_word(m, 8, n); 438 msg_set_word(m, 8, n);
@@ -324,6 +458,16 @@ static inline void msg_set_transp_seqno(struct tipc_msg *m, u32 n)
324 msg_set_word(m, 8, n); 458 msg_set_word(m, 8, n);
325} 459}
326 460
461static inline u32 msg_nameinst(struct tipc_msg *m)
462{
463 return msg_word(m, 9);
464}
465
466static inline u32 msg_namelower(struct tipc_msg *m)
467{
468 return msg_nameinst(m);
469}
470
327static inline void msg_set_namelower(struct tipc_msg *m, u32 n) 471static inline void msg_set_namelower(struct tipc_msg *m, u32 n)
328{ 472{
329 msg_set_word(m, 9, n); 473 msg_set_word(m, 9, n);
@@ -334,11 +478,21 @@ static inline void msg_set_nameinst(struct tipc_msg *m, u32 n)
334 msg_set_namelower(m, n); 478 msg_set_namelower(m, n);
335} 479}
336 480
481static inline u32 msg_nameupper(struct tipc_msg *m)
482{
483 return msg_word(m, 10);
484}
485
337static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) 486static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
338{ 487{
339 msg_set_word(m, 10, n); 488 msg_set_word(m, 10, n);
340} 489}
341 490
491static inline unchar *msg_data(struct tipc_msg *m)
492{
493 return ((unchar *)m) + msg_hdr_sz(m);
494}
495
342static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) 496static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
343{ 497{
344 return (struct tipc_msg *)msg_data(m); 498 return (struct tipc_msg *)msg_data(m);
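
The msg.h hunks above pull the read-side helpers (msg_word(), msg_bits(), and the per-field getters built on them) in beside their setters, so the whole header API operates on one 15-word big-endian array. Below is a minimal userspace sketch of that bit-field scheme, assuming a read-modify-write msg_set_bits() like the kernel's (its body falls outside this hunk); the field positions follow the word-0 diagram above:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* Userspace mock of the accessors above; not the kernel code itself. */
struct tipc_msg {
        uint32_t hdr[15];               /* words kept in network order */
};

static uint32_t msg_word(struct tipc_msg *m, uint32_t pos)
{
        return ntohl(m->hdr[pos]);
}

static void msg_set_word(struct tipc_msg *m, uint32_t w, uint32_t val)
{
        m->hdr[w] = htonl(val);
}

static uint32_t msg_bits(struct tipc_msg *m, uint32_t w, uint32_t pos,
                         uint32_t mask)
{
        return (msg_word(m, w) >> pos) & mask;
}

/* Assumed read-modify-write form; the real body is not in this hunk. */
static void msg_set_bits(struct tipc_msg *m, uint32_t w, uint32_t pos,
                         uint32_t mask, uint32_t val)
{
        uint32_t word = msg_word(m, w) & ~(mask << pos);

        msg_set_word(m, w, word | ((val & mask) << pos));
}

int main(void)
{
        struct tipc_msg m = { { 0 } };

        msg_set_bits(&m, 0, 29, 0x7, 2);        /* vers   = TIPC_VERSION */
        msg_set_bits(&m, 0, 21, 0xf, 24 >> 2);  /* hdr sz = SHORT_H_SIZE */
        msg_set_bits(&m, 0, 0, 0x1ffff, 24);    /* msg sz = header only  */

        printf("vers=%u hdr_sz=%u size=%u\n",
               msg_bits(&m, 0, 29, 0x7),
               msg_bits(&m, 0, 21, 0xf) << 2,
               msg_bits(&m, 0, 0, 0x1ffff));
        return 0;
}
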
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 7b907171f87..10ff48be3c0 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -36,9 +36,7 @@
36 36
37#include "core.h" 37#include "core.h"
38#include "cluster.h" 38#include "cluster.h"
39#include "dbg.h"
40#include "link.h" 39#include "link.h"
41#include "msg.h"
42#include "name_distr.h" 40#include "name_distr.h"
43 41
44#define ITEM_SIZE sizeof(struct distr_item) 42#define ITEM_SIZE sizeof(struct distr_item)
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 3a8de4334da..d5adb045674 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -36,15 +36,10 @@
36 36
37#include "core.h" 37#include "core.h"
38#include "config.h" 38#include "config.h"
39#include "dbg.h"
40#include "name_table.h" 39#include "name_table.h"
41#include "name_distr.h" 40#include "name_distr.h"
42#include "addr.h"
43#include "node_subscr.h"
44#include "subscr.h" 41#include "subscr.h"
45#include "port.h" 42#include "port.h"
46#include "cluster.h"
47#include "bcast.h"
48 43
49static int tipc_nametbl_size = 1024; /* must be a power of 2 */ 44static int tipc_nametbl_size = 1024; /* must be a power of 2 */
50 45
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 1a621cfd660..c2b4b86c2e6 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -35,18 +35,13 @@
35 */ 35 */
36 36
37#include "core.h" 37#include "core.h"
38#include "bearer.h"
39#include "net.h" 38#include "net.h"
40#include "zone.h" 39#include "zone.h"
41#include "addr.h"
42#include "name_table.h" 40#include "name_table.h"
43#include "name_distr.h" 41#include "name_distr.h"
44#include "subscr.h" 42#include "subscr.h"
45#include "link.h" 43#include "link.h"
46#include "msg.h"
47#include "port.h" 44#include "port.h"
48#include "bcast.h"
49#include "discover.h"
50#include "config.h" 45#include "config.h"
51 46
52/* 47/*
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b4d87eb2dc5..df71dfc3a9a 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -37,16 +37,9 @@
37#include "core.h" 37#include "core.h"
38#include "config.h" 38#include "config.h"
39#include "node.h" 39#include "node.h"
40#include "cluster.h"
41#include "net.h"
42#include "addr.h"
43#include "node_subscr.h"
44#include "link.h"
45#include "port.h" 40#include "port.h"
46#include "bearer.h"
47#include "name_distr.h" 41#include "name_distr.h"
48 42
49void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str);
50static void node_lost_contact(struct tipc_node *n_ptr); 43static void node_lost_contact(struct tipc_node *n_ptr);
51static void node_established_contact(struct tipc_node *n_ptr); 44static void node_established_contact(struct tipc_node *n_ptr);
52 45
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index 19194d476a9..018a55332d9 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c
@@ -35,10 +35,8 @@
35 */ 35 */
36 36
37#include "core.h" 37#include "core.h"
38#include "dbg.h"
39#include "node_subscr.h" 38#include "node_subscr.h"
40#include "node.h" 39#include "node.h"
41#include "addr.h"
42 40
43/** 41/**
44 * tipc_nodesub_subscribe - create "node down" subscription for specified node 42 * tipc_nodesub_subscribe - create "node down" subscription for specified node
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 82092eaa153..7873283f496 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -36,15 +36,9 @@
36 36
37#include "core.h" 37#include "core.h"
38#include "config.h" 38#include "config.h"
39#include "dbg.h"
40#include "port.h" 39#include "port.h"
41#include "addr.h"
42#include "link.h"
43#include "node.h"
44#include "name_table.h" 40#include "name_table.h"
45#include "user_reg.h" 41#include "user_reg.h"
46#include "msg.h"
47#include "bcast.h"
48 42
49/* Connection management: */ 43/* Connection management: */
50#define PROBING_INTERVAL 3600000 /* [ms] => 1 h */ 44#define PROBING_INTERVAL 3600000 /* [ms] => 1 h */
@@ -94,7 +88,7 @@ static void port_incr_out_seqno(struct port *p_ptr)
94 * tipc_multicast - send a multicast message to local and remote destinations 88 * tipc_multicast - send a multicast message to local and remote destinations
95 */ 89 */
96 90
97int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 domain, 91int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
98 u32 num_sect, struct iovec const *msg_sect) 92 u32 num_sect, struct iovec const *msg_sect)
99{ 93{
100 struct tipc_msg *hdr; 94 struct tipc_msg *hdr;
@@ -989,13 +983,6 @@ int tipc_createport(u32 user_ref,
989 return 0; 983 return 0;
990} 984}
991 985
992int tipc_ownidentity(u32 ref, struct tipc_portid *id)
993{
994 id->ref = ref;
995 id->node = tipc_own_addr;
996 return 0;
997}
998
999int tipc_portimportance(u32 ref, unsigned int *importance) 986int tipc_portimportance(u32 ref, unsigned int *importance)
1000{ 987{
1001 struct port *p_ptr; 988 struct port *p_ptr;
@@ -1271,16 +1258,11 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
1271} 1258}
1272 1259
1273/** 1260/**
1274 * tipc_forward2name - forward message sections to port name 1261 * tipc_send2name - send message sections to port name
1275 */ 1262 */
1276 1263
1277static int tipc_forward2name(u32 ref, 1264int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
1278 struct tipc_name const *name, 1265 unsigned int num_sect, struct iovec const *msg_sect)
1279 u32 domain,
1280 u32 num_sect,
1281 struct iovec const *msg_sect,
1282 struct tipc_portid const *orig,
1283 unsigned int importance)
1284{ 1266{
1285 struct port *p_ptr; 1267 struct port *p_ptr;
1286 struct tipc_msg *msg; 1268 struct tipc_msg *msg;
@@ -1294,14 +1276,12 @@ static int tipc_forward2name(u32 ref,
1294 1276
1295 msg = &p_ptr->publ.phdr; 1277 msg = &p_ptr->publ.phdr;
1296 msg_set_type(msg, TIPC_NAMED_MSG); 1278 msg_set_type(msg, TIPC_NAMED_MSG);
1297 msg_set_orignode(msg, orig->node); 1279 msg_set_orignode(msg, tipc_own_addr);
1298 msg_set_origport(msg, orig->ref); 1280 msg_set_origport(msg, ref);
1299 msg_set_hdr_sz(msg, LONG_H_SIZE); 1281 msg_set_hdr_sz(msg, LONG_H_SIZE);
1300 msg_set_nametype(msg, name->type); 1282 msg_set_nametype(msg, name->type);
1301 msg_set_nameinst(msg, name->instance); 1283 msg_set_nameinst(msg, name->instance);
1302 msg_set_lookup_scope(msg, tipc_addr_scope(domain)); 1284 msg_set_lookup_scope(msg, tipc_addr_scope(domain));
1303 if (importance <= TIPC_CRITICAL_IMPORTANCE)
1304 msg_set_importance(msg,importance);
1305 destport = tipc_nametbl_translate(name->type, name->instance, &destnode); 1285 destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
1306 msg_set_destnode(msg, destnode); 1286 msg_set_destnode(msg, destnode);
1307 msg_set_destport(msg, destport); 1287 msg_set_destport(msg, destport);
@@ -1325,33 +1305,11 @@ static int tipc_forward2name(u32 ref,
1325} 1305}
1326 1306
1327/** 1307/**
1328 * tipc_send2name - send message sections to port name 1308 * tipc_send2port - send message sections to port identity
1329 */
1330
1331int tipc_send2name(u32 ref,
1332 struct tipc_name const *name,
1333 unsigned int domain,
1334 unsigned int num_sect,
1335 struct iovec const *msg_sect)
1336{
1337 struct tipc_portid orig;
1338
1339 orig.ref = ref;
1340 orig.node = tipc_own_addr;
1341 return tipc_forward2name(ref, name, domain, num_sect, msg_sect, &orig,
1342 TIPC_PORT_IMPORTANCE);
1343}
1344
1345/**
1346 * tipc_forward2port - forward message sections to port identity
1347 */ 1309 */
1348 1310
1349static int tipc_forward2port(u32 ref, 1311int tipc_send2port(u32 ref, struct tipc_portid const *dest,
1350 struct tipc_portid const *dest, 1312 unsigned int num_sect, struct iovec const *msg_sect)
1351 unsigned int num_sect,
1352 struct iovec const *msg_sect,
1353 struct tipc_portid const *orig,
1354 unsigned int importance)
1355{ 1313{
1356 struct port *p_ptr; 1314 struct port *p_ptr;
1357 struct tipc_msg *msg; 1315 struct tipc_msg *msg;
@@ -1363,13 +1321,11 @@ static int tipc_forward2port(u32 ref,
1363 1321
1364 msg = &p_ptr->publ.phdr; 1322 msg = &p_ptr->publ.phdr;
1365 msg_set_type(msg, TIPC_DIRECT_MSG); 1323 msg_set_type(msg, TIPC_DIRECT_MSG);
1366 msg_set_orignode(msg, orig->node); 1324 msg_set_orignode(msg, tipc_own_addr);
1367 msg_set_origport(msg, orig->ref); 1325 msg_set_origport(msg, ref);
1368 msg_set_destnode(msg, dest->node); 1326 msg_set_destnode(msg, dest->node);
1369 msg_set_destport(msg, dest->ref); 1327 msg_set_destport(msg, dest->ref);
1370 msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); 1328 msg_set_hdr_sz(msg, DIR_MSG_H_SIZE);
1371 if (importance <= TIPC_CRITICAL_IMPORTANCE)
1372 msg_set_importance(msg, importance);
1373 p_ptr->sent++; 1329 p_ptr->sent++;
1374 if (dest->node == tipc_own_addr) 1330 if (dest->node == tipc_own_addr)
1375 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); 1331 return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
@@ -1384,31 +1340,11 @@ static int tipc_forward2port(u32 ref,
1384} 1340}
1385 1341
1386/** 1342/**
1387 * tipc_send2port - send message sections to port identity 1343 * tipc_send_buf2port - send message buffer to port identity
1388 */ 1344 */
1389 1345
1390int tipc_send2port(u32 ref, 1346int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest,
1391 struct tipc_portid const *dest, 1347 struct sk_buff *buf, unsigned int dsz)
1392 unsigned int num_sect,
1393 struct iovec const *msg_sect)
1394{
1395 struct tipc_portid orig;
1396
1397 orig.ref = ref;
1398 orig.node = tipc_own_addr;
1399 return tipc_forward2port(ref, dest, num_sect, msg_sect, &orig,
1400 TIPC_PORT_IMPORTANCE);
1401}
1402
1403/**
1404 * tipc_forward_buf2port - forward message buffer to port identity
1405 */
1406static int tipc_forward_buf2port(u32 ref,
1407 struct tipc_portid const *dest,
1408 struct sk_buff *buf,
1409 unsigned int dsz,
1410 struct tipc_portid const *orig,
1411 unsigned int importance)
1412{ 1348{
1413 struct port *p_ptr; 1349 struct port *p_ptr;
1414 struct tipc_msg *msg; 1350 struct tipc_msg *msg;
@@ -1420,13 +1356,11 @@ static int tipc_forward_buf2port(u32 ref,
1420 1356
1421 msg = &p_ptr->publ.phdr; 1357 msg = &p_ptr->publ.phdr;
1422 msg_set_type(msg, TIPC_DIRECT_MSG); 1358 msg_set_type(msg, TIPC_DIRECT_MSG);
1423 msg_set_orignode(msg, orig->node); 1359 msg_set_orignode(msg, tipc_own_addr);
1424 msg_set_origport(msg, orig->ref); 1360 msg_set_origport(msg, ref);
1425 msg_set_destnode(msg, dest->node); 1361 msg_set_destnode(msg, dest->node);
1426 msg_set_destport(msg, dest->ref); 1362 msg_set_destport(msg, dest->ref);
1427 msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); 1363 msg_set_hdr_sz(msg, DIR_MSG_H_SIZE);
1428 if (importance <= TIPC_CRITICAL_IMPORTANCE)
1429 msg_set_importance(msg, importance);
1430 msg_set_size(msg, DIR_MSG_H_SIZE + dsz); 1364 msg_set_size(msg, DIR_MSG_H_SIZE + dsz);
1431 if (skb_cow(buf, DIR_MSG_H_SIZE)) 1365 if (skb_cow(buf, DIR_MSG_H_SIZE))
1432 return -ENOMEM; 1366 return -ENOMEM;
@@ -1445,20 +1379,3 @@ static int tipc_forward_buf2port(u32 ref,
1445 return -ELINKCONG; 1379 return -ELINKCONG;
1446} 1380}
1447 1381
1448/**
1449 * tipc_send_buf2port - send message buffer to port identity
1450 */
1451
1452int tipc_send_buf2port(u32 ref,
1453 struct tipc_portid const *dest,
1454 struct sk_buff *buf,
1455 unsigned int dsz)
1456{
1457 struct tipc_portid orig;
1458
1459 orig.ref = ref;
1460 orig.node = tipc_own_addr;
1461 return tipc_forward_buf2port(ref, dest, buf, dsz, &orig,
1462 TIPC_PORT_IMPORTANCE);
1463}
1464
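
With the tipc_forward2*() layer removed above, the public send routines stamp the local origin (tipc_own_addr plus the caller's own port ref) straight into the preformatted header; no tipc_portid origin or importance override travels through the call any more. A hedged sketch of a native-API caller against the new tipc_send2name() signature; my_ref, the name values, and the domain are hypothetical (kernel context assumed):

/* Sketch only: assumes a port reference from tipc_createport(). */
static int example_send(u32 my_ref)
{
        struct tipc_name name = { .type = 1000, .instance = 1 };
        char payload[] = "hello";
        struct iovec iov = {
                .iov_base = payload,
                .iov_len  = sizeof(payload),
        };

        /*
         * Origin node and port are now filled in from tipc_own_addr
         * and my_ref inside tipc_send2name() itself.  Domain 0 here
         * stands for an unrestricted name lookup (an assumption).
         */
        return tipc_send2name(my_ref, &name, 0, 1, &iov);
}
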
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 73bbf442b34..3a807fcec2b 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -37,13 +37,44 @@
37#ifndef _TIPC_PORT_H 37#ifndef _TIPC_PORT_H
38#define _TIPC_PORT_H 38#define _TIPC_PORT_H
39 39
40#include "core.h"
41#include "ref.h" 40#include "ref.h"
42#include "net.h" 41#include "net.h"
43#include "msg.h" 42#include "msg.h"
44#include "dbg.h"
45#include "node_subscr.h" 43#include "node_subscr.h"
46 44
45#define TIPC_FLOW_CONTROL_WIN 512
46
47typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref,
48 struct sk_buff **buf, unsigned char const *data,
49 unsigned int size, int reason,
50 struct tipc_portid const *attmpt_destid);
51
52typedef void (*tipc_named_msg_err_event) (void *usr_handle, u32 portref,
53 struct sk_buff **buf, unsigned char const *data,
54 unsigned int size, int reason,
55 struct tipc_name_seq const *attmpt_dest);
56
57typedef void (*tipc_conn_shutdown_event) (void *usr_handle, u32 portref,
58 struct sk_buff **buf, unsigned char const *data,
59 unsigned int size, int reason);
60
61typedef void (*tipc_msg_event) (void *usr_handle, u32 portref,
62 struct sk_buff **buf, unsigned char const *data,
63 unsigned int size, unsigned int importance,
64 struct tipc_portid const *origin);
65
66typedef void (*tipc_named_msg_event) (void *usr_handle, u32 portref,
67 struct sk_buff **buf, unsigned char const *data,
68 unsigned int size, unsigned int importance,
69 struct tipc_portid const *orig,
70 struct tipc_name_seq const *dest);
71
72typedef void (*tipc_conn_msg_event) (void *usr_handle, u32 portref,
73 struct sk_buff **buf, unsigned char const *data,
74 unsigned int size);
75
76typedef void (*tipc_continue_event) (void *usr_handle, u32 portref);
77
47/** 78/**
48 * struct user_port - TIPC user port (used with native API) 79 * struct user_port - TIPC user port (used with native API)
49 * @user_ref: id of user who created user port 80 * @user_ref: id of user who created user port
@@ -68,6 +99,34 @@ struct user_port {
68}; 99};
69 100
70/** 101/**
102 * struct tipc_port - TIPC port info available to socket API
103 * @usr_handle: pointer to additional user-defined information about port
104 * @lock: pointer to spinlock for controlling access to port
105 * @connected: non-zero if port is currently connected to a peer port
106 * @conn_type: TIPC type used when connection was established
107 * @conn_instance: TIPC instance used when connection was established
108 * @conn_unacked: number of unacknowledged messages received from peer port
109 * @published: non-zero if port has one or more associated names
110 * @congested: non-zero if cannot send because of link or port congestion
111 * @max_pkt: maximum packet size "hint" used when building messages sent by port
112 * @ref: unique reference to port in TIPC object registry
113 * @phdr: preformatted message header used when sending messages
114 */
115struct tipc_port {
116 void *usr_handle;
117 spinlock_t *lock;
118 int connected;
119 u32 conn_type;
120 u32 conn_instance;
121 u32 conn_unacked;
122 int published;
123 u32 congested;
124 u32 max_pkt;
125 u32 ref;
126 struct tipc_msg phdr;
127};
128
129/**
71 * struct port - TIPC port structure 130 * struct port - TIPC port structure
72 * @publ: TIPC port info available to privileged users 131 * @publ: TIPC port info available to privileged users
73 * @port_list: adjacent ports in TIPC's global list of ports 132 * @port_list: adjacent ports in TIPC's global list of ports
@@ -109,11 +168,76 @@ struct port {
109extern spinlock_t tipc_port_list_lock; 168extern spinlock_t tipc_port_list_lock;
110struct port_list; 169struct port_list;
111 170
171/*
172 * TIPC port manipulation routines
173 */
174struct tipc_port *tipc_createport_raw(void *usr_handle,
175 u32 (*dispatcher)(struct tipc_port *, struct sk_buff *),
176 void (*wakeup)(struct tipc_port *), const u32 importance);
177
178int tipc_reject_msg(struct sk_buff *buf, u32 err);
179
180int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode);
181
182void tipc_acknowledge(u32 port_ref, u32 ack);
183
184int tipc_createport(unsigned int tipc_user, void *usr_handle,
185 unsigned int importance, tipc_msg_err_event error_cb,
186 tipc_named_msg_err_event named_error_cb,
187 tipc_conn_shutdown_event conn_error_cb, tipc_msg_event msg_cb,
188 tipc_named_msg_event named_msg_cb,
189 tipc_conn_msg_event conn_msg_cb,
190 tipc_continue_event continue_event_cb, u32 *portref);
191
192int tipc_deleteport(u32 portref);
193
194int tipc_portimportance(u32 portref, unsigned int *importance);
195int tipc_set_portimportance(u32 portref, unsigned int importance);
196
197int tipc_portunreliable(u32 portref, unsigned int *isunreliable);
198int tipc_set_portunreliable(u32 portref, unsigned int isunreliable);
199
200int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable);
201int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable);
202
203int tipc_publish(u32 portref, unsigned int scope,
204 struct tipc_name_seq const *name_seq);
205int tipc_withdraw(u32 portref, unsigned int scope,
206 struct tipc_name_seq const *name_seq);
207
208int tipc_connect2port(u32 portref, struct tipc_portid const *port);
209
210int tipc_disconnect(u32 portref);
211
212int tipc_shutdown(u32 ref);
213
214
215/*
216 * The following routines require that the port be locked on entry
217 */
218int tipc_disconnect_port(struct tipc_port *tp_ptr);
219
220/*
221 * TIPC messaging routines
222 */
223int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect);
224
225int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain,
226 unsigned int num_sect, struct iovec const *msg_sect);
227
228int tipc_send2port(u32 portref, struct tipc_portid const *dest,
229 unsigned int num_sect, struct iovec const *msg_sect);
230
231int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest,
232 struct sk_buff *buf, unsigned int dsz);
233
234int tipc_multicast(u32 portref, struct tipc_name_seq const *seq,
235 unsigned int section_count, struct iovec const *msg);
236
112int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, 237int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
113 struct iovec const *msg_sect, u32 num_sect, 238 struct iovec const *msg_sect, u32 num_sect,
114 int err); 239 int err);
115struct sk_buff *tipc_port_get_ports(void); 240struct sk_buff *tipc_port_get_ports(void);
116struct sk_buff *port_show_stats(const void *req_tlv_area, int req_tlv_space);
117void tipc_port_recv_proto_msg(struct sk_buff *buf); 241void tipc_port_recv_proto_msg(struct sk_buff *buf);
118void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp); 242void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp);
119void tipc_port_reinit(void); 243void tipc_port_reinit(void);
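
The callback typedefs and the new struct tipc_port above make port.h self-sufficient for the socket layer: it can create a raw port, have its dispatcher called per received buffer, and have its wakeup hook called when congestion clears. A hedged sketch of that creation path; dispatch_cb() and wakeup_cb() are hypothetical socket-layer hooks (kernel context assumed):

/* Sketch only: mirrors the tipc_createport_raw() prototype above. */
static u32 dispatch_cb(struct tipc_port *tport, struct sk_buff *buf)
{
        /* hand buf to the socket stashed in tport->usr_handle; 0 = taken */
        return 0;
}

static void wakeup_cb(struct tipc_port *tport)
{
        /* congestion lifted: wake writers blocked on this port */
}

static struct tipc_port *example_create(void *sock)
{
        return tipc_createport_raw(sock, dispatch_cb, wakeup_cb,
                                   TIPC_LOW_IMPORTANCE);
}
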
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e9f0d500448..cd0bb77f267 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -49,10 +49,9 @@
49 49
50#include <linux/tipc.h> 50#include <linux/tipc.h>
51#include <linux/tipc_config.h> 51#include <linux/tipc_config.h>
52#include <net/tipc/tipc_msg.h>
53#include <net/tipc/tipc_port.h>
54 52
55#include "core.h" 53#include "core.h"
54#include "port.h"
56 55
57#define SS_LISTENING -1 /* socket is listening */ 56#define SS_LISTENING -1 /* socket is listening */
58#define SS_READY -2 /* socket is connectionless */ 57#define SS_READY -2 /* socket is connectionless */
@@ -404,7 +403,8 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
404 addr->addr.id.ref = tsock->peer_name.ref; 403 addr->addr.id.ref = tsock->peer_name.ref;
405 addr->addr.id.node = tsock->peer_name.node; 404 addr->addr.id.node = tsock->peer_name.node;
406 } else { 405 } else {
407 tipc_ownidentity(tsock->p->ref, &addr->addr.id); 406 addr->addr.id.ref = tsock->p->ref;
407 addr->addr.id.node = tipc_own_addr;
408 } 408 }
409 409
410 *uaddr_len = sizeof(*addr); 410 *uaddr_len = sizeof(*addr);
@@ -597,7 +597,6 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
597 break; 597 break;
598 res = tipc_multicast(tport->ref, 598 res = tipc_multicast(tport->ref,
599 &dest->addr.nameseq, 599 &dest->addr.nameseq,
600 0,
601 m->msg_iovlen, 600 m->msg_iovlen,
602 m->msg_iov); 601 m->msg_iov);
603 } 602 }
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 33313961d01..23f43d03980 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -35,10 +35,8 @@
35 */ 35 */
36 36
37#include "core.h" 37#include "core.h"
38#include "dbg.h"
39#include "name_table.h" 38#include "name_table.h"
40#include "port.h" 39#include "user_reg.h"
41#include "ref.h"
42#include "subscr.h" 40#include "subscr.h"
43 41
44/** 42/**
@@ -544,14 +542,14 @@ static void subscr_named_msg_event(void *usr_handle,
544int tipc_subscr_start(void) 542int tipc_subscr_start(void)
545{ 543{
546 struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV}; 544 struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV};
547 int res = -1; 545 int res;
548 546
549 memset(&topsrv, 0, sizeof (topsrv)); 547 memset(&topsrv, 0, sizeof (topsrv));
550 spin_lock_init(&topsrv.lock); 548 spin_lock_init(&topsrv.lock);
551 INIT_LIST_HEAD(&topsrv.subscriber_list); 549 INIT_LIST_HEAD(&topsrv.subscriber_list);
552 550
553 spin_lock_bh(&topsrv.lock); 551 spin_lock_bh(&topsrv.lock);
554 res = tipc_attach(&topsrv.user_ref, NULL, NULL); 552 res = tipc_attach(&topsrv.user_ref);
555 if (res) { 553 if (res) {
556 spin_unlock_bh(&topsrv.lock); 554 spin_unlock_bh(&topsrv.lock);
557 return res; 555 return res;
diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c
index 50692880316..2e2702e2049 100644
--- a/net/tipc/user_reg.c
+++ b/net/tipc/user_reg.c
@@ -50,15 +50,11 @@
50/** 50/**
51 * struct tipc_user - registered TIPC user info 51 * struct tipc_user - registered TIPC user info
52 * @next: index of next free registry entry (or -1 for an allocated entry) 52 * @next: index of next free registry entry (or -1 for an allocated entry)
53 * @callback: ptr to routine to call when TIPC mode changes (NULL if none)
54 * @usr_handle: user-defined value passed to callback routine
55 * @ports: list of user ports owned by the user 53 * @ports: list of user ports owned by the user
56 */ 54 */
57 55
58struct tipc_user { 56struct tipc_user {
59 int next; 57 int next;
60 tipc_mode_event callback;
61 void *usr_handle;
62 struct list_head ports; 58 struct list_head ports;
63}; 59};
64 60
@@ -95,41 +91,12 @@ static int reg_init(void)
95} 91}
96 92
97/** 93/**
98 * reg_callback - inform TIPC user about current operating mode
99 */
100
101static void reg_callback(struct tipc_user *user_ptr)
102{
103 tipc_mode_event cb;
104 void *arg;
105
106 spin_lock_bh(&reg_lock);
107 cb = user_ptr->callback;
108 arg = user_ptr->usr_handle;
109 spin_unlock_bh(&reg_lock);
110
111 if (cb)
112 cb(arg, tipc_mode, tipc_own_addr);
113}
114
115/**
116 * tipc_reg_start - activate TIPC user registry 94 * tipc_reg_start - activate TIPC user registry
117 */ 95 */
118 96
119int tipc_reg_start(void) 97int tipc_reg_start(void)
120{ 98{
121 u32 u; 99 return reg_init();
122 int res;
123
124 if ((res = reg_init()))
125 return res;
126
127 for (u = 1; u <= MAX_USERID; u++) {
128 if (users[u].callback)
129 tipc_k_signal((Handler)reg_callback,
130 (unsigned long)&users[u]);
131 }
132 return 0;
133} 100}
134 101
135/** 102/**
@@ -138,15 +105,9 @@ int tipc_reg_start(void)
138 105
139void tipc_reg_stop(void) 106void tipc_reg_stop(void)
140{ 107{
141 int id;
142
143 if (!users) 108 if (!users)
144 return; 109 return;
145 110
146 for (id = 1; id <= MAX_USERID; id++) {
147 if (users[id].callback)
148 reg_callback(&users[id]);
149 }
150 kfree(users); 111 kfree(users);
151 users = NULL; 112 users = NULL;
152} 113}
@@ -157,12 +118,10 @@ void tipc_reg_stop(void)
157 * NOTE: This routine may be called when TIPC is inactive. 118 * NOTE: This routine may be called when TIPC is inactive.
158 */ 119 */
159 120
160int tipc_attach(u32 *userid, tipc_mode_event cb, void *usr_handle) 121int tipc_attach(u32 *userid)
161{ 122{
162 struct tipc_user *user_ptr; 123 struct tipc_user *user_ptr;
163 124
164 if ((tipc_mode == TIPC_NOT_RUNNING) && !cb)
165 return -ENOPROTOOPT;
166 if (!users) 125 if (!users)
167 reg_init(); 126 reg_init();
168 127
@@ -177,13 +136,9 @@ int tipc_attach(u32 *userid, tipc_mode_event cb, void *usr_handle)
177 user_ptr->next = -1; 136 user_ptr->next = -1;
178 spin_unlock_bh(&reg_lock); 137 spin_unlock_bh(&reg_lock);
179 138
180 user_ptr->callback = cb;
181 user_ptr->usr_handle = usr_handle;
182 INIT_LIST_HEAD(&user_ptr->ports); 139 INIT_LIST_HEAD(&user_ptr->ports);
183 atomic_inc(&tipc_user_count); 140 atomic_inc(&tipc_user_count);
184 141
185 if (cb && (tipc_mode != TIPC_NOT_RUNNING))
186 tipc_k_signal((Handler)reg_callback, (unsigned long)user_ptr);
187 return 0; 142 return 0;
188} 143}
189 144
@@ -207,7 +162,6 @@ void tipc_detach(u32 userid)
207 } 162 }
208 163
209 user_ptr = &users[userid]; 164 user_ptr = &users[userid];
210 user_ptr->callback = NULL;
211 INIT_LIST_HEAD(&ports_temp); 165 INIT_LIST_HEAD(&ports_temp);
212 list_splice(&user_ptr->ports, &ports_temp); 166 list_splice(&user_ptr->ports, &ports_temp);
213 user_ptr->next = next_free_user; 167 user_ptr->next = next_free_user;
diff --git a/net/tipc/user_reg.h b/net/tipc/user_reg.h
index 81dc12e2882..109eed0d6de 100644
--- a/net/tipc/user_reg.h
+++ b/net/tipc/user_reg.h
@@ -42,6 +42,9 @@
42int tipc_reg_start(void); 42int tipc_reg_start(void);
43void tipc_reg_stop(void); 43void tipc_reg_stop(void);
44 44
45int tipc_attach(unsigned int *userref);
46void tipc_detach(unsigned int userref);
47
45int tipc_reg_add_port(struct user_port *up_ptr); 48int tipc_reg_add_port(struct user_port *up_ptr);
46int tipc_reg_remove_port(struct user_port *up_ptr); 49int tipc_reg_remove_port(struct user_port *up_ptr);
47 50
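
tipc_attach() now simply hands back a registry id; the TIPC-mode callback and its usr_handle are gone, as the subscr.c caller above shows. A hedged sketch of the reduced lifecycle; my_user_ref is a hypothetical module-scope id (kernel context assumed):

/* Sketch only: attach is now plain id allocation, no mode callback. */
static u32 my_user_ref;

static int example_attach(void)
{
        int res = tipc_attach(&my_user_ref);

        if (res)
                return res;
        /* ... create ports owned by my_user_ref; tipc_detach() on exit ... */
        return 0;
}
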
diff --git a/net/tipc/zone.c b/net/tipc/zone.c
index 83f8b5e91fc..1b61ca8c48e 100644
--- a/net/tipc/zone.c
+++ b/net/tipc/zone.c
@@ -36,9 +36,6 @@
36 36
37#include "core.h" 37#include "core.h"
38#include "zone.h" 38#include "zone.h"
39#include "net.h"
40#include "addr.h"
41#include "node_subscr.h"
42#include "cluster.h" 39#include "cluster.h"
43#include "node.h" 40#include "node.h"
44 41
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2268e679812..417d7a6c36c 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -316,7 +316,8 @@ static void unix_write_space(struct sock *sk)
316 if (unix_writable(sk)) { 316 if (unix_writable(sk)) {
317 wq = rcu_dereference(sk->sk_wq); 317 wq = rcu_dereference(sk->sk_wq);
318 if (wq_has_sleeper(wq)) 318 if (wq_has_sleeper(wq))
319 wake_up_interruptible_sync(&wq->wait); 319 wake_up_interruptible_sync_poll(&wq->wait,
320 POLLOUT | POLLWRNORM | POLLWRBAND);
320 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 321 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
321 } 322 }
322 rcu_read_unlock(); 323 rcu_read_unlock();
@@ -1736,7 +1737,8 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1736 goto out_unlock; 1737 goto out_unlock;
1737 } 1738 }
1738 1739
1739 wake_up_interruptible_sync(&u->peer_wait); 1740 wake_up_interruptible_sync_poll(&u->peer_wait,
1741 POLLOUT | POLLWRNORM | POLLWRBAND);
1740 1742
1741 if (msg->msg_name) 1743 if (msg->msg_name)
1742 unix_copy_addr(msg, skb->sk); 1744 unix_copy_addr(msg, skb->sk);
@@ -2099,13 +2101,12 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2099 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 2101 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2100 mask |= POLLERR; 2102 mask |= POLLERR;
2101 if (sk->sk_shutdown & RCV_SHUTDOWN) 2103 if (sk->sk_shutdown & RCV_SHUTDOWN)
2102 mask |= POLLRDHUP; 2104 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2103 if (sk->sk_shutdown == SHUTDOWN_MASK) 2105 if (sk->sk_shutdown == SHUTDOWN_MASK)
2104 mask |= POLLHUP; 2106 mask |= POLLHUP;
2105 2107
2106 /* readable? */ 2108 /* readable? */
2107 if (!skb_queue_empty(&sk->sk_receive_queue) || 2109 if (!skb_queue_empty(&sk->sk_receive_queue))
2108 (sk->sk_shutdown & RCV_SHUTDOWN))
2109 mask |= POLLIN | POLLRDNORM; 2110 mask |= POLLIN | POLLRDNORM;
2110 2111
2111 /* Connection-based need to check for termination and startup */ 2112 /* Connection-based need to check for termination and startup */
@@ -2117,20 +2118,19 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2117 return mask; 2118 return mask;
2118 } 2119 }
2119 2120
2120 /* writable? */ 2121 /* No write status requested, avoid expensive OUT tests. */
2121 writable = unix_writable(sk); 2122 if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT)))
2122 if (writable) { 2123 return mask;
2123 other = unix_peer_get(sk);
2124 if (other) {
2125 if (unix_peer(other) != sk) {
2126 sock_poll_wait(file, &unix_sk(other)->peer_wait,
2127 wait);
2128 if (unix_recvq_full(other))
2129 writable = 0;
2130 }
2131 2124
2132 sock_put(other); 2125 writable = unix_writable(sk);
2126 other = unix_peer_get(sk);
2127 if (other) {
2128 if (unix_peer(other) != sk) {
2129 sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2130 if (unix_recvq_full(other))
2131 writable = 0;
2133 } 2132 }
2133 sock_put(other);
2134 } 2134 }
2135 2135
2136 if (writable) 2136 if (writable)
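
Two changes cooperate in the datagram poll path above: wakeups now carry their event mask via wake_up_interruptible_sync_poll(), so pollers keyed on other events stay asleep, and unix_dgram_poll() returns before the peer-queue probe when the caller never asked about writability. A hedged sketch of that key-check idiom, with the cheap status checks elided (kernel context assumed):

/* Sketch only: skip expensive write-side work nobody polled for. */
static unsigned int example_poll(struct file *file, struct socket *sock,
                                 poll_table *wait)
{
        unsigned int mask = 0;

        sock_poll_wait(file, sk_sleep(sock->sk), wait);

        /* ... cheap POLLIN/POLLHUP/POLLERR reporting happens here ... */

        if (wait && !(wait->key & (POLLOUT | POLLWRNORM | POLLWRBAND)))
                return mask;    /* caller did not ask about writes */

        /* ... expensive peer-queue writability checks go here ... */
        return mask;
}
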
diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile
index 9f188ab3dcd..4da14bc4807 100644
--- a/net/wanrouter/Makefile
+++ b/net/wanrouter/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_WAN_ROUTER) += wanrouter.o 5obj-$(CONFIG_WAN_ROUTER) += wanrouter.o
6 6
7wanrouter-objs := wanproc.o wanmain.o 7wanrouter-y := wanproc.o wanmain.o
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 9c21ebf9780..630bcf0a2f0 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -4,6 +4,8 @@
4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> 4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
7#include <linux/if.h> 9#include <linux/if.h>
8#include <linux/module.h> 10#include <linux/module.h>
9#include <linux/err.h> 11#include <linux/err.h>
@@ -216,8 +218,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
216 rdev->wiphy.debugfsdir, 218 rdev->wiphy.debugfsdir,
217 rdev->wiphy.debugfsdir->d_parent, 219 rdev->wiphy.debugfsdir->d_parent,
218 newname)) 220 newname))
219 printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n", 221 pr_err("failed to rename debugfs dir to %s!\n", newname);
220 newname);
221 222
222 nl80211_notify_dev_rename(rdev); 223 nl80211_notify_dev_rename(rdev);
223 224
@@ -699,8 +700,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
699 700
700 if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, 701 if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
701 "phy80211")) { 702 "phy80211")) {
702 printk(KERN_ERR "wireless: failed to add phy80211 " 703 pr_err("failed to add phy80211 symlink to netdev!\n");
703 "symlink to netdev!\n");
704 } 704 }
705 wdev->netdev = dev; 705 wdev->netdev = dev;
706 wdev->sme_state = CFG80211_SME_IDLE; 706 wdev->sme_state = CFG80211_SME_IDLE;
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
index 97d411f7450..3268fac5ab2 100644
--- a/net/wireless/lib80211.c
+++ b/net/wireless/lib80211.c
@@ -13,6 +13,8 @@
13 * 13 *
14 */ 14 */
15 15
16#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
17
16#include <linux/module.h> 18#include <linux/module.h>
17#include <linux/ctype.h> 19#include <linux/ctype.h>
18#include <linux/ieee80211.h> 20#include <linux/ieee80211.h>
@@ -224,8 +226,8 @@ int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops)
224 return -EINVAL; 226 return -EINVAL;
225 227
226 found: 228 found:
227 printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm " 229 printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm '%s'\n",
228 "'%s'\n", ops->name); 230 ops->name);
229 list_del(&alg->list); 231 list_del(&alg->list);
230 spin_unlock_irqrestore(&lib80211_crypto_lock, flags); 232 spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
231 kfree(alg); 233 kfree(alg);
@@ -270,7 +272,7 @@ static struct lib80211_crypto_ops lib80211_crypt_null = {
270 272
271static int __init lib80211_init(void) 273static int __init lib80211_init(void)
272{ 274{
273 printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n"); 275 pr_info(DRV_DESCRIPTION "\n");
274 return lib80211_register_crypto_ops(&lib80211_crypt_null); 276 return lib80211_register_crypto_ops(&lib80211_crypt_null);
275} 277}
276 278
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 0fe40510e2c..7ea4f2b0770 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -10,6 +10,8 @@
10 * more details. 10 * more details.
11 */ 11 */
12 12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
13#include <linux/err.h> 15#include <linux/err.h>
14#include <linux/module.h> 16#include <linux/module.h>
15#include <linux/init.h> 17#include <linux/init.h>
@@ -99,8 +101,7 @@ static void *lib80211_tkip_init(int key_idx)
99 priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, 101 priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
100 CRYPTO_ALG_ASYNC); 102 CRYPTO_ALG_ASYNC);
101 if (IS_ERR(priv->tx_tfm_arc4)) { 103 if (IS_ERR(priv->tx_tfm_arc4)) {
102 printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " 104 printk(KERN_DEBUG pr_fmt("could not allocate crypto API arc4\n"));
103 "crypto API arc4\n");
104 priv->tx_tfm_arc4 = NULL; 105 priv->tx_tfm_arc4 = NULL;
105 goto fail; 106 goto fail;
106 } 107 }
@@ -108,8 +109,7 @@ static void *lib80211_tkip_init(int key_idx)
108 priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0, 109 priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
109 CRYPTO_ALG_ASYNC); 110 CRYPTO_ALG_ASYNC);
110 if (IS_ERR(priv->tx_tfm_michael)) { 111 if (IS_ERR(priv->tx_tfm_michael)) {
111 printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " 112 printk(KERN_DEBUG pr_fmt("could not allocate crypto API michael_mic\n"));
112 "crypto API michael_mic\n");
113 priv->tx_tfm_michael = NULL; 113 priv->tx_tfm_michael = NULL;
114 goto fail; 114 goto fail;
115 } 115 }
@@ -117,8 +117,7 @@ static void *lib80211_tkip_init(int key_idx)
117 priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, 117 priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
118 CRYPTO_ALG_ASYNC); 118 CRYPTO_ALG_ASYNC);
119 if (IS_ERR(priv->rx_tfm_arc4)) { 119 if (IS_ERR(priv->rx_tfm_arc4)) {
120 printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " 120 printk(KERN_DEBUG pr_fmt("could not allocate crypto API arc4\n"));
121 "crypto API arc4\n");
122 priv->rx_tfm_arc4 = NULL; 121 priv->rx_tfm_arc4 = NULL;
123 goto fail; 122 goto fail;
124 } 123 }
@@ -126,8 +125,7 @@ static void *lib80211_tkip_init(int key_idx)
126 priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0, 125 priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
127 CRYPTO_ALG_ASYNC); 126 CRYPTO_ALG_ASYNC);
128 if (IS_ERR(priv->rx_tfm_michael)) { 127 if (IS_ERR(priv->rx_tfm_michael)) {
129 printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " 128 printk(KERN_DEBUG pr_fmt("could not allocate crypto API michael_mic\n"));
130 "crypto API michael_mic\n");
131 priv->rx_tfm_michael = NULL; 129 priv->rx_tfm_michael = NULL;
132 goto fail; 130 goto fail;
133 } 131 }
@@ -536,7 +534,7 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr,
536 struct scatterlist sg[2]; 534 struct scatterlist sg[2];
537 535
538 if (tfm_michael == NULL) { 536 if (tfm_michael == NULL) {
539 printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n"); 537 pr_warn("%s(): tfm_michael == NULL\n", __func__);
540 return -1; 538 return -1;
541 } 539 }
542 sg_init_table(sg, 2); 540 sg_init_table(sg, 2);
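
Every wireless conversion above leans on one convention: define pr_fmt() before the first include and each pr_err()/pr_info()/pr_warn() in the file gains the "modname: " prefix automatically, while raw printk(KERN_DEBUG ...) call sites get the same prefix by wrapping their format in pr_fmt() by hand. A throwaway userspace mock of the mechanism, with "example" standing in for KBUILD_MODNAME:

/* Userspace mock only; the kernel defines pr_info() itself. */
#define pr_fmt(fmt) "example: " fmt
#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

#include <stdio.h>

int main(void)
{
        pr_info("loaded\n");                    /* "example: loaded"   */
        printf(pr_fmt("raw printk path\n"));    /* same prefix by hand */
        return 0;
}
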
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 26838d903b9..6980a0c315b 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1028,3 +1028,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
1028 nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp); 1028 nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp);
1029} 1029}
1030EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); 1030EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
1031
1032void cfg80211_cqm_pktloss_notify(struct net_device *dev,
1033 const u8 *peer, u32 num_packets, gfp_t gfp)
1034{
1035 struct wireless_dev *wdev = dev->ieee80211_ptr;
1036 struct wiphy *wiphy = wdev->wiphy;
1037 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
1038
1039 /* Indicate roaming trigger event to user space */
1040 nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp);
1041}
1042EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);
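
cfg80211_cqm_pktloss_notify() gives drivers a standard way to report sustained packet loss toward a peer so user space can react, for example by roaming. A hedged sketch of a caller; the threshold and the loss counter are hypothetical driver bookkeeping (kernel context assumed):

/* Sketch only: signature matches the export above. */
static void example_tx_status(struct net_device *dev, const u8 *peer_addr,
                              u32 consecutive_losses)
{
        if (consecutive_losses >= 50)
                cfg80211_cqm_pktloss_notify(dev, peer_addr,
                                            consecutive_losses, GFP_ATOMIC);
}
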
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4e78e3f2679..67ff7e92cb9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -166,7 +166,13 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
166 166
167 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, 167 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
168 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, 168 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
169
169 [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, 170 [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 },
171
172 [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 },
173 [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 },
174
175 [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 },
170}; 176};
171 177
172/* policy for the key attributes */ 178/* policy for the key attributes */
@@ -526,7 +532,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
526 dev->wiphy.rts_threshold); 532 dev->wiphy.rts_threshold);
527 NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, 533 NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
528 dev->wiphy.coverage_class); 534 dev->wiphy.coverage_class);
529
530 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, 535 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
531 dev->wiphy.max_scan_ssids); 536 dev->wiphy.max_scan_ssids);
532 NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, 537 NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
@@ -545,6 +550,16 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
545 if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) 550 if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
546 NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); 551 NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
547 552
553 if (dev->ops->get_antenna) {
554 u32 tx_ant = 0, rx_ant = 0;
555 int res;
556 res = dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant);
557 if (!res) {
558 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, tx_ant);
559 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, rx_ant);
560 }
561 }
562
548 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); 563 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
549 if (!nl_modes) 564 if (!nl_modes)
550 goto nla_put_failure; 565 goto nla_put_failure;
@@ -1024,6 +1039,22 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
1024 goto bad_res; 1039 goto bad_res;
1025 } 1040 }
1026 1041
1042 if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] &&
1043 info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) {
1044 u32 tx_ant, rx_ant;
1045 if (!rdev->ops->set_antenna) {
1046 result = -EOPNOTSUPP;
1047 goto bad_res;
1048 }
1049
1050 tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]);
1051 rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]);
1052
1053 result = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant);
1054 if (result)
1055 goto bad_res;
1056 }
1057
1027 changed = 0; 1058 changed = 0;
1028 1059
1029 if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { 1060 if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) {
@@ -3569,6 +3600,34 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
3569 local_state_change); 3600 local_state_change);
3570} 3601}
3571 3602
3603static bool
3604nl80211_parse_mcast_rate(struct cfg80211_registered_device *rdev,
3605 int mcast_rate[IEEE80211_NUM_BANDS],
3606 int rateval)
3607{
3608 struct wiphy *wiphy = &rdev->wiphy;
3609 bool found = false;
3610 int band, i;
3611
3612 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
3613 struct ieee80211_supported_band *sband;
3614
3615 sband = wiphy->bands[band];
3616 if (!sband)
3617 continue;
3618
3619 for (i = 0; i < sband->n_bitrates; i++) {
3620 if (sband->bitrates[i].bitrate == rateval) {
3621 mcast_rate[band] = i + 1;
3622 found = true;
3623 break;
3624 }
3625 }
3626 }
3627
3628 return found;
3629}
3630
3572static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) 3631static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
3573{ 3632{
3574 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 3633 struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -3653,6 +3712,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
3653 } 3712 }
3654 } 3713 }
3655 3714
3715 if (info->attrs[NL80211_ATTR_MCAST_RATE] &&
3716 !nl80211_parse_mcast_rate(rdev, ibss.mcast_rate,
3717 nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE])))
3718 return -EINVAL;
3719
3656 if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) { 3720 if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
3657 connkeys = nl80211_parse_connkeys(rdev, 3721 connkeys = nl80211_parse_connkeys(rdev,
3658 info->attrs[NL80211_ATTR_KEYS]); 3722 info->attrs[NL80211_ATTR_KEYS]);
@@ -5651,6 +5715,51 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
5651 nlmsg_free(msg); 5715 nlmsg_free(msg);
5652} 5716}
5653 5717
5718void
5719nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
5720 struct net_device *netdev, const u8 *peer,
5721 u32 num_packets, gfp_t gfp)
5722{
5723 struct sk_buff *msg;
5724 struct nlattr *pinfoattr;
5725 void *hdr;
5726
5727 msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
5728 if (!msg)
5729 return;
5730
5731 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM);
5732 if (!hdr) {
5733 nlmsg_free(msg);
5734 return;
5735 }
5736
5737 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
5738 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
5739 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, peer);
5740
5741 pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM);
5742 if (!pinfoattr)
5743 goto nla_put_failure;
5744
5745 NLA_PUT_U32(msg, NL80211_ATTR_CQM_PKT_LOSS_EVENT, num_packets);
5746
5747 nla_nest_end(msg, pinfoattr);
5748
5749 if (genlmsg_end(msg, hdr) < 0) {
5750 nlmsg_free(msg);
5751 return;
5752 }
5753
5754 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
5755 nl80211_mlme_mcgrp.id, gfp);
5756 return;
5757
5758 nla_put_failure:
5759 genlmsg_cancel(msg, hdr);
5760 nlmsg_free(msg);
5761}
5762
5654static int nl80211_netlink_notify(struct notifier_block * nb, 5763static int nl80211_netlink_notify(struct notifier_block * nb,
5655 unsigned long state, 5764 unsigned long state,
5656 void *_notify) 5765 void *_notify)
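
The NL80211_ATTR_WIPHY_ANTENNA_TX/RX plumbing above round-trips antenna bitmasks through the driver's get_antenna/set_antenna ops. A hedged sketch of the driver half; struct example_priv and its fields are hypothetical (kernel context assumed):

/* Sketch only: per-device state a real driver would keep somewhere. */
struct example_priv {
        u32 avail_ant;          /* chains the hardware actually has */
        u32 tx_ant, rx_ant;
};

static int example_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant)
{
        struct example_priv *priv = wiphy_priv(wiphy);

        if ((tx_ant & ~priv->avail_ant) || (rx_ant & ~priv->avail_ant))
                return -EINVAL; /* rejects chains the hardware lacks */

        priv->tx_ant = tx_ant;
        priv->rx_ant = rx_ant;
        return 0;
}

static int example_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant)
{
        struct example_priv *priv = wiphy_priv(wiphy);

        *tx_ant = priv->tx_ant;
        *rx_ant = priv->rx_ant;
        return 0;
}
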
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 30d2f939150..16c2f719076 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -87,5 +87,9 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
87 struct net_device *netdev, 87 struct net_device *netdev,
88 enum nl80211_cqm_rssi_threshold_event rssi_event, 88 enum nl80211_cqm_rssi_threshold_event rssi_event,
89 gfp_t gfp); 89 gfp_t gfp);
90void
91nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
92 struct net_device *netdev, const u8 *peer,
93 u32 num_packets, gfp_t gfp);
90 94
91#endif /* __NET_WIRELESS_NL80211_H */ 95#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4b9f8912526..5ed615f94e0 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -32,6 +32,9 @@
32 * rely on some SHA1 checksum of the regdomain for example. 32 * rely on some SHA1 checksum of the regdomain for example.
33 * 33 *
34 */ 34 */
35
36#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37
35#include <linux/kernel.h> 38#include <linux/kernel.h>
36#include <linux/slab.h> 39#include <linux/slab.h>
37#include <linux/list.h> 40#include <linux/list.h>
@@ -48,7 +51,7 @@
48#ifdef CONFIG_CFG80211_REG_DEBUG 51#ifdef CONFIG_CFG80211_REG_DEBUG
49#define REG_DBG_PRINT(format, args...) \ 52#define REG_DBG_PRINT(format, args...) \
50 do { \ 53 do { \
51 printk(KERN_DEBUG format , ## args); \ 54 printk(KERN_DEBUG pr_fmt(format), ##args); \
52 } while (0) 55 } while (0)
53#else 56#else
54#define REG_DBG_PRINT(args...) 57#define REG_DBG_PRINT(args...)
@@ -96,6 +99,9 @@ struct reg_beacon {
96 struct ieee80211_channel chan; 99 struct ieee80211_channel chan;
97}; 100};
98 101
102static void reg_todo(struct work_struct *work);
103static DECLARE_WORK(reg_work, reg_todo);
104
99/* We keep a static world regulatory domain in case of the absence of CRDA */ 105/* We keep a static world regulatory domain in case of the absence of CRDA */
100static const struct ieee80211_regdomain world_regdom = { 106static const struct ieee80211_regdomain world_regdom = {
101 .n_reg_rules = 5, 107 .n_reg_rules = 5,
@@ -367,11 +373,10 @@ static int call_crda(const char *alpha2)
367 }; 373 };
368 374
369 if (!is_world_regdom((char *) alpha2)) 375 if (!is_world_regdom((char *) alpha2))
370 printk(KERN_INFO "cfg80211: Calling CRDA for country: %c%c\n", 376 pr_info("Calling CRDA for country: %c%c\n",
371 alpha2[0], alpha2[1]); 377 alpha2[0], alpha2[1]);
372 else 378 else
373 printk(KERN_INFO "cfg80211: Calling CRDA to update world " 379 pr_info("Calling CRDA to update world regulatory domain\n");
374 "regulatory domain\n");
375 380
376 /* query internal regulatory database (if it exists) */ 381 /* query internal regulatory database (if it exists) */
377 reg_regdb_query(alpha2); 382 reg_regdb_query(alpha2);
@@ -711,6 +716,60 @@ int freq_reg_info(struct wiphy *wiphy,
711} 716}
712EXPORT_SYMBOL(freq_reg_info); 717EXPORT_SYMBOL(freq_reg_info);
713 718
719#ifdef CONFIG_CFG80211_REG_DEBUG
720static const char *reg_initiator_name(enum nl80211_reg_initiator initiator)
721{
722 switch (initiator) {
723 case NL80211_REGDOM_SET_BY_CORE:
724 return "Set by core";
725 case NL80211_REGDOM_SET_BY_USER:
726 return "Set by user";
727 case NL80211_REGDOM_SET_BY_DRIVER:
728 return "Set by driver";
729 case NL80211_REGDOM_SET_BY_COUNTRY_IE:
730 return "Set by country IE";
731 default:
732 WARN_ON(1);
733 return "Set by bug";
734 }
735}
736
737static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan,
738 u32 desired_bw_khz,
739 const struct ieee80211_reg_rule *reg_rule)
740{
741 const struct ieee80211_power_rule *power_rule;
742 const struct ieee80211_freq_range *freq_range;
743 char max_antenna_gain[32];
744
745 power_rule = &reg_rule->power_rule;
746 freq_range = &reg_rule->freq_range;
747
748 if (!power_rule->max_antenna_gain)
749 snprintf(max_antenna_gain, 32, "N/A");
750 else
751 snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain);
752
753 REG_DBG_PRINT("Updating information on frequency %d MHz "
754 "for a %d MHz width channel with regulatory rule:\n",
755 chan->center_freq,
756 KHZ_TO_MHZ(desired_bw_khz));
757
758 REG_DBG_PRINT("%d KHz - %d KHz, (%s mBi, %d mBm)\n",
759 freq_range->start_freq_khz,
760 freq_range->end_freq_khz,
761 max_antenna_gain,
762 power_rule->max_eirp);
763}
764#else
765static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan,
766 u32 desired_bw_khz,
767 const struct ieee80211_reg_rule *reg_rule)
768{
769 return;
770}
771#endif
772
714/* 773/*
715 * Note that right now we assume the desired channel bandwidth 774 * Note that right now we assume the desired channel bandwidth
716 * is always 20 MHz for each individual channel (HT40 uses 20 MHz 775 * is always 20 MHz for each individual channel (HT40 uses 20 MHz
@@ -720,7 +779,9 @@ EXPORT_SYMBOL(freq_reg_info);
720 * on the wiphy with the target_bw specified. Then we can simply use 779 * on the wiphy with the target_bw specified. Then we can simply use
721 * that below for the desired_bw_khz below. 780 * that below for the desired_bw_khz below.
722 */ 781 */
723static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, 782static void handle_channel(struct wiphy *wiphy,
783 enum nl80211_reg_initiator initiator,
784 enum ieee80211_band band,
724 unsigned int chan_idx) 785 unsigned int chan_idx)
725{ 786{
726 int r; 787 int r;
@@ -748,8 +809,27 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
748 desired_bw_khz, 809 desired_bw_khz,
749 &reg_rule); 810 &reg_rule);
750 811
751 if (r) 812 if (r) {
813 /*
814 * We will disable all channels that do not match our
815 * received regulatory rule unless the hint is coming
816 * from a Country IE and the Country IE had no information
817 * about a band. The IEEE 802.11 spec allows for an AP
818 * to send only a subset of the regulatory rules allowed,
819 * so an AP in the US that only supports 2.4 GHz may only send
820 * a country IE with information for the 2.4 GHz band
821 * while 5 GHz is still supported.
822 */
823 if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE &&
824 r == -ERANGE)
825 return;
826
827 REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq);
828 chan->flags = IEEE80211_CHAN_DISABLED;
752 return; 829 return;
830 }
831
832 chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule);
753 833
754 power_rule = &reg_rule->power_rule; 834 power_rule = &reg_rule->power_rule;
755 freq_range = &reg_rule->freq_range; 835 freq_range = &reg_rule->freq_range;
@@ -784,7 +864,9 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
784 chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); 864 chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp);
785} 865}
786 866
787static void handle_band(struct wiphy *wiphy, enum ieee80211_band band) 867static void handle_band(struct wiphy *wiphy,
868 enum ieee80211_band band,
869 enum nl80211_reg_initiator initiator)
788{ 870{
789 unsigned int i; 871 unsigned int i;
790 struct ieee80211_supported_band *sband; 872 struct ieee80211_supported_band *sband;
@@ -793,24 +875,42 @@ static void handle_band(struct wiphy *wiphy, enum ieee80211_band band)
793 sband = wiphy->bands[band]; 875 sband = wiphy->bands[band];
794 876
795 for (i = 0; i < sband->n_channels; i++) 877 for (i = 0; i < sband->n_channels; i++)
796 handle_channel(wiphy, band, i); 878 handle_channel(wiphy, initiator, band, i);
797} 879}
798 880
799static bool ignore_reg_update(struct wiphy *wiphy, 881static bool ignore_reg_update(struct wiphy *wiphy,
800 enum nl80211_reg_initiator initiator) 882 enum nl80211_reg_initiator initiator)
801{ 883{
802 if (!last_request) 884 if (!last_request) {
885 REG_DBG_PRINT("Ignoring regulatory request %s since "
886 "last_request is not set\n",
887 reg_initiator_name(initiator));
803 return true; 888 return true;
889 }
890
804 if (initiator == NL80211_REGDOM_SET_BY_CORE && 891 if (initiator == NL80211_REGDOM_SET_BY_CORE &&
805 wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) 892 wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) {
893 REG_DBG_PRINT("Ignoring regulatory request %s "
894 "since the driver uses its own custom "
895 "regulatory domain\n",
896 reg_initiator_name(initiator));
806 return true; 897 return true;
898 }
899
807 /* 900 /*
808 * wiphy->regd will be set once the device has its own 901 * wiphy->regd will be set once the device has its own
809 * desired regulatory domain set 902 * desired regulatory domain set
810 */ 903 */
811 if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd && 904 if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd &&
812 !is_world_regdom(last_request->alpha2)) 905 initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
906 !is_world_regdom(last_request->alpha2)) {
907 REG_DBG_PRINT("Ignoring regulatory request %s "
908 "since the driver requires its own regulatory "
909 "domain to be set first\n",
910 reg_initiator_name(initiator));
813 return true; 911 return true;
912 }
913
814 return false; 914 return false;
815} 915}
816 916
@@ -1030,7 +1130,7 @@ void wiphy_update_regulatory(struct wiphy *wiphy,
1030 goto out; 1130 goto out;
1031 for (band = 0; band < IEEE80211_NUM_BANDS; band++) { 1131 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
1032 if (wiphy->bands[band]) 1132 if (wiphy->bands[band])
1033 handle_band(wiphy, band); 1133 handle_band(wiphy, band, initiator);
1034 } 1134 }
1035out: 1135out:
1036 reg_process_beacons(wiphy); 1136 reg_process_beacons(wiphy);
@@ -1066,10 +1166,17 @@ static void handle_channel_custom(struct wiphy *wiphy,
1066 regd); 1166 regd);
1067 1167
1068 if (r) { 1168 if (r) {
1169 REG_DBG_PRINT("Disabling freq %d MHz as custom "
1170 "regd has no rule that fits a %d MHz "
1171 "wide channel\n",
1172 chan->center_freq,
1173 KHZ_TO_MHZ(desired_bw_khz));
1069 chan->flags = IEEE80211_CHAN_DISABLED; 1174 chan->flags = IEEE80211_CHAN_DISABLED;
1070 return; 1175 return;
1071 } 1176 }
1072 1177
1178 chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule);
1179
1073 power_rule = &reg_rule->power_rule; 1180 power_rule = &reg_rule->power_rule;
1074 freq_range = &reg_rule->freq_range; 1181 freq_range = &reg_rule->freq_range;
1075 1182
@@ -1215,6 +1322,21 @@ static int ignore_request(struct wiphy *wiphy,
1215 return -EINVAL; 1322 return -EINVAL;
1216} 1323}
1217 1324
1325static void reg_set_request_processed(void)
1326{
1327 bool need_more_processing = false;
1328
1329 last_request->processed = true;
1330
1331 spin_lock(&reg_requests_lock);
1332 if (!list_empty(&reg_requests_list))
1333 need_more_processing = true;
1334 spin_unlock(&reg_requests_lock);
1335
1336 if (need_more_processing)
1337 schedule_work(&reg_work);
1338}
1339
1218/** 1340/**
1219 * __regulatory_hint - hint to the wireless core a regulatory domain 1341 * __regulatory_hint - hint to the wireless core a regulatory domain
1220 * @wiphy: if the hint comes from country information from an AP, this 1342 * @wiphy: if the hint comes from country information from an AP, this
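
The flow that reg_set_request_processed() completes, sketched as a comment; the DECLARE_WORK(reg_work, reg_todo) it relies on is presumably moved earlier in the file, since the hunk below only shows its old location being removed:

    /*
     * Serialized hint processing introduced here:
     *
     *   reg_todo() -> reg_process_pending_hints()
     *     - if last_request && !last_request->processed: bail out;
     *       reg_set_request_processed() will reschedule reg_work
     *     - otherwise pop ONE request and run reg_process_hint()
     *
     *   Once the hint is fully applied (e.g. in set_regdom()),
     *   reg_set_request_processed() marks last_request->processed and
     *   schedules reg_work again if reg_requests_list is non-empty.
     */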
@@ -1290,8 +1412,10 @@ new_request:
1290 * have applied the requested regulatory domain before we just 1412 * have applied the requested regulatory domain before we just
1291 * inform userspace we have processed the request 1413 * inform userspace we have processed the request
1292 */ 1414 */
1293 if (r == -EALREADY) 1415 if (r == -EALREADY) {
1294 nl80211_send_reg_change_event(last_request); 1416 nl80211_send_reg_change_event(last_request);
1417 reg_set_request_processed();
1418 }
1295 return r; 1419 return r;
1296 } 1420 }
1297 1421
@@ -1307,16 +1431,13 @@ static void reg_process_hint(struct regulatory_request *reg_request)
1307 1431
1308 BUG_ON(!reg_request->alpha2); 1432 BUG_ON(!reg_request->alpha2);
1309 1433
1310 mutex_lock(&cfg80211_mutex);
1311 mutex_lock(&reg_mutex);
1312
1313 if (wiphy_idx_valid(reg_request->wiphy_idx)) 1434 if (wiphy_idx_valid(reg_request->wiphy_idx))
1314 wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); 1435 wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx);
1315 1436
1316 if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && 1437 if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
1317 !wiphy) { 1438 !wiphy) {
1318 kfree(reg_request); 1439 kfree(reg_request);
1319 goto out; 1440 return;
1320 } 1441 }
1321 1442
1322 r = __regulatory_hint(wiphy, reg_request); 1443 r = __regulatory_hint(wiphy, reg_request);
@@ -1324,28 +1445,46 @@ static void reg_process_hint(struct regulatory_request *reg_request)
1324 if (r == -EALREADY && wiphy && 1445 if (r == -EALREADY && wiphy &&
1325 wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) 1446 wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY)
1326 wiphy_update_regulatory(wiphy, initiator); 1447 wiphy_update_regulatory(wiphy, initiator);
1327out:
1328 mutex_unlock(&reg_mutex);
1329 mutex_unlock(&cfg80211_mutex);
1330} 1448}
1331 1449
1332/* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* */ 1450/*
1451 * Processes regulatory hints; this covers all the NL80211_REGDOM_SET_BY_*
1452 * initiators. Hints come on a first-come, first-served basis and we
1453 * must process each one atomically.
1454 */
1333static void reg_process_pending_hints(void) 1455static void reg_process_pending_hints(void)
1334 { 1456{
1335 struct regulatory_request *reg_request; 1457 struct regulatory_request *reg_request;
1336 1458
1459 mutex_lock(&cfg80211_mutex);
1460 mutex_lock(&reg_mutex);
1461
1462 /* When last_request->processed becomes true, this will be rescheduled */
1463 if (last_request && !last_request->processed) {
1464 REG_DBG_PRINT("Pending regulatory request, waiting "
1465 "for it to be processed...\n");
1466 goto out;
1467 }
1468
1337 spin_lock(&reg_requests_lock); 1469 spin_lock(&reg_requests_lock);
1338 while (!list_empty(&reg_requests_list)) {
1339 reg_request = list_first_entry(&reg_requests_list,
1340 struct regulatory_request,
1341 list);
1342 list_del_init(&reg_request->list);
1343 1470
1471 if (list_empty(&reg_requests_list)) {
1344 spin_unlock(&reg_requests_lock); 1472 spin_unlock(&reg_requests_lock);
1345 reg_process_hint(reg_request); 1473 goto out;
1346 spin_lock(&reg_requests_lock);
1347 } 1474 }
1475
1476 reg_request = list_first_entry(&reg_requests_list,
1477 struct regulatory_request,
1478 list);
1479 list_del_init(&reg_request->list);
1480
1348 spin_unlock(&reg_requests_lock); 1481 spin_unlock(&reg_requests_lock);
1482
1483 reg_process_hint(reg_request);
1484
1485out:
1486 mutex_unlock(&reg_mutex);
1487 mutex_unlock(&cfg80211_mutex);
1349} 1488}
1350 1489
1351/* Processes beacon hints -- this has nothing to do with country IEs */ 1490/* Processes beacon hints -- this has nothing to do with country IEs */
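
The locking that used to live in reg_process_hint() moves up a level, so the pending-request check, the list pop, and the hint processing all happen under one critical section. The resulting lock ordering, restating the hunk above as a sketch:

    mutex_lock(&cfg80211_mutex);    /* outermost, always taken first */
    mutex_lock(&reg_mutex);         /* protects last_request */

    spin_lock(&reg_requests_lock);  /* innermost: the request list */
    /* ... list_empty() check / list_first_entry() / list_del_init() ... */
    spin_unlock(&reg_requests_lock);

    /* reg_process_hint() now runs with both mutexes already held */

    mutex_unlock(&reg_mutex);
    mutex_unlock(&cfg80211_mutex);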
@@ -1392,8 +1531,6 @@ static void reg_todo(struct work_struct *work)
1392 reg_process_pending_beacon_hints(); 1531 reg_process_pending_beacon_hints();
1393} 1532}
1394 1533
1395static DECLARE_WORK(reg_work, reg_todo);
1396
1397static void queue_regulatory_request(struct regulatory_request *request) 1534static void queue_regulatory_request(struct regulatory_request *request)
1398{ 1535{
1399 if (isalpha(request->alpha2[0])) 1536 if (isalpha(request->alpha2[0]))
@@ -1428,12 +1565,7 @@ static int regulatory_hint_core(const char *alpha2)
1428 request->alpha2[1] = alpha2[1]; 1565 request->alpha2[1] = alpha2[1];
1429 request->initiator = NL80211_REGDOM_SET_BY_CORE; 1566 request->initiator = NL80211_REGDOM_SET_BY_CORE;
1430 1567
1431 /* 1568 queue_regulatory_request(request);
1432 * This ensures last_request is populated once modules
1433 * come swinging in and calling regulatory hints and
1434 * wiphy_apply_custom_regulatory().
1435 */
1436 reg_process_hint(request);
1437 1569
1438 return 0; 1570 return 0;
1439} 1571}
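
With this change every initiator funnels through the same queue. Based on the partially shown body of queue_regulatory_request(), its common tail is presumably:

    /* Tail of queue_regulatory_request() (sketch; not shown in this
     * diff): */
    spin_lock(&reg_requests_lock);
    list_add_tail(&request->list, &reg_requests_list);
    spin_unlock(&reg_requests_lock);

    schedule_work(&reg_work);   /* reg_todo() picks it up serially */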
@@ -1559,7 +1691,7 @@ static void restore_alpha2(char *alpha2, bool reset_user)
1559 if (is_user_regdom_saved()) { 1691 if (is_user_regdom_saved()) {
1560 /* Unless we're asked to ignore it and reset it */ 1692 /* Unless we're asked to ignore it and reset it */
1561 if (reset_user) { 1693 if (reset_user) {
1562 REG_DBG_PRINT("cfg80211: Restoring regulatory settings " 1694 REG_DBG_PRINT("Restoring regulatory settings "
1563 "including user preference\n"); 1695 "including user preference\n");
1564 user_alpha2[0] = '9'; 1696 user_alpha2[0] = '9';
1565 user_alpha2[1] = '7'; 1697 user_alpha2[1] = '7';
@@ -1570,7 +1702,7 @@ static void restore_alpha2(char *alpha2, bool reset_user)
1570 * back as they were for a full restore. 1702 * back as they were for a full restore.
1571 */ 1703 */
1572 if (!is_world_regdom(ieee80211_regdom)) { 1704 if (!is_world_regdom(ieee80211_regdom)) {
1573 REG_DBG_PRINT("cfg80211: Keeping preference on " 1705 REG_DBG_PRINT("Keeping preference on "
1574 "module parameter ieee80211_regdom: %c%c\n", 1706 "module parameter ieee80211_regdom: %c%c\n",
1575 ieee80211_regdom[0], 1707 ieee80211_regdom[0],
1576 ieee80211_regdom[1]); 1708 ieee80211_regdom[1]);
@@ -1578,7 +1710,7 @@ static void restore_alpha2(char *alpha2, bool reset_user)
1578 alpha2[1] = ieee80211_regdom[1]; 1710 alpha2[1] = ieee80211_regdom[1];
1579 } 1711 }
1580 } else { 1712 } else {
1581 REG_DBG_PRINT("cfg80211: Restoring regulatory settings " 1713 REG_DBG_PRINT("Restoring regulatory settings "
1582 "while preserving user preference for: %c%c\n", 1714 "while preserving user preference for: %c%c\n",
1583 user_alpha2[0], 1715 user_alpha2[0],
1584 user_alpha2[1]); 1716 user_alpha2[1]);
@@ -1586,14 +1718,14 @@ static void restore_alpha2(char *alpha2, bool reset_user)
1586 alpha2[1] = user_alpha2[1]; 1718 alpha2[1] = user_alpha2[1];
1587 } 1719 }
1588 } else if (!is_world_regdom(ieee80211_regdom)) { 1720 } else if (!is_world_regdom(ieee80211_regdom)) {
1589 REG_DBG_PRINT("cfg80211: Keeping preference on " 1721 REG_DBG_PRINT("Keeping preference on "
1590 "module parameter ieee80211_regdom: %c%c\n", 1722 "module parameter ieee80211_regdom: %c%c\n",
1591 ieee80211_regdom[0], 1723 ieee80211_regdom[0],
1592 ieee80211_regdom[1]); 1724 ieee80211_regdom[1]);
1593 alpha2[0] = ieee80211_regdom[0]; 1725 alpha2[0] = ieee80211_regdom[0];
1594 alpha2[1] = ieee80211_regdom[1]; 1726 alpha2[1] = ieee80211_regdom[1];
1595 } else 1727 } else
1596 REG_DBG_PRINT("cfg80211: Restoring regulatory settings\n"); 1728 REG_DBG_PRINT("Restoring regulatory settings\n");
1597} 1729}
1598 1730
1599/* 1731/*
@@ -1661,7 +1793,7 @@ static void restore_regulatory_settings(bool reset_user)
1661 1793
1662void regulatory_hint_disconnect(void) 1794void regulatory_hint_disconnect(void)
1663{ 1795{
1664 REG_DBG_PRINT("cfg80211: All devices are disconnected, going to " 1796 REG_DBG_PRINT("All devices are disconnected, going to "
1665 "restore regulatory settings\n"); 1797 "restore regulatory settings\n");
1666 restore_regulatory_settings(false); 1798 restore_regulatory_settings(false);
1667} 1799}
@@ -1691,7 +1823,7 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
1691 if (!reg_beacon) 1823 if (!reg_beacon)
1692 return -ENOMEM; 1824 return -ENOMEM;
1693 1825
1694 REG_DBG_PRINT("cfg80211: Found new beacon on " 1826 REG_DBG_PRINT("Found new beacon on "
1695 "frequency: %d MHz (Ch %d) on %s\n", 1827 "frequency: %d MHz (Ch %d) on %s\n",
1696 beacon_chan->center_freq, 1828 beacon_chan->center_freq,
1697 ieee80211_frequency_to_channel(beacon_chan->center_freq), 1829 ieee80211_frequency_to_channel(beacon_chan->center_freq),
@@ -1721,8 +1853,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
1721 const struct ieee80211_freq_range *freq_range = NULL; 1853 const struct ieee80211_freq_range *freq_range = NULL;
1722 const struct ieee80211_power_rule *power_rule = NULL; 1854 const struct ieee80211_power_rule *power_rule = NULL;
1723 1855
1724 printk(KERN_INFO " (start_freq - end_freq @ bandwidth), " 1856 pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)\n");
1725 "(max_antenna_gain, max_eirp)\n");
1726 1857
1727 for (i = 0; i < rd->n_reg_rules; i++) { 1858 for (i = 0; i < rd->n_reg_rules; i++) {
1728 reg_rule = &rd->reg_rules[i]; 1859 reg_rule = &rd->reg_rules[i];
@@ -1734,16 +1865,14 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
1734 * in certain regions 1865 * in certain regions
1735 */ 1866 */
1736 if (power_rule->max_antenna_gain) 1867 if (power_rule->max_antenna_gain)
1737 printk(KERN_INFO " (%d KHz - %d KHz @ %d KHz), " 1868 pr_info(" (%d KHz - %d KHz @ %d KHz), (%d mBi, %d mBm)\n",
1738 "(%d mBi, %d mBm)\n",
1739 freq_range->start_freq_khz, 1869 freq_range->start_freq_khz,
1740 freq_range->end_freq_khz, 1870 freq_range->end_freq_khz,
1741 freq_range->max_bandwidth_khz, 1871 freq_range->max_bandwidth_khz,
1742 power_rule->max_antenna_gain, 1872 power_rule->max_antenna_gain,
1743 power_rule->max_eirp); 1873 power_rule->max_eirp);
1744 else 1874 else
1745 printk(KERN_INFO " (%d KHz - %d KHz @ %d KHz), " 1875 pr_info(" (%d KHz - %d KHz @ %d KHz), (N/A, %d mBm)\n",
1746 "(N/A, %d mBm)\n",
1747 freq_range->start_freq_khz, 1876 freq_range->start_freq_khz,
1748 freq_range->end_freq_khz, 1877 freq_range->end_freq_khz,
1749 freq_range->max_bandwidth_khz, 1878 freq_range->max_bandwidth_khz,
@@ -1762,27 +1891,20 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
1762 rdev = cfg80211_rdev_by_wiphy_idx( 1891 rdev = cfg80211_rdev_by_wiphy_idx(
1763 last_request->wiphy_idx); 1892 last_request->wiphy_idx);
1764 if (rdev) { 1893 if (rdev) {
1765 printk(KERN_INFO "cfg80211: Current regulatory " 1894 pr_info("Current regulatory domain updated by AP to: %c%c\n",
1766 "domain updated by AP to: %c%c\n",
1767 rdev->country_ie_alpha2[0], 1895 rdev->country_ie_alpha2[0],
1768 rdev->country_ie_alpha2[1]); 1896 rdev->country_ie_alpha2[1]);
1769 } else 1897 } else
1770 printk(KERN_INFO "cfg80211: Current regulatory " 1898 pr_info("Current regulatory domain intersected:\n");
1771 "domain intersected:\n");
1772 } else 1899 } else
1773 printk(KERN_INFO "cfg80211: Current regulatory " 1900 pr_info("Current regulatory domain intersected:\n");
1774 "domain intersected:\n");
1775 } else if (is_world_regdom(rd->alpha2)) 1901 } else if (is_world_regdom(rd->alpha2))
1776 printk(KERN_INFO "cfg80211: World regulatory " 1902 pr_info("World regulatory domain updated:\n");
1777 "domain updated:\n");
1778 else { 1903 else {
1779 if (is_unknown_alpha2(rd->alpha2)) 1904 if (is_unknown_alpha2(rd->alpha2))
1780 printk(KERN_INFO "cfg80211: Regulatory domain " 1905 pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n");
1781 "changed to driver built-in settings "
1782 "(unknown country)\n");
1783 else 1906 else
1784 printk(KERN_INFO "cfg80211: Regulatory domain " 1907 pr_info("Regulatory domain changed to country: %c%c\n",
1785 "changed to country: %c%c\n",
1786 rd->alpha2[0], rd->alpha2[1]); 1908 rd->alpha2[0], rd->alpha2[1]);
1787 } 1909 }
1788 print_rd_rules(rd); 1910 print_rd_rules(rd);
@@ -1790,8 +1912,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
1790 1912
1791static void print_regdomain_info(const struct ieee80211_regdomain *rd) 1913static void print_regdomain_info(const struct ieee80211_regdomain *rd)
1792{ 1914{
1793 printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n", 1915 pr_info("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
1794 rd->alpha2[0], rd->alpha2[1]);
1795 print_rd_rules(rd); 1916 print_rd_rules(rd);
1796} 1917}
1797 1918
@@ -1842,8 +1963,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1842 return -EINVAL; 1963 return -EINVAL;
1843 1964
1844 if (!is_valid_rd(rd)) { 1965 if (!is_valid_rd(rd)) {
1845 printk(KERN_ERR "cfg80211: Invalid " 1966 pr_err("Invalid regulatory domain detected:\n");
1846 "regulatory domain detected:\n");
1847 print_regdomain_info(rd); 1967 print_regdomain_info(rd);
1848 return -EINVAL; 1968 return -EINVAL;
1849 } 1969 }
@@ -1959,6 +2079,8 @@ int set_regdom(const struct ieee80211_regdomain *rd)
1959 2079
1960 nl80211_send_reg_change_event(last_request); 2080 nl80211_send_reg_change_event(last_request);
1961 2081
2082 reg_set_request_processed();
2083
1962 mutex_unlock(&reg_mutex); 2084 mutex_unlock(&reg_mutex);
1963 2085
1964 return r; 2086 return r;
@@ -2015,8 +2137,7 @@ int __init regulatory_init(void)
2015 * early boot for call_usermodehelper(). For now treat these 2137 * early boot for call_usermodehelper(). For now treat these
2016 * errors as non-fatal. 2138 * errors as non-fatal.
2017 */ 2139 */
2018 printk(KERN_ERR "cfg80211: kobject_uevent_env() was unable " 2140 pr_err("kobject_uevent_env() was unable to call CRDA during init\n");
2019 "to call CRDA during init");
2020#ifdef CONFIG_CFG80211_REG_DEBUG 2141#ifdef CONFIG_CFG80211_REG_DEBUG
2021 /* We want to find out exactly why when debugging */ 2142 /* We want to find out exactly why when debugging */
2022 WARN_ON(err); 2143 WARN_ON(err);
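
The "cfg80211: " prefixes can be dropped from the individual messages because pr_info()/pr_err() pick the prefix up from a pr_fmt() macro. The conventional define, which this diff does not show and is assumed to sit at the top of the affected files, is:

    /* Must appear before any #include that pulls in the printk
     * helpers. */
    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

    #include <linux/kernel.h>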
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 76120aeda57..fee020b15a4 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -502,7 +502,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
502 skb_orphan(skb); 502 skb_orphan(skb);
503 503
504 if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) { 504 if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) {
505 printk(KERN_ERR "failed to reallocate Tx buffer\n"); 505 pr_err("failed to reallocate Tx buffer\n");
506 return -ENOMEM; 506 return -ENOMEM;
507 } 507 }
508 skb->truesize += head_need; 508 skb->truesize += head_need;
@@ -685,20 +685,17 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
685 continue; 685 continue;
686 if (rdev->ops->add_key(wdev->wiphy, dev, i, false, NULL, 686 if (rdev->ops->add_key(wdev->wiphy, dev, i, false, NULL,
687 &wdev->connect_keys->params[i])) { 687 &wdev->connect_keys->params[i])) {
688 printk(KERN_ERR "%s: failed to set key %d\n", 688 netdev_err(dev, "failed to set key %d\n", i);
689 dev->name, i);
690 continue; 689 continue;
691 } 690 }
692 if (wdev->connect_keys->def == i) 691 if (wdev->connect_keys->def == i)
693 if (rdev->ops->set_default_key(wdev->wiphy, dev, i)) { 692 if (rdev->ops->set_default_key(wdev->wiphy, dev, i)) {
694 printk(KERN_ERR "%s: failed to set defkey %d\n", 693 netdev_err(dev, "failed to set defkey %d\n", i);
695 dev->name, i);
696 continue; 694 continue;
697 } 695 }
698 if (wdev->connect_keys->defmgmt == i) 696 if (wdev->connect_keys->defmgmt == i)
699 if (rdev->ops->set_default_mgmt_key(wdev->wiphy, dev, i)) 697 if (rdev->ops->set_default_mgmt_key(wdev->wiphy, dev, i))
700 printk(KERN_ERR "%s: failed to set mgtdef %d\n", 698 netdev_err(dev, "failed to set mgtdef %d\n", i);
701 dev->name, i);
702 } 699 }
703 700
704 kfree(wdev->connect_keys); 701 kfree(wdev->connect_keys);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index dc675a3daa3..fdbc23c10d8 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -467,8 +467,8 @@ void wireless_send_event(struct net_device * dev,
467 * The best the driver could do is to log an error message. 467 * The best the driver could do is to log an error message.
468 * We will do it ourselves instead... 468 * We will do it ourselves instead...
469 */ 469 */
470 printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n", 470 netdev_err(dev, "(WE) : Invalid/Unknown Wireless Event (0x%04X)\n",
471 dev->name, cmd); 471 cmd);
472 return; 472 return;
473 } 473 }
474 474
@@ -476,11 +476,13 @@ void wireless_send_event(struct net_device * dev,
476 if (descr->header_type == IW_HEADER_TYPE_POINT) { 476 if (descr->header_type == IW_HEADER_TYPE_POINT) {
477 /* Check if number of token fits within bounds */ 477 /* Check if number of token fits within bounds */
478 if (wrqu->data.length > descr->max_tokens) { 478 if (wrqu->data.length > descr->max_tokens) {
479 printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length); 479 netdev_err(dev, "(WE) : Wireless Event too big (%d)\n",
480 wrqu->data.length);
480 return; 481 return;
481 } 482 }
482 if (wrqu->data.length < descr->min_tokens) { 483 if (wrqu->data.length < descr->min_tokens) {
483 printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length); 484 netdev_err(dev, "(WE) : Wireless Event too small (%d)\n",
485 wrqu->data.length);
484 return; 486 return;
485 } 487 }
486 /* Calculate extra_len - extra is NULL for restricted events */ 488 /* Calculate extra_len - extra is NULL for restricted events */
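
netdev_err() and its siblings take the net_device directly and emit the device name themselves, replacing the hand-rolled "%s: ...", dev->name pattern used above:

    /* Before: caller formats the device name by hand. */
    printk(KERN_ERR "%s: failed to set key %d\n", dev->name, i);

    /* After: the helper prefixes the message with the device name
     * (plus driver/bus info) for us. */
    netdev_err(dev, "failed to set key %d\n", i);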
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f7af98dff40..ad96ee90fe2 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1357,11 +1357,11 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1357 void __user *argp = (void __user *)arg; 1357 void __user *argp = (void __user *)arg;
1358 int rc; 1358 int rc;
1359 1359
1360 lock_kernel();
1361 switch (cmd) { 1360 switch (cmd) {
1362 case TIOCOUTQ: { 1361 case TIOCOUTQ: {
1363 int amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); 1362 int amount;
1364 1363
1364 amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
1365 if (amount < 0) 1365 if (amount < 0)
1366 amount = 0; 1366 amount = 0;
1367 rc = put_user(amount, (unsigned int __user *)argp); 1367 rc = put_user(amount, (unsigned int __user *)argp);
@@ -1375,8 +1375,10 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1375 * These two are safe on a single CPU system as 1375 * These two are safe on a single CPU system as
1376 * only user tasks fiddle here 1376 * only user tasks fiddle here
1377 */ 1377 */
1378 lock_sock(sk);
1378 if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) 1379 if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
1379 amount = skb->len; 1380 amount = skb->len;
1381 release_sock(sk);
1380 rc = put_user(amount, (unsigned int __user *)argp); 1382 rc = put_user(amount, (unsigned int __user *)argp);
1381 break; 1383 break;
1382 } 1384 }
@@ -1422,9 +1424,11 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1422 rc = x25_subscr_ioctl(cmd, argp); 1424 rc = x25_subscr_ioctl(cmd, argp);
1423 break; 1425 break;
1424 case SIOCX25GFACILITIES: { 1426 case SIOCX25GFACILITIES: {
1425 struct x25_facilities fac = x25->facilities; 1427 lock_sock(sk);
1426 rc = copy_to_user(argp, &fac, 1428 rc = copy_to_user(argp, &x25->facilities,
1427 sizeof(fac)) ? -EFAULT : 0; 1429 sizeof(x25->facilities))
1430 ? -EFAULT : 0;
1431 release_sock(sk);
1428 break; 1432 break;
1429 } 1433 }
1430 1434
@@ -1435,18 +1439,19 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1435 sizeof(facilities))) 1439 sizeof(facilities)))
1436 break; 1440 break;
1437 rc = -EINVAL; 1441 rc = -EINVAL;
1442 lock_sock(sk);
1438 if (sk->sk_state != TCP_LISTEN && 1443 if (sk->sk_state != TCP_LISTEN &&
1439 sk->sk_state != TCP_CLOSE) 1444 sk->sk_state != TCP_CLOSE)
1440 break; 1445 goto out_fac_release;
1441 if (facilities.pacsize_in < X25_PS16 || 1446 if (facilities.pacsize_in < X25_PS16 ||
1442 facilities.pacsize_in > X25_PS4096) 1447 facilities.pacsize_in > X25_PS4096)
1443 break; 1448 goto out_fac_release;
1444 if (facilities.pacsize_out < X25_PS16 || 1449 if (facilities.pacsize_out < X25_PS16 ||
1445 facilities.pacsize_out > X25_PS4096) 1450 facilities.pacsize_out > X25_PS4096)
1446 break; 1451 goto out_fac_release;
1447 if (facilities.winsize_in < 1 || 1452 if (facilities.winsize_in < 1 ||
1448 facilities.winsize_in > 127) 1453 facilities.winsize_in > 127)
1449 break; 1454 goto out_fac_release;
1450 if (facilities.throughput) { 1455 if (facilities.throughput) {
1451 int out = facilities.throughput & 0xf0; 1456 int out = facilities.throughput & 0xf0;
1452 int in = facilities.throughput & 0x0f; 1457 int in = facilities.throughput & 0x0f;
@@ -1454,24 +1459,28 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1454 facilities.throughput |= 1459 facilities.throughput |=
1455 X25_DEFAULT_THROUGHPUT << 4; 1460 X25_DEFAULT_THROUGHPUT << 4;
1456 else if (out < 0x30 || out > 0xD0) 1461 else if (out < 0x30 || out > 0xD0)
1457 break; 1462 goto out_fac_release;
1458 if (!in) 1463 if (!in)
1459 facilities.throughput |= 1464 facilities.throughput |=
1460 X25_DEFAULT_THROUGHPUT; 1465 X25_DEFAULT_THROUGHPUT;
1461 else if (in < 0x03 || in > 0x0D) 1466 else if (in < 0x03 || in > 0x0D)
1462 break; 1467 goto out_fac_release;
1463 } 1468 }
1464 if (facilities.reverse && 1469 if (facilities.reverse &&
1465 (facilities.reverse & 0x81) != 0x81) 1470 (facilities.reverse & 0x81) != 0x81)
1466 break; 1471 goto out_fac_release;
1467 x25->facilities = facilities; 1472 x25->facilities = facilities;
1468 rc = 0; 1473 rc = 0;
1474out_fac_release:
1475 release_sock(sk);
1469 break; 1476 break;
1470 } 1477 }
1471 1478
1472 case SIOCX25GDTEFACILITIES: { 1479 case SIOCX25GDTEFACILITIES: {
1480 lock_sock(sk);
1473 rc = copy_to_user(argp, &x25->dte_facilities, 1481 rc = copy_to_user(argp, &x25->dte_facilities,
1474 sizeof(x25->dte_facilities)); 1482 sizeof(x25->dte_facilities));
1483 release_sock(sk);
1475 if (rc) 1484 if (rc)
1476 rc = -EFAULT; 1485 rc = -EFAULT;
1477 break; 1486 break;
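
For the throughput checks earlier in this hunk: the facility byte packs the outbound class in the high nibble and the inbound class in the low nibble. A worked example of the defaulting logic (values hypothetical):

    /*
     * facilities.throughput == 0x40:
     *   out = 0x40 -> within 0x30..0xD0, kept as-is
     *   in  = 0x00 -> zero, so the low nibble is filled in with
     *                 X25_DEFAULT_THROUGHPUT
     * An out value of 0x20 or 0xE0 would instead take the
     * out_fac_release path with rc == -EINVAL.
     */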
@@ -1483,26 +1492,31 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1483 if (copy_from_user(&dtefacs, argp, sizeof(dtefacs))) 1492 if (copy_from_user(&dtefacs, argp, sizeof(dtefacs)))
1484 break; 1493 break;
1485 rc = -EINVAL; 1494 rc = -EINVAL;
1495 lock_sock(sk);
1486 if (sk->sk_state != TCP_LISTEN && 1496 if (sk->sk_state != TCP_LISTEN &&
1487 sk->sk_state != TCP_CLOSE) 1497 sk->sk_state != TCP_CLOSE)
1488 break; 1498 goto out_dtefac_release;
1489 if (dtefacs.calling_len > X25_MAX_AE_LEN) 1499 if (dtefacs.calling_len > X25_MAX_AE_LEN)
1490 break; 1500 goto out_dtefac_release;
1491 if (dtefacs.calling_ae == NULL) 1501 if (dtefacs.calling_ae == NULL)
1492 break; 1502 goto out_dtefac_release;
1493 if (dtefacs.called_len > X25_MAX_AE_LEN) 1503 if (dtefacs.called_len > X25_MAX_AE_LEN)
1494 break; 1504 goto out_dtefac_release;
1495 if (dtefacs.called_ae == NULL) 1505 if (dtefacs.called_ae == NULL)
1496 break; 1506 goto out_dtefac_release;
1497 x25->dte_facilities = dtefacs; 1507 x25->dte_facilities = dtefacs;
1498 rc = 0; 1508 rc = 0;
1509out_dtefac_release:
1510 release_sock(sk);
1499 break; 1511 break;
1500 } 1512 }
1501 1513
1502 case SIOCX25GCALLUSERDATA: { 1514 case SIOCX25GCALLUSERDATA: {
1503 struct x25_calluserdata cud = x25->calluserdata; 1515 lock_sock(sk);
1504 rc = copy_to_user(argp, &cud, 1516 rc = copy_to_user(argp, &x25->calluserdata,
1505 sizeof(cud)) ? -EFAULT : 0; 1517 sizeof(x25->calluserdata))
1518 ? -EFAULT : 0;
1519 release_sock(sk);
1506 break; 1520 break;
1507 } 1521 }
1508 1522
@@ -1516,16 +1530,19 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1516 rc = -EINVAL; 1530 rc = -EINVAL;
1517 if (calluserdata.cudlength > X25_MAX_CUD_LEN) 1531 if (calluserdata.cudlength > X25_MAX_CUD_LEN)
1518 break; 1532 break;
1533 lock_sock(sk);
1519 x25->calluserdata = calluserdata; 1534 x25->calluserdata = calluserdata;
1535 release_sock(sk);
1520 rc = 0; 1536 rc = 0;
1521 break; 1537 break;
1522 } 1538 }
1523 1539
1524 case SIOCX25GCAUSEDIAG: { 1540 case SIOCX25GCAUSEDIAG: {
1525 struct x25_causediag causediag; 1541 lock_sock(sk);
1526 causediag = x25->causediag; 1542 rc = copy_to_user(argp, &x25->causediag,
1527 rc = copy_to_user(argp, &causediag, 1543 sizeof(x25->causediag))
1528 sizeof(causediag)) ? -EFAULT : 0; 1544 ? -EFAULT : 0;
1545 release_sock(sk);
1529 break; 1546 break;
1530 } 1547 }
1531 1548
@@ -1534,7 +1551,9 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1534 rc = -EFAULT; 1551 rc = -EFAULT;
1535 if (copy_from_user(&causediag, argp, sizeof(causediag))) 1552 if (copy_from_user(&causediag, argp, sizeof(causediag)))
1536 break; 1553 break;
1554 lock_sock(sk);
1537 x25->causediag = causediag; 1555 x25->causediag = causediag;
1556 release_sock(sk);
1538 rc = 0; 1557 rc = 0;
1539 break; 1558 break;
1540 1559
@@ -1543,31 +1562,37 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1543 case SIOCX25SCUDMATCHLEN: { 1562 case SIOCX25SCUDMATCHLEN: {
1544 struct x25_subaddr sub_addr; 1563 struct x25_subaddr sub_addr;
1545 rc = -EINVAL; 1564 rc = -EINVAL;
1565 lock_sock(sk);
1546 if(sk->sk_state != TCP_CLOSE) 1566 if(sk->sk_state != TCP_CLOSE)
1547 break; 1567 goto out_cud_release;
1548 rc = -EFAULT; 1568 rc = -EFAULT;
1549 if (copy_from_user(&sub_addr, argp, 1569 if (copy_from_user(&sub_addr, argp,
1550 sizeof(sub_addr))) 1570 sizeof(sub_addr)))
1551 break; 1571 goto out_cud_release;
1552 rc = -EINVAL; 1572 rc = -EINVAL;
1553 if(sub_addr.cudmatchlength > X25_MAX_CUD_LEN) 1573 if(sub_addr.cudmatchlength > X25_MAX_CUD_LEN)
1554 break; 1574 goto out_cud_release;
1555 x25->cudmatchlength = sub_addr.cudmatchlength; 1575 x25->cudmatchlength = sub_addr.cudmatchlength;
1556 rc = 0; 1576 rc = 0;
1577out_cud_release:
1578 release_sock(sk);
1557 break; 1579 break;
1558 } 1580 }
1559 1581
1560 case SIOCX25CALLACCPTAPPRV: { 1582 case SIOCX25CALLACCPTAPPRV: {
1561 rc = -EINVAL; 1583 rc = -EINVAL;
1584 lock_kernel();
1562 if (sk->sk_state != TCP_CLOSE) 1585 if (sk->sk_state != TCP_CLOSE)
1563 break; 1586 break;
1564 clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); 1587 clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);
1588 unlock_kernel();
1565 rc = 0; 1589 rc = 0;
1566 break; 1590 break;
1567 } 1591 }
1568 1592
1569 case SIOCX25SENDCALLACCPT: { 1593 case SIOCX25SENDCALLACCPT: {
1570 rc = -EINVAL; 1594 rc = -EINVAL;
1595 lock_kernel();
1571 if (sk->sk_state != TCP_ESTABLISHED) 1596 if (sk->sk_state != TCP_ESTABLISHED)
1572 break; 1597 break;
1573 /* must call accptapprv above */ 1598 /* must call accptapprv above */
@@ -1575,6 +1600,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1575 break; 1600 break;
1576 x25_write_internal(sk, X25_CALL_ACCEPTED); 1601 x25_write_internal(sk, X25_CALL_ACCEPTED);
1577 x25->state = X25_STATE_3; 1602 x25->state = X25_STATE_3;
1603 unlock_kernel();
1578 rc = 0; 1604 rc = 0;
1579 break; 1605 break;
1580 } 1606 }
@@ -1583,7 +1609,6 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1583 rc = -ENOIOCTLCMD; 1609 rc = -ENOIOCTLCMD;
1584 break; 1610 break;
1585 } 1611 }
1586 unlock_kernel();
1587 1612
1588 return rc; 1613 return rc;
1589} 1614}
@@ -1619,16 +1644,20 @@ static int compat_x25_subscr_ioctl(unsigned int cmd,
1619 dev_put(dev); 1644 dev_put(dev);
1620 1645
1621 if (cmd == SIOCX25GSUBSCRIP) { 1646 if (cmd == SIOCX25GSUBSCRIP) {
1647 read_lock_bh(&x25_neigh_list_lock);
1622 x25_subscr.extended = nb->extended; 1648 x25_subscr.extended = nb->extended;
1623 x25_subscr.global_facil_mask = nb->global_facil_mask; 1649 x25_subscr.global_facil_mask = nb->global_facil_mask;
1650 read_unlock_bh(&x25_neigh_list_lock);
1624 rc = copy_to_user(x25_subscr32, &x25_subscr, 1651 rc = copy_to_user(x25_subscr32, &x25_subscr,
1625 sizeof(*x25_subscr32)) ? -EFAULT : 0; 1652 sizeof(*x25_subscr32)) ? -EFAULT : 0;
1626 } else { 1653 } else {
1627 rc = -EINVAL; 1654 rc = -EINVAL;
1628 if (x25_subscr.extended == 0 || x25_subscr.extended == 1) { 1655 if (x25_subscr.extended == 0 || x25_subscr.extended == 1) {
1629 rc = 0; 1656 rc = 0;
1657 write_lock_bh(&x25_neigh_list_lock);
1630 nb->extended = x25_subscr.extended; 1658 nb->extended = x25_subscr.extended;
1631 nb->global_facil_mask = x25_subscr.global_facil_mask; 1659 nb->global_facil_mask = x25_subscr.global_facil_mask;
1660 write_unlock_bh(&x25_neigh_list_lock);
1632 } 1661 }
1633 } 1662 }
1634 x25_neigh_put(nb); 1663 x25_neigh_put(nb);
@@ -1654,19 +1683,15 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
1654 break; 1683 break;
1655 case SIOCGSTAMP: 1684 case SIOCGSTAMP:
1656 rc = -EINVAL; 1685 rc = -EINVAL;
1657 lock_kernel();
1658 if (sk) 1686 if (sk)
1659 rc = compat_sock_get_timestamp(sk, 1687 rc = compat_sock_get_timestamp(sk,
1660 (struct timeval __user*)argp); 1688 (struct timeval __user*)argp);
1661 unlock_kernel();
1662 break; 1689 break;
1663 case SIOCGSTAMPNS: 1690 case SIOCGSTAMPNS:
1664 rc = -EINVAL; 1691 rc = -EINVAL;
1665 lock_kernel();
1666 if (sk) 1692 if (sk)
1667 rc = compat_sock_get_timestampns(sk, 1693 rc = compat_sock_get_timestampns(sk,
1668 (struct timespec __user*)argp); 1694 (struct timespec __user*)argp);
1669 unlock_kernel();
1670 break; 1695 break;
1671 case SIOCGIFADDR: 1696 case SIOCGIFADDR:
1672 case SIOCSIFADDR: 1697 case SIOCSIFADDR:
@@ -1685,22 +1710,16 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
1685 rc = -EPERM; 1710 rc = -EPERM;
1686 if (!capable(CAP_NET_ADMIN)) 1711 if (!capable(CAP_NET_ADMIN))
1687 break; 1712 break;
1688 lock_kernel();
1689 rc = x25_route_ioctl(cmd, argp); 1713 rc = x25_route_ioctl(cmd, argp);
1690 unlock_kernel();
1691 break; 1714 break;
1692 case SIOCX25GSUBSCRIP: 1715 case SIOCX25GSUBSCRIP:
1693 lock_kernel();
1694 rc = compat_x25_subscr_ioctl(cmd, argp); 1716 rc = compat_x25_subscr_ioctl(cmd, argp);
1695 unlock_kernel();
1696 break; 1717 break;
1697 case SIOCX25SSUBSCRIP: 1718 case SIOCX25SSUBSCRIP:
1698 rc = -EPERM; 1719 rc = -EPERM;
1699 if (!capable(CAP_NET_ADMIN)) 1720 if (!capable(CAP_NET_ADMIN))
1700 break; 1721 break;
1701 lock_kernel();
1702 rc = compat_x25_subscr_ioctl(cmd, argp); 1722 rc = compat_x25_subscr_ioctl(cmd, argp);
1703 unlock_kernel();
1704 break; 1723 break;
1705 case SIOCX25GFACILITIES: 1724 case SIOCX25GFACILITIES:
1706 case SIOCX25SFACILITIES: 1725 case SIOCX25SFACILITIES:
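
The conversion pattern used throughout x25_ioctl(): take lock_sock() around each command's check-and-set and route every early exit through a label that drops the lock. A minimal template (SIOCX25SEXAMPLE and its label are hypothetical):

    case SIOCX25SEXAMPLE: {
        rc = -EINVAL;
        lock_sock(sk);
        if (sk->sk_state != TCP_CLOSE)
            goto out_example_release;   /* never break while locked */
        /* ... validate and update x25 state under the socket lock ... */
        rc = 0;
    out_example_release:
        release_sock(sk);
        break;
    }

Note that the two cases left under lock_kernel() above still break out of the switch on their error paths without an unlock_kernel(); the goto-release shape of this template is what avoids exactly that kind of leak.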
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index b25c6463c3e..4cbc942f762 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -31,8 +31,8 @@
31#include <linux/init.h> 31#include <linux/init.h>
32#include <net/x25.h> 32#include <net/x25.h>
33 33
34static LIST_HEAD(x25_neigh_list); 34LIST_HEAD(x25_neigh_list);
35static DEFINE_RWLOCK(x25_neigh_list_lock); 35DEFINE_RWLOCK(x25_neigh_list_lock);
36 36
37static void x25_t20timer_expiry(unsigned long); 37static void x25_t20timer_expiry(unsigned long);
38 38
@@ -360,16 +360,20 @@ int x25_subscr_ioctl(unsigned int cmd, void __user *arg)
360 dev_put(dev); 360 dev_put(dev);
361 361
362 if (cmd == SIOCX25GSUBSCRIP) { 362 if (cmd == SIOCX25GSUBSCRIP) {
363 read_lock_bh(&x25_neigh_list_lock);
363 x25_subscr.extended = nb->extended; 364 x25_subscr.extended = nb->extended;
364 x25_subscr.global_facil_mask = nb->global_facil_mask; 365 x25_subscr.global_facil_mask = nb->global_facil_mask;
366 read_unlock_bh(&x25_neigh_list_lock);
365 rc = copy_to_user(arg, &x25_subscr, 367 rc = copy_to_user(arg, &x25_subscr,
366 sizeof(x25_subscr)) ? -EFAULT : 0; 368 sizeof(x25_subscr)) ? -EFAULT : 0;
367 } else { 369 } else {
368 rc = -EINVAL; 370 rc = -EINVAL;
369 if (!(x25_subscr.extended && x25_subscr.extended != 1)) { 371 if (!(x25_subscr.extended && x25_subscr.extended != 1)) {
370 rc = 0; 372 rc = 0;
373 write_lock_bh(&x25_neigh_list_lock);
371 nb->extended = x25_subscr.extended; 374 nb->extended = x25_subscr.extended;
372 nb->global_facil_mask = x25_subscr.global_facil_mask; 375 nb->global_facil_mask = x25_subscr.global_facil_mask;
376 write_unlock_bh(&x25_neigh_list_lock);
373 } 377 }
374 } 378 }
375 x25_neigh_put(nb); 379 x25_neigh_put(nb);
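
Dropping static from x25_neigh_list and x25_neigh_list_lock implies matching declarations in a shared header so af_x25.c's compat path can take the same lock; the companion change is presumably in include/net/x25.h (not part of this diff):

    extern struct list_head x25_neigh_list;
    extern rwlock_t x25_neigh_list_lock;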