Diffstat (limited to 'net')
-rw-r--r--net/802/garp.c4
-rw-r--r--net/802/stp.c4
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/8021q/vlan_core.c2
-rw-r--r--net/8021q/vlan_dev.c4
-rw-r--r--net/9p/trans_virtio.c17
-rw-r--r--net/atm/br2684.c9
-rw-r--r--net/atm/lec.c2
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/aggregation.c293
-rw-r--r--net/batman-adv/aggregation.h46
-rw-r--r--net/batman-adv/bat_iv_ogm.c1170
-rw-r--r--net/batman-adv/bat_ogm.h35
-rw-r--r--net/batman-adv/bat_sysfs.c2
-rw-r--r--net/batman-adv/bitarray.c6
-rw-r--r--net/batman-adv/gateway_client.c10
-rw-r--r--net/batman-adv/hard-interface.c88
-rw-r--r--net/batman-adv/hard-interface.h1
-rw-r--r--net/batman-adv/hash.h25
-rw-r--r--net/batman-adv/main.c4
-rw-r--r--net/batman-adv/main.h8
-rw-r--r--net/batman-adv/originator.c21
-rw-r--r--net/batman-adv/packet.h19
-rw-r--r--net/batman-adv/routing.c669
-rw-r--r--net/batman-adv/routing.h17
-rw-r--r--net/batman-adv/send.c313
-rw-r--r--net/batman-adv/send.h9
-rw-r--r--net/batman-adv/soft-interface.c44
-rw-r--r--net/batman-adv/translation-table.c199
-rw-r--r--net/batman-adv/translation-table.h21
-rw-r--r--net/batman-adv/types.h5
-rw-r--r--net/batman-adv/unicast.c6
-rw-r--r--net/batman-adv/unicast.h2
-rw-r--r--net/batman-adv/vis.c10
-rw-r--r--net/bluetooth/bnep/netdev.c2
-rw-r--r--net/bridge/br_device.c7
-rw-r--r--net/bridge/br_fdb.c23
-rw-r--r--net/bridge/br_if.c53
-rw-r--r--net/bridge/br_input.c33
-rw-r--r--net/bridge/br_multicast.c21
-rw-r--r--net/bridge/br_notify.c7
-rw-r--r--net/bridge/br_private.h7
-rw-r--r--net/bridge/br_sysfs_br.c34
-rw-r--r--net/bridge/netfilter/Kconfig2
-rw-r--r--net/bridge/netfilter/ebtable_broute.c4
-rw-r--r--net/bridge/netfilter/ebtables.c3
-rw-r--r--net/caif/caif_dev.c6
-rw-r--r--net/caif/cfcnfg.c38
-rw-r--r--net/caif/cfctrl.c23
-rw-r--r--net/caif/cfdbgl.c7
-rw-r--r--net/caif/cfdgml.c7
-rw-r--r--net/caif/cffrml.c7
-rw-r--r--net/caif/cfmuxl.c6
-rw-r--r--net/caif/cfrfml.c7
-rw-r--r--net/caif/cfserl.c7
-rw-r--r--net/caif/cfsrvl.c8
-rw-r--r--net/caif/cfutill.c7
-rw-r--r--net/caif/cfveil.c7
-rw-r--r--net/caif/cfvidl.c7
-rw-r--r--net/can/Kconfig11
-rw-r--r--net/can/Makefile3
-rw-r--r--net/can/af_can.c6
-rw-r--r--net/can/bcm.c53
-rw-r--r--net/can/gw.c959
-rw-r--r--net/ceph/ceph_common.c1
-rw-r--r--net/ceph/messenger.c1
-rw-r--r--net/ceph/msgpool.c40
-rw-r--r--net/ceph/osd_client.c26
-rw-r--r--net/ceph/osdmap.c84
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/datagram.c8
-rw-r--r--net/core/dev.c171
-rw-r--r--net/core/dev_addr_lists.c4
-rw-r--r--net/core/dst.c15
-rw-r--r--net/core/ethtool.c20
-rw-r--r--net/core/fib_rules.c8
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/flow.c36
-rw-r--r--net/core/kmap_skb.h2
-rw-r--r--net/core/link_watch.c9
-rw-r--r--net/core/neighbour.c48
-rw-r--r--net/core/net-sysfs.c12
-rw-r--r--net/core/netpoll.c8
-rw-r--r--net/core/pktgen.c3
-rw-r--r--net/core/rtnetlink.c34
-rw-r--r--net/core/scm.c12
-rw-r--r--net/core/secure_seq.c184
-rw-r--r--net/core/skbuff.c88
-rw-r--r--net/core/sock.c23
-rw-r--r--net/core/user_dma.c2
-rw-r--r--net/dcb/dcbnl.c30
-rw-r--r--net/dccp/ccids/ccid2.c84
-rw-r--r--net/dccp/ccids/ccid2.h6
-rw-r--r--net/dccp/dccp.h1
-rw-r--r--net/dccp/feat.c202
-rw-r--r--net/dccp/feat.h1
-rw-r--r--net/dccp/ipv4.c1
-rw-r--r--net/dccp/ipv6.c9
-rw-r--r--net/dccp/proto.c1
-rw-r--r--net/decnet/dn_dev.c6
-rw-r--r--net/dsa/slave.c3
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/ieee802154/6lowpan.c891
-rw-r--r--net/ieee802154/6lowpan.h212
-rw-r--r--net/ieee802154/Kconfig6
-rw-r--r--net/ieee802154/Makefile8
-rw-r--r--net/ipv4/af_inet.c7
-rw-r--r--net/ipv4/cipso_ipv4.c2
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/fib_semantics.c10
-rw-r--r--net/ipv4/fib_trie.c12
-rw-r--r--net/ipv4/gre.c4
-rw-r--r--net/ipv4/icmp.c5
-rw-r--r--net/ipv4/igmp.c16
-rw-r--r--net/ipv4/inet_diag.c5
-rw-r--r--net/ipv4/inet_hashtables.c1
-rw-r--r--net/ipv4/inet_lro.c2
-rw-r--r--net/ipv4/inetpeer.c1
-rw-r--r--net/ipv4/ip_output.c18
-rw-r--r--net/ipv4/ip_sockglue.c9
-rw-r--r--net/ipv4/ipip.c10
-rw-r--r--net/ipv4/ipmr.c8
-rw-r--r--net/ipv4/netfilter.c18
-rw-r--r--net/ipv4/netfilter/ip_queue.c12
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c24
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c36
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c16
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c1
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c28
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c4
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c38
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/tcp.c56
-rw-r--r--net/ipv4/tcp_input.c105
-rw-r--r--net/ipv4/tcp_ipv4.c69
-rw-r--r--net/ipv4/tcp_output.c72
-rw-r--r--net/ipv4/udp.c11
-rw-r--r--net/ipv6/addrconf.c87
-rw-r--r--net/ipv6/datagram.c16
-rw-r--r--net/ipv6/exthdrs.c7
-rw-r--r--net/ipv6/icmp.c28
-rw-r--r--net/ipv6/inet6_connection_sock.c9
-rw-r--r--net/ipv6/inet6_hashtables.c1
-rw-r--r--net/ipv6/ip6_fib.c2
-rw-r--r--net/ipv6/ip6_flowlabel.c8
-rw-r--r--net/ipv6/ip6_output.c20
-rw-r--r--net/ipv6/ip6_tunnel.c54
-rw-r--r--net/ipv6/ip6mr.c8
-rw-r--r--net/ipv6/ipv6_sockglue.c11
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/ndisc.c31
-rw-r--r--net/ipv6/netfilter/ip6_queue.c12
-rw-r--r--net/ipv6/raw.c12
-rw-r--r--net/ipv6/route.c72
-rw-r--r--net/ipv6/sit.c15
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/tcp_ipv6.c51
-rw-r--r--net/ipv6/udp.c8
-rw-r--r--net/irda/irlan/irlan_eth.c2
-rw-r--r--net/irda/irsysctl.c6
-rw-r--r--net/irda/qos.c6
-rw-r--r--net/iucv/Kconfig14
-rw-r--r--net/iucv/af_iucv.c870
-rw-r--r--net/iucv/iucv.c23
-rw-r--r--net/lapb/lapb_iface.c29
-rw-r--r--net/mac80211/agg-rx.c4
-rw-r--r--net/mac80211/cfg.c8
-rw-r--r--net/mac80211/ibss.c6
-rw-r--r--net/mac80211/iface.c6
-rw-r--r--net/mac80211/mesh_pathtbl.c4
-rw-r--r--net/mac80211/sta_info.c8
-rw-r--r--net/netfilter/core.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/nf_conntrack_core.c12
-rw-r--r--net/netfilter/nf_conntrack_ecache.c8
-rw-r--r--net/netfilter/nf_conntrack_extend.c4
-rw-r--r--net/netfilter/nf_conntrack_helper.c6
-rw-r--r--net/netfilter/nf_conntrack_netlink.c6
-rw-r--r--net/netfilter/nf_conntrack_pptp.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c6
-rw-r--r--net/netfilter/nf_log.c10
-rw-r--r--net/netfilter/nf_queue.c7
-rw-r--r--net/netfilter/nfnetlink.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c4
-rw-r--r--net/netfilter/xt_rateest.c9
-rw-r--r--net/netlabel/Makefile2
-rw-r--r--net/netlabel/netlabel_addrlist.c2
-rw-r--r--net/netlabel/netlabel_addrlist.h2
-rw-r--r--net/netlabel/netlabel_cipso_v4.c2
-rw-r--r--net/netlabel/netlabel_cipso_v4.h2
-rw-r--r--net/netlabel/netlabel_domainhash.c8
-rw-r--r--net/netlabel/netlabel_domainhash.h2
-rw-r--r--net/netlabel/netlabel_kapi.c22
-rw-r--r--net/netlabel/netlabel_mgmt.c2
-rw-r--r--net/netlabel/netlabel_mgmt.h2
-rw-r--r--net/netlabel/netlabel_unlabeled.c8
-rw-r--r--net/netlabel/netlabel_unlabeled.h2
-rw-r--r--net/netlabel/netlabel_user.c2
-rw-r--r--net/netlabel/netlabel_user.h2
-rw-r--r--net/netlink/af_netlink.c7
-rw-r--r--net/packet/af_packet.c953
-rw-r--r--net/phonet/af_phonet.c4
-rw-r--r--net/phonet/pn_dev.c6
-rw-r--r--net/phonet/socket.c6
-rw-r--r--net/rds/Kconfig1
-rw-r--r--net/rds/ib_rdma.c112
-rw-r--r--net/rds/iw_rdma.c13
-rw-r--r--net/rds/xlist.h80
-rw-r--r--net/sched/act_mirred.c3
-rw-r--r--net/sched/cls_rsvp.h27
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_sfb.c13
-rw-r--r--net/sched/sch_sfq.c7
-rw-r--r--net/sctp/associola.c1
-rw-r--r--net/sctp/outqueue.c4
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/sm_make_chunk.c1
-rw-r--r--net/sctp/sm_sideeffect.c5
-rw-r--r--net/sctp/sm_statefuns.c11
-rw-r--r--net/socket.c79
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c4
-rw-r--r--net/sunrpc/xprt.c1
-rw-r--r--net/tipc/bcast.c111
-rw-r--r--net/tipc/bcast.h1
-rw-r--r--net/tipc/bearer.c8
-rw-r--r--net/tipc/bearer.h4
-rw-r--r--net/tipc/config.h1
-rw-r--r--net/tipc/discover.c6
-rw-r--r--net/tipc/eth_media.c32
-rw-r--r--net/tipc/link.c111
-rw-r--r--net/tipc/link.h1
-rw-r--r--net/tipc/name_distr.c35
-rw-r--r--net/tipc/net.c11
-rw-r--r--net/tipc/node.c45
-rw-r--r--net/tipc/node.h10
-rw-r--r--net/tipc/socket.c51
-rw-r--r--net/tipc/subscr.c3
-rw-r--r--net/tipc/subscr.h6
-rw-r--r--net/unix/af_unix.c24
-rw-r--r--net/xfrm/xfrm_algo.c4
-rw-r--r--net/xfrm/xfrm_input.c5
-rw-r--r--net/xfrm/xfrm_ipcomp.c11
-rw-r--r--net/xfrm/xfrm_policy.c10
-rw-r--r--net/xfrm/xfrm_user.c4
251 files changed, 7690 insertions, 3109 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 16102951d36a..070bf4403bf8 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -553,7 +553,7 @@ static void garp_release_port(struct net_device *dev)
 		if (rtnl_dereference(port->applicants[i]))
 			return;
 	}
-	rcu_assign_pointer(dev->garp_port, NULL);
+	RCU_INIT_POINTER(dev->garp_port, NULL);
 	kfree_rcu(port, rcu);
 }
 
@@ -605,7 +605,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
 
 	ASSERT_RTNL();
 
-	rcu_assign_pointer(port->applicants[appl->type], NULL);
+	RCU_INIT_POINTER(port->applicants[appl->type], NULL);
 
 	/* Delete timer and generate a final TRANSMIT_PDU event to flush out
 	 * all pending messages before the applicant is gone. */
diff --git a/net/802/stp.c b/net/802/stp.c
index 978c30b1b36b..0e136ef1e4ba 100644
--- a/net/802/stp.c
+++ b/net/802/stp.c
@@ -88,9 +88,9 @@ void stp_proto_unregister(const struct stp_proto *proto)
 {
 	mutex_lock(&stp_proto_mutex);
 	if (is_zero_ether_addr(proto->group_address))
-		rcu_assign_pointer(stp_proto, NULL);
+		RCU_INIT_POINTER(stp_proto, NULL);
 	else
-		rcu_assign_pointer(garp_protos[proto->group_address[5] -
+		RCU_INIT_POINTER(garp_protos[proto->group_address[5] -
 				   GARP_ADDR_MIN], NULL);
 	synchronize_rcu();
 
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8970ba139d73..5471628d3ffe 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -133,7 +133,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (grp->nr_vlans == 0) {
 		vlan_gvrp_uninit_applicant(real_dev);
 
-		rcu_assign_pointer(real_dev->vlgrp, NULL);
+		RCU_INIT_POINTER(real_dev->vlgrp, NULL);
 
 		/* Free the group, after all cpu's are done. */
 		call_rcu(&grp->rcu, vlan_rcu_free);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 5f27f8e30254..f1f2f7bb6661 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -167,6 +167,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 	if (unlikely(!skb))
 		goto err_free;
 
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
 	return skb;
 
 err_free:
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 9d40a071d038..c8cf9391417e 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -610,7 +610,8 @@ static int vlan_ethtool_get_settings(struct net_device *dev,
 				     struct ethtool_cmd *cmd)
 {
 	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
-	return dev_ethtool_get_settings(vlan->real_dev, cmd);
+
+	return __ethtool_get_settings(vlan->real_dev, cmd);
 }
 
 static void vlan_ethtool_get_drvinfo(struct net_device *dev,
@@ -674,7 +675,6 @@ static const struct net_device_ops vlan_netdev_ops = {
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= vlan_dev_set_mac_address,
 	.ndo_set_rx_mode	= vlan_dev_set_rx_mode,
-	.ndo_set_multicast_list	= vlan_dev_set_rx_mode,
 	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
 	.ndo_do_ioctl		= vlan_dev_ioctl,
 	.ndo_neigh_setup	= vlan_dev_neigh_setup,
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 175b5135bdcf..e317583fcc73 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -263,7 +263,6 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
 	int in, out, inp, outp;
 	struct virtio_chan *chan = client->trans;
-	char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
 	unsigned long flags;
 	size_t pdata_off = 0;
 	struct trans_rpage_info *rpinfo = NULL;
@@ -346,7 +345,8 @@ req_retry_pinned:
 	 * Arrange in such a way that server places header in the
 	 * alloced memory and payload onto the user buffer.
 	 */
-	inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
+	inp = pack_sg_list(chan->sg, out,
+			   VIRTQUEUE_NUM, req->rc->sdata, 11);
 	/*
 	 * Running executables in the filesystem may result in
 	 * a read request with kernel buffer as opposed to user buffer.
@@ -366,8 +366,8 @@ req_retry_pinned:
 		}
 		in += inp;
 	} else {
-		in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
-				  req->rc->capacity);
+		in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM,
+				  req->rc->sdata, req->rc->capacity);
 	}
 
 	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
@@ -592,7 +592,14 @@ static struct p9_trans_module p9_virtio_trans = {
 	.close = p9_virtio_close,
 	.request = p9_virtio_request,
 	.cancel = p9_virtio_cancel,
-	.maxsize = PAGE_SIZE*VIRTQUEUE_NUM,
+
+	/*
+	 * We leave one entry for input and one entry for response
+	 * headers. We also skip one more entry to accomodate, address
+	 * that are not at page boundary, that can result in an extra
+	 * page in zero copy.
+	 */
+	.maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
 	.pref = P9_TRANS_PREF_PAYLOAD_SEP,
 	.def = 0,
 	.owner = THIS_MODULE,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 2252c2085dac..d07223c834af 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -242,8 +242,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
 		if (brdev->payload == p_bridged) {
 			skb_push(skb, 2);
 			memset(skb->data, 0, 2);
-		} else { /* p_routed */
-			skb_pull(skb, ETH_HLEN);
 		}
 	}
 	skb_debug(skb);
@@ -560,12 +558,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 	spin_unlock_irqrestore(&rq->lock, flags);
 
 	skb_queue_walk_safe(&queue, skb, tmp) {
-		struct net_device *dev = skb->dev;
+		struct net_device *dev;
+
+		br2684_push(atmvcc, skb);
+		dev = skb->dev;
 
 		dev->stats.rx_bytes -= skb->len;
 		dev->stats.rx_packets--;
-
-		br2684_push(atmvcc, skb);
 	}
 
 	/* initialize netdev carrier state */
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 215c9fad7cdf..f1964caa0f83 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -643,7 +643,7 @@ static const struct net_device_ops lec_netdev_ops = {
 	.ndo_start_xmit		= lec_start_xmit,
 	.ndo_change_mtu		= lec_change_mtu,
 	.ndo_tx_timeout		= lec_tx_timeout,
-	.ndo_set_multicast_list	= lec_set_multicast_list,
+	.ndo_set_rx_mode	= lec_set_multicast_list,
 };
 
 static const unsigned char lec_ctrl_magic[] = {
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 2de93d00631b..ce6861166499 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -19,8 +19,8 @@
 #
 
 obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
-batman-adv-y += aggregation.o
 batman-adv-y += bat_debugfs.o
+batman-adv-y += bat_iv_ogm.o
 batman-adv-y += bat_sysfs.o
 batman-adv-y += bitarray.o
 batman-adv-y += gateway_client.o
diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c
deleted file mode 100644
index 69467fe71ff2..000000000000
--- a/net/batman-adv/aggregation.c
+++ /dev/null
@@ -1,293 +0,0 @@
1/*
2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner, Simon Wunderlich
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#include "main.h"
23#include "translation-table.h"
24#include "aggregation.h"
25#include "send.h"
26#include "routing.h"
27#include "hard-interface.h"
28
29/* return true if new_packet can be aggregated with forw_packet */
30static bool can_aggregate_with(const struct batman_packet *new_batman_packet,
31 struct bat_priv *bat_priv,
32 int packet_len,
33 unsigned long send_time,
34 bool directlink,
35 const struct hard_iface *if_incoming,
36 const struct forw_packet *forw_packet)
37{
38 struct batman_packet *batman_packet =
39 (struct batman_packet *)forw_packet->skb->data;
40 int aggregated_bytes = forw_packet->packet_len + packet_len;
41 struct hard_iface *primary_if = NULL;
42 bool res = false;
43
44 /**
45 * we can aggregate the current packet to this aggregated packet
46 * if:
47 *
48 * - the send time is within our MAX_AGGREGATION_MS time
49 * - the resulting packet wont be bigger than
50 * MAX_AGGREGATION_BYTES
51 */
52
53 if (time_before(send_time, forw_packet->send_time) &&
54 time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS),
55 forw_packet->send_time) &&
56 (aggregated_bytes <= MAX_AGGREGATION_BYTES)) {
57
58 /**
59 * check aggregation compatibility
60 * -> direct link packets are broadcasted on
61 * their interface only
62 * -> aggregate packet if the current packet is
63 * a "global" packet as well as the base
64 * packet
65 */
66
67 primary_if = primary_if_get_selected(bat_priv);
68 if (!primary_if)
69 goto out;
70
71 /* packets without direct link flag and high TTL
72 * are flooded through the net */
73 if ((!directlink) &&
74 (!(batman_packet->flags & DIRECTLINK)) &&
75 (batman_packet->ttl != 1) &&
76
77 /* own packets originating non-primary
78 * interfaces leave only that interface */
79 ((!forw_packet->own) ||
80 (forw_packet->if_incoming == primary_if))) {
81 res = true;
82 goto out;
83 }
84
85 /* if the incoming packet is sent via this one
86 * interface only - we still can aggregate */
87 if ((directlink) &&
88 (new_batman_packet->ttl == 1) &&
89 (forw_packet->if_incoming == if_incoming) &&
90
91 /* packets from direct neighbors or
92 * own secondary interface packets
93 * (= secondary interface packets in general) */
94 (batman_packet->flags & DIRECTLINK ||
95 (forw_packet->own &&
96 forw_packet->if_incoming != primary_if))) {
97 res = true;
98 goto out;
99 }
100 }
101
102out:
103 if (primary_if)
104 hardif_free_ref(primary_if);
105 return res;
106}
107
108/* create a new aggregated packet and add this packet to it */
109static void new_aggregated_packet(const unsigned char *packet_buff,
110 int packet_len, unsigned long send_time,
111 bool direct_link,
112 struct hard_iface *if_incoming,
113 int own_packet)
114{
115 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
116 struct forw_packet *forw_packet_aggr;
117 unsigned char *skb_buff;
118
119 if (!atomic_inc_not_zero(&if_incoming->refcount))
120 return;
121
122 /* own packet should always be scheduled */
123 if (!own_packet) {
124 if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
125 bat_dbg(DBG_BATMAN, bat_priv,
126 "batman packet queue full\n");
127 goto out;
128 }
129 }
130
131 forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
132 if (!forw_packet_aggr) {
133 if (!own_packet)
134 atomic_inc(&bat_priv->batman_queue_left);
135 goto out;
136 }
137
138 if ((atomic_read(&bat_priv->aggregated_ogms)) &&
139 (packet_len < MAX_AGGREGATION_BYTES))
140 forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES +
141 sizeof(struct ethhdr));
142 else
143 forw_packet_aggr->skb = dev_alloc_skb(packet_len +
144 sizeof(struct ethhdr));
145
146 if (!forw_packet_aggr->skb) {
147 if (!own_packet)
148 atomic_inc(&bat_priv->batman_queue_left);
149 kfree(forw_packet_aggr);
150 goto out;
151 }
152 skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr));
153
154 INIT_HLIST_NODE(&forw_packet_aggr->list);
155
156 skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
157 forw_packet_aggr->packet_len = packet_len;
158 memcpy(skb_buff, packet_buff, packet_len);
159
160 forw_packet_aggr->own = own_packet;
161 forw_packet_aggr->if_incoming = if_incoming;
162 forw_packet_aggr->num_packets = 0;
163 forw_packet_aggr->direct_link_flags = NO_FLAGS;
164 forw_packet_aggr->send_time = send_time;
165
166 /* save packet direct link flag status */
167 if (direct_link)
168 forw_packet_aggr->direct_link_flags |= 1;
169
170 /* add new packet to packet list */
171 spin_lock_bh(&bat_priv->forw_bat_list_lock);
172 hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
173 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
174
175 /* start timer for this packet */
176 INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
177 send_outstanding_bat_packet);
178 queue_delayed_work(bat_event_workqueue,
179 &forw_packet_aggr->delayed_work,
180 send_time - jiffies);
181
182 return;
183out:
184 hardif_free_ref(if_incoming);
185}
186
187/* aggregate a new packet into the existing aggregation */
188static void aggregate(struct forw_packet *forw_packet_aggr,
189 const unsigned char *packet_buff, int packet_len,
190 bool direct_link)
191{
192 unsigned char *skb_buff;
193
194 skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
195 memcpy(skb_buff, packet_buff, packet_len);
196 forw_packet_aggr->packet_len += packet_len;
197 forw_packet_aggr->num_packets++;
198
199 /* save packet direct link flag status */
200 if (direct_link)
201 forw_packet_aggr->direct_link_flags |=
202 (1 << forw_packet_aggr->num_packets);
203}
204
205void add_bat_packet_to_list(struct bat_priv *bat_priv,
206 unsigned char *packet_buff, int packet_len,
207 struct hard_iface *if_incoming, int own_packet,
208 unsigned long send_time)
209{
210 /**
211 * _aggr -> pointer to the packet we want to aggregate with
212 * _pos -> pointer to the position in the queue
213 */
214 struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL;
215 struct hlist_node *tmp_node;
216 struct batman_packet *batman_packet =
217 (struct batman_packet *)packet_buff;
218 bool direct_link = batman_packet->flags & DIRECTLINK ? 1 : 0;
219
220 /* find position for the packet in the forward queue */
221 spin_lock_bh(&bat_priv->forw_bat_list_lock);
222 /* own packets are not to be aggregated */
223 if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
224 hlist_for_each_entry(forw_packet_pos, tmp_node,
225 &bat_priv->forw_bat_list, list) {
226 if (can_aggregate_with(batman_packet,
227 bat_priv,
228 packet_len,
229 send_time,
230 direct_link,
231 if_incoming,
232 forw_packet_pos)) {
233 forw_packet_aggr = forw_packet_pos;
234 break;
235 }
236 }
237 }
238
239 /* nothing to aggregate with - either aggregation disabled or no
240 * suitable aggregation packet found */
241 if (!forw_packet_aggr) {
242 /* the following section can run without the lock */
243 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
244
245 /**
246 * if we could not aggregate this packet with one of the others
247 * we hold it back for a while, so that it might be aggregated
248 * later on
249 */
250 if ((!own_packet) &&
251 (atomic_read(&bat_priv->aggregated_ogms)))
252 send_time += msecs_to_jiffies(MAX_AGGREGATION_MS);
253
254 new_aggregated_packet(packet_buff, packet_len,
255 send_time, direct_link,
256 if_incoming, own_packet);
257 } else {
258 aggregate(forw_packet_aggr,
259 packet_buff, packet_len,
260 direct_link);
261 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
262 }
263}
264
265/* unpack the aggregated packets and process them one by one */
266void receive_aggr_bat_packet(const struct ethhdr *ethhdr,
267 unsigned char *packet_buff, int packet_len,
268 struct hard_iface *if_incoming)
269{
270 struct batman_packet *batman_packet;
271 int buff_pos = 0;
272 unsigned char *tt_buff;
273
274 batman_packet = (struct batman_packet *)packet_buff;
275
276 do {
277 /* network to host order for our 32bit seqno and the
278 orig_interval */
279 batman_packet->seqno = ntohl(batman_packet->seqno);
280 batman_packet->tt_crc = ntohs(batman_packet->tt_crc);
281
282 tt_buff = packet_buff + buff_pos + BAT_PACKET_LEN;
283
284 receive_bat_packet(ethhdr, batman_packet, tt_buff, if_incoming);
285
286 buff_pos += BAT_PACKET_LEN +
287 tt_len(batman_packet->tt_num_changes);
288
289 batman_packet = (struct batman_packet *)
290 (packet_buff + buff_pos);
291 } while (aggregated_packet(buff_pos, packet_len,
292 batman_packet->tt_num_changes));
293}
diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h
deleted file mode 100644
index 216337bb841f..000000000000
--- a/net/batman-adv/aggregation.h
+++ /dev/null
@@ -1,46 +0,0 @@
1/*
2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner, Simon Wunderlich
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#ifndef _NET_BATMAN_ADV_AGGREGATION_H_
23#define _NET_BATMAN_ADV_AGGREGATION_H_
24
25#include "main.h"
26
27/* is there another aggregated packet here? */
28static inline int aggregated_packet(int buff_pos, int packet_len,
29 int tt_num_changes)
30{
31 int next_buff_pos = buff_pos + BAT_PACKET_LEN + (tt_num_changes *
32 sizeof(struct tt_change));
33
34 return (next_buff_pos <= packet_len) &&
35 (next_buff_pos <= MAX_AGGREGATION_BYTES);
36}
37
38void add_bat_packet_to_list(struct bat_priv *bat_priv,
39 unsigned char *packet_buff, int packet_len,
40 struct hard_iface *if_incoming, int own_packet,
41 unsigned long send_time);
42void receive_aggr_bat_packet(const struct ethhdr *ethhdr,
43 unsigned char *packet_buff, int packet_len,
44 struct hard_iface *if_incoming);
45
46#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
new file mode 100644
index 000000000000..3512e251545b
--- /dev/null
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -0,0 +1,1170 @@
1/*
2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner, Simon Wunderlich
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#include "main.h"
23#include "bat_ogm.h"
24#include "translation-table.h"
25#include "ring_buffer.h"
26#include "originator.h"
27#include "routing.h"
28#include "gateway_common.h"
29#include "gateway_client.h"
30#include "hard-interface.h"
31#include "send.h"
32
33void bat_ogm_init(struct hard_iface *hard_iface)
34{
35 struct batman_ogm_packet *batman_ogm_packet;
36
37 hard_iface->packet_len = BATMAN_OGM_LEN;
38 hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC);
39
40 batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff;
41 batman_ogm_packet->packet_type = BAT_OGM;
42 batman_ogm_packet->version = COMPAT_VERSION;
43 batman_ogm_packet->flags = NO_FLAGS;
44 batman_ogm_packet->ttl = 2;
45 batman_ogm_packet->tq = TQ_MAX_VALUE;
46 batman_ogm_packet->tt_num_changes = 0;
47 batman_ogm_packet->ttvn = 0;
48}
49
50void bat_ogm_init_primary(struct hard_iface *hard_iface)
51{
52 struct batman_ogm_packet *batman_ogm_packet;
53
54 batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff;
55 batman_ogm_packet->flags = PRIMARIES_FIRST_HOP;
56 batman_ogm_packet->ttl = TTL;
57}
58
59void bat_ogm_update_mac(struct hard_iface *hard_iface)
60{
61 struct batman_ogm_packet *batman_ogm_packet;
62
63 batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff;
64 memcpy(batman_ogm_packet->orig,
65 hard_iface->net_dev->dev_addr, ETH_ALEN);
66 memcpy(batman_ogm_packet->prev_sender,
67 hard_iface->net_dev->dev_addr, ETH_ALEN);
68}
69
70/* when do we schedule our own ogm to be sent */
71static unsigned long bat_ogm_emit_send_time(const struct bat_priv *bat_priv)
72{
73 return jiffies + msecs_to_jiffies(
74 atomic_read(&bat_priv->orig_interval) -
75 JITTER + (random32() % 2*JITTER));
76}
77
78/* when do we schedule a ogm packet to be sent */
79static unsigned long bat_ogm_fwd_send_time(void)
80{
81 return jiffies + msecs_to_jiffies(random32() % (JITTER/2));
82}
83
84/* apply hop penalty for a normal link */
85static uint8_t hop_penalty(uint8_t tq, const struct bat_priv *bat_priv)
86{
87 int hop_penalty = atomic_read(&bat_priv->hop_penalty);
88 return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE);
89}
90
91/* is there another aggregated packet here? */
92static int bat_ogm_aggr_packet(int buff_pos, int packet_len,
93 int tt_num_changes)
94{
95 int next_buff_pos = buff_pos + BATMAN_OGM_LEN + tt_len(tt_num_changes);
96
97 return (next_buff_pos <= packet_len) &&
98 (next_buff_pos <= MAX_AGGREGATION_BYTES);
99}
100
101/* send a batman ogm to a given interface */
102static void bat_ogm_send_to_if(struct forw_packet *forw_packet,
103 struct hard_iface *hard_iface)
104{
105 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
106 char *fwd_str;
107 uint8_t packet_num;
108 int16_t buff_pos;
109 struct batman_ogm_packet *batman_ogm_packet;
110 struct sk_buff *skb;
111
112 if (hard_iface->if_status != IF_ACTIVE)
113 return;
114
115 packet_num = 0;
116 buff_pos = 0;
117 batman_ogm_packet = (struct batman_ogm_packet *)forw_packet->skb->data;
118
119 /* adjust all flags and log packets */
120 while (bat_ogm_aggr_packet(buff_pos, forw_packet->packet_len,
121 batman_ogm_packet->tt_num_changes)) {
122
123 /* we might have aggregated direct link packets with an
124 * ordinary base packet */
125 if ((forw_packet->direct_link_flags & (1 << packet_num)) &&
126 (forw_packet->if_incoming == hard_iface))
127 batman_ogm_packet->flags |= DIRECTLINK;
128 else
129 batman_ogm_packet->flags &= ~DIRECTLINK;
130
131 fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ?
132 "Sending own" :
133 "Forwarding"));
134 bat_dbg(DBG_BATMAN, bat_priv,
135 "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d,"
136 " IDF %s, ttvn %d) on interface %s [%pM]\n",
137 fwd_str, (packet_num > 0 ? "aggregated " : ""),
138 batman_ogm_packet->orig,
139 ntohl(batman_ogm_packet->seqno),
140 batman_ogm_packet->tq, batman_ogm_packet->ttl,
141 (batman_ogm_packet->flags & DIRECTLINK ?
142 "on" : "off"),
143 batman_ogm_packet->ttvn, hard_iface->net_dev->name,
144 hard_iface->net_dev->dev_addr);
145
146 buff_pos += BATMAN_OGM_LEN +
147 tt_len(batman_ogm_packet->tt_num_changes);
148 packet_num++;
149 batman_ogm_packet = (struct batman_ogm_packet *)
150 (forw_packet->skb->data + buff_pos);
151 }
152
153 /* create clone because function is called more than once */
154 skb = skb_clone(forw_packet->skb, GFP_ATOMIC);
155 if (skb)
156 send_skb_packet(skb, hard_iface, broadcast_addr);
157}
158
159/* send a batman ogm packet */
160void bat_ogm_emit(struct forw_packet *forw_packet)
161{
162 struct hard_iface *hard_iface;
163 struct net_device *soft_iface;
164 struct bat_priv *bat_priv;
165 struct hard_iface *primary_if = NULL;
166 struct batman_ogm_packet *batman_ogm_packet;
167 unsigned char directlink;
168
169 batman_ogm_packet = (struct batman_ogm_packet *)
170 (forw_packet->skb->data);
171 directlink = (batman_ogm_packet->flags & DIRECTLINK ? 1 : 0);
172
173 if (!forw_packet->if_incoming) {
174 pr_err("Error - can't forward packet: incoming iface not "
175 "specified\n");
176 goto out;
177 }
178
179 soft_iface = forw_packet->if_incoming->soft_iface;
180 bat_priv = netdev_priv(soft_iface);
181
182 if (forw_packet->if_incoming->if_status != IF_ACTIVE)
183 goto out;
184
185 primary_if = primary_if_get_selected(bat_priv);
186 if (!primary_if)
187 goto out;
188
189 /* multihomed peer assumed */
190 /* non-primary OGMs are only broadcasted on their interface */
191 if ((directlink && (batman_ogm_packet->ttl == 1)) ||
192 (forw_packet->own && (forw_packet->if_incoming != primary_if))) {
193
194 /* FIXME: what about aggregated packets ? */
195 bat_dbg(DBG_BATMAN, bat_priv,
196 "%s packet (originator %pM, seqno %d, TTL %d) "
197 "on interface %s [%pM]\n",
198 (forw_packet->own ? "Sending own" : "Forwarding"),
199 batman_ogm_packet->orig,
200 ntohl(batman_ogm_packet->seqno),
201 batman_ogm_packet->ttl,
202 forw_packet->if_incoming->net_dev->name,
203 forw_packet->if_incoming->net_dev->dev_addr);
204
205 /* skb is only used once and than forw_packet is free'd */
206 send_skb_packet(forw_packet->skb, forw_packet->if_incoming,
207 broadcast_addr);
208 forw_packet->skb = NULL;
209
210 goto out;
211 }
212
213 /* broadcast on every interface */
214 rcu_read_lock();
215 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
216 if (hard_iface->soft_iface != soft_iface)
217 continue;
218
219 bat_ogm_send_to_if(forw_packet, hard_iface);
220 }
221 rcu_read_unlock();
222
223out:
224 if (primary_if)
225 hardif_free_ref(primary_if);
226}
227
228/* return true if new_packet can be aggregated with forw_packet */
229static bool bat_ogm_can_aggregate(const struct batman_ogm_packet
230 *new_batman_ogm_packet,
231 struct bat_priv *bat_priv,
232 int packet_len, unsigned long send_time,
233 bool directlink,
234 const struct hard_iface *if_incoming,
235 const struct forw_packet *forw_packet)
236{
237 struct batman_ogm_packet *batman_ogm_packet;
238 int aggregated_bytes = forw_packet->packet_len + packet_len;
239 struct hard_iface *primary_if = NULL;
240 bool res = false;
241
242 batman_ogm_packet = (struct batman_ogm_packet *)forw_packet->skb->data;
243
244 /**
245 * we can aggregate the current packet to this aggregated packet
246 * if:
247 *
248 * - the send time is within our MAX_AGGREGATION_MS time
249 * - the resulting packet wont be bigger than
250 * MAX_AGGREGATION_BYTES
251 */
252
253 if (time_before(send_time, forw_packet->send_time) &&
254 time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS),
255 forw_packet->send_time) &&
256 (aggregated_bytes <= MAX_AGGREGATION_BYTES)) {
257
258 /**
259 * check aggregation compatibility
260 * -> direct link packets are broadcasted on
261 * their interface only
262 * -> aggregate packet if the current packet is
263 * a "global" packet as well as the base
264 * packet
265 */
266
267 primary_if = primary_if_get_selected(bat_priv);
268 if (!primary_if)
269 goto out;
270
271 /* packets without direct link flag and high TTL
272 * are flooded through the net */
273 if ((!directlink) &&
274 (!(batman_ogm_packet->flags & DIRECTLINK)) &&
275 (batman_ogm_packet->ttl != 1) &&
276
277 /* own packets originating non-primary
278 * interfaces leave only that interface */
279 ((!forw_packet->own) ||
280 (forw_packet->if_incoming == primary_if))) {
281 res = true;
282 goto out;
283 }
284
285 /* if the incoming packet is sent via this one
286 * interface only - we still can aggregate */
287 if ((directlink) &&
288 (new_batman_ogm_packet->ttl == 1) &&
289 (forw_packet->if_incoming == if_incoming) &&
290
291 /* packets from direct neighbors or
292 * own secondary interface packets
293 * (= secondary interface packets in general) */
294 (batman_ogm_packet->flags & DIRECTLINK ||
295 (forw_packet->own &&
296 forw_packet->if_incoming != primary_if))) {
297 res = true;
298 goto out;
299 }
300 }
301
302out:
303 if (primary_if)
304 hardif_free_ref(primary_if);
305 return res;
306}
307
308/* create a new aggregated packet and add this packet to it */
309static void bat_ogm_aggregate_new(const unsigned char *packet_buff,
310 int packet_len, unsigned long send_time,
311 bool direct_link,
312 struct hard_iface *if_incoming,
313 int own_packet)
314{
315 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
316 struct forw_packet *forw_packet_aggr;
317 unsigned char *skb_buff;
318
319 if (!atomic_inc_not_zero(&if_incoming->refcount))
320 return;
321
322 /* own packet should always be scheduled */
323 if (!own_packet) {
324 if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
325 bat_dbg(DBG_BATMAN, bat_priv,
326 "batman packet queue full\n");
327 goto out;
328 }
329 }
330
331 forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
332 if (!forw_packet_aggr) {
333 if (!own_packet)
334 atomic_inc(&bat_priv->batman_queue_left);
335 goto out;
336 }
337
338 if ((atomic_read(&bat_priv->aggregated_ogms)) &&
339 (packet_len < MAX_AGGREGATION_BYTES))
340 forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES +
341 sizeof(struct ethhdr));
342 else
343 forw_packet_aggr->skb = dev_alloc_skb(packet_len +
344 sizeof(struct ethhdr));
345
346 if (!forw_packet_aggr->skb) {
347 if (!own_packet)
348 atomic_inc(&bat_priv->batman_queue_left);
349 kfree(forw_packet_aggr);
350 goto out;
351 }
352 skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr));
353
354 INIT_HLIST_NODE(&forw_packet_aggr->list);
355
356 skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
357 forw_packet_aggr->packet_len = packet_len;
358 memcpy(skb_buff, packet_buff, packet_len);
359
360 forw_packet_aggr->own = own_packet;
361 forw_packet_aggr->if_incoming = if_incoming;
362 forw_packet_aggr->num_packets = 0;
363 forw_packet_aggr->direct_link_flags = NO_FLAGS;
364 forw_packet_aggr->send_time = send_time;
365
366 /* save packet direct link flag status */
367 if (direct_link)
368 forw_packet_aggr->direct_link_flags |= 1;
369
370 /* add new packet to packet list */
371 spin_lock_bh(&bat_priv->forw_bat_list_lock);
372 hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
373 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
374
375 /* start timer for this packet */
376 INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
377 send_outstanding_bat_ogm_packet);
378 queue_delayed_work(bat_event_workqueue,
379 &forw_packet_aggr->delayed_work,
380 send_time - jiffies);
381
382 return;
383out:
384 hardif_free_ref(if_incoming);
385}
386
387/* aggregate a new packet into the existing ogm packet */
388static void bat_ogm_aggregate(struct forw_packet *forw_packet_aggr,
389 const unsigned char *packet_buff,
390 int packet_len, bool direct_link)
391{
392 unsigned char *skb_buff;
393
394 skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
395 memcpy(skb_buff, packet_buff, packet_len);
396 forw_packet_aggr->packet_len += packet_len;
397 forw_packet_aggr->num_packets++;
398
399 /* save packet direct link flag status */
400 if (direct_link)
401 forw_packet_aggr->direct_link_flags |=
402 (1 << forw_packet_aggr->num_packets);
403}
404
405static void bat_ogm_queue_add(struct bat_priv *bat_priv,
406 unsigned char *packet_buff,
407 int packet_len, struct hard_iface *if_incoming,
408 int own_packet, unsigned long send_time)
409{
410 /**
411 * _aggr -> pointer to the packet we want to aggregate with
412 * _pos -> pointer to the position in the queue
413 */
414 struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL;
415 struct hlist_node *tmp_node;
416 struct batman_ogm_packet *batman_ogm_packet;
417 bool direct_link;
418
419 batman_ogm_packet = (struct batman_ogm_packet *)packet_buff;
420 direct_link = batman_ogm_packet->flags & DIRECTLINK ? 1 : 0;
421
422 /* find position for the packet in the forward queue */
423 spin_lock_bh(&bat_priv->forw_bat_list_lock);
424 /* own packets are not to be aggregated */
425 if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
426 hlist_for_each_entry(forw_packet_pos, tmp_node,
427 &bat_priv->forw_bat_list, list) {
428 if (bat_ogm_can_aggregate(batman_ogm_packet,
429 bat_priv, packet_len,
430 send_time, direct_link,
431 if_incoming,
432 forw_packet_pos)) {
433 forw_packet_aggr = forw_packet_pos;
434 break;
435 }
436 }
437 }
438
439 /* nothing to aggregate with - either aggregation disabled or no
440 * suitable aggregation packet found */
441 if (!forw_packet_aggr) {
442 /* the following section can run without the lock */
443 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
444
445 /**
446 * if we could not aggregate this packet with one of the others
447 * we hold it back for a while, so that it might be aggregated
448 * later on
449 */
450 if ((!own_packet) &&
451 (atomic_read(&bat_priv->aggregated_ogms)))
452 send_time += msecs_to_jiffies(MAX_AGGREGATION_MS);
453
454 bat_ogm_aggregate_new(packet_buff, packet_len,
455 send_time, direct_link,
456 if_incoming, own_packet);
457 } else {
458 bat_ogm_aggregate(forw_packet_aggr, packet_buff, packet_len,
459 direct_link);
460 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
461 }
462}
463
464static void bat_ogm_forward(struct orig_node *orig_node,
465 const struct ethhdr *ethhdr,
466 struct batman_ogm_packet *batman_ogm_packet,
467 int directlink, struct hard_iface *if_incoming)
468{
469 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
470 struct neigh_node *router;
471 uint8_t in_tq, in_ttl, tq_avg = 0;
472 uint8_t tt_num_changes;
473
474 if (batman_ogm_packet->ttl <= 1) {
475 bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n");
476 return;
477 }
478
479 router = orig_node_get_router(orig_node);
480
481 in_tq = batman_ogm_packet->tq;
482 in_ttl = batman_ogm_packet->ttl;
483 tt_num_changes = batman_ogm_packet->tt_num_changes;
484
485 batman_ogm_packet->ttl--;
486 memcpy(batman_ogm_packet->prev_sender, ethhdr->h_source, ETH_ALEN);
487
488 /* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast
489 * of our best tq value */
490 if (router && router->tq_avg != 0) {
491
492 /* rebroadcast ogm of best ranking neighbor as is */
493 if (!compare_eth(router->addr, ethhdr->h_source)) {
494 batman_ogm_packet->tq = router->tq_avg;
495
496 if (router->last_ttl)
497 batman_ogm_packet->ttl = router->last_ttl - 1;
498 }
499
500 tq_avg = router->tq_avg;
501 }
502
503 if (router)
504 neigh_node_free_ref(router);
505
506 /* apply hop penalty */
507 batman_ogm_packet->tq = hop_penalty(batman_ogm_packet->tq, bat_priv);
508
509 bat_dbg(DBG_BATMAN, bat_priv,
510 "Forwarding packet: tq_orig: %i, tq_avg: %i, "
511 "tq_forw: %i, ttl_orig: %i, ttl_forw: %i\n",
512 in_tq, tq_avg, batman_ogm_packet->tq, in_ttl - 1,
513 batman_ogm_packet->ttl);
514
515 batman_ogm_packet->seqno = htonl(batman_ogm_packet->seqno);
516 batman_ogm_packet->tt_crc = htons(batman_ogm_packet->tt_crc);
517
518 /* switch of primaries first hop flag when forwarding */
519 batman_ogm_packet->flags &= ~PRIMARIES_FIRST_HOP;
520 if (directlink)
521 batman_ogm_packet->flags |= DIRECTLINK;
522 else
523 batman_ogm_packet->flags &= ~DIRECTLINK;
524
525 bat_ogm_queue_add(bat_priv, (unsigned char *)batman_ogm_packet,
526 BATMAN_OGM_LEN + tt_len(tt_num_changes),
527 if_incoming, 0, bat_ogm_fwd_send_time());
528}
529
530void bat_ogm_schedule(struct hard_iface *hard_iface, int tt_num_changes)
531{
532 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
533 struct batman_ogm_packet *batman_ogm_packet;
534 struct hard_iface *primary_if;
535 int vis_server;
536
537 vis_server = atomic_read(&bat_priv->vis_mode);
538 primary_if = primary_if_get_selected(bat_priv);
539
540 batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff;
541
542 /* change sequence number to network order */
543 batman_ogm_packet->seqno =
544 htonl((uint32_t)atomic_read(&hard_iface->seqno));
545
546 batman_ogm_packet->ttvn = atomic_read(&bat_priv->ttvn);
547 batman_ogm_packet->tt_crc = htons((uint16_t)
548 atomic_read(&bat_priv->tt_crc));
549 if (tt_num_changes >= 0)
550 batman_ogm_packet->tt_num_changes = tt_num_changes;
551
552 if (vis_server == VIS_TYPE_SERVER_SYNC)
553 batman_ogm_packet->flags |= VIS_SERVER;
554 else
555 batman_ogm_packet->flags &= ~VIS_SERVER;
556
557 if ((hard_iface == primary_if) &&
558 (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER))
559 batman_ogm_packet->gw_flags =
560 (uint8_t)atomic_read(&bat_priv->gw_bandwidth);
561 else
562 batman_ogm_packet->gw_flags = NO_FLAGS;
563
564 atomic_inc(&hard_iface->seqno);
565
566 slide_own_bcast_window(hard_iface);
567 bat_ogm_queue_add(bat_priv, hard_iface->packet_buff,
568 hard_iface->packet_len, hard_iface, 1,
569 bat_ogm_emit_send_time(bat_priv));
570
571 if (primary_if)
572 hardif_free_ref(primary_if);
573}
574
575static void bat_ogm_orig_update(struct bat_priv *bat_priv,
576 struct orig_node *orig_node,
577 const struct ethhdr *ethhdr,
578 const struct batman_ogm_packet
579 *batman_ogm_packet,
580 struct hard_iface *if_incoming,
581 const unsigned char *tt_buff, int is_duplicate)
582{
583 struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
584 struct neigh_node *router = NULL;
585 struct orig_node *orig_node_tmp;
586 struct hlist_node *node;
587 uint8_t bcast_own_sum_orig, bcast_own_sum_neigh;
588
589 bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): "
590 "Searching and updating originator entry of received packet\n");
591
592 rcu_read_lock();
593 hlist_for_each_entry_rcu(tmp_neigh_node, node,
594 &orig_node->neigh_list, list) {
595 if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
596 (tmp_neigh_node->if_incoming == if_incoming) &&
597 atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
598 if (neigh_node)
599 neigh_node_free_ref(neigh_node);
600 neigh_node = tmp_neigh_node;
601 continue;
602 }
603
604 if (is_duplicate)
605 continue;
606
607 spin_lock_bh(&tmp_neigh_node->tq_lock);
608 ring_buffer_set(tmp_neigh_node->tq_recv,
609 &tmp_neigh_node->tq_index, 0);
610 tmp_neigh_node->tq_avg =
611 ring_buffer_avg(tmp_neigh_node->tq_recv);
612 spin_unlock_bh(&tmp_neigh_node->tq_lock);
613 }
614
615 if (!neigh_node) {
616 struct orig_node *orig_tmp;
617
618 orig_tmp = get_orig_node(bat_priv, ethhdr->h_source);
619 if (!orig_tmp)
620 goto unlock;
621
622 neigh_node = create_neighbor(orig_node, orig_tmp,
623 ethhdr->h_source, if_incoming);
624
625 orig_node_free_ref(orig_tmp);
626 if (!neigh_node)
627 goto unlock;
628 } else
629 bat_dbg(DBG_BATMAN, bat_priv,
630 "Updating existing last-hop neighbor of originator\n");
631
632 rcu_read_unlock();
633
634 orig_node->flags = batman_ogm_packet->flags;
635 neigh_node->last_valid = jiffies;
636
637 spin_lock_bh(&neigh_node->tq_lock);
638 ring_buffer_set(neigh_node->tq_recv,
639 &neigh_node->tq_index,
640 batman_ogm_packet->tq);
641 neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv);
642 spin_unlock_bh(&neigh_node->tq_lock);
643
644 if (!is_duplicate) {
645 orig_node->last_ttl = batman_ogm_packet->ttl;
646 neigh_node->last_ttl = batman_ogm_packet->ttl;
647 }
648
649 bonding_candidate_add(orig_node, neigh_node);
650
651 /* if this neighbor already is our next hop there is nothing
652 * to change */
653 router = orig_node_get_router(orig_node);
654 if (router == neigh_node)
655 goto update_tt;
656
657 /* if this neighbor does not offer a better TQ we won't consider it */
658 if (router && (router->tq_avg > neigh_node->tq_avg))
659 goto update_tt;
660
661 /* if the TQ is the same and the link not more symmetric we
662 * won't consider it either */
663 if (router && (neigh_node->tq_avg == router->tq_avg)) {
664 orig_node_tmp = router->orig_node;
665 spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
666 bcast_own_sum_orig =
667 orig_node_tmp->bcast_own_sum[if_incoming->if_num];
668 spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
669
670 orig_node_tmp = neigh_node->orig_node;
671 spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
672 bcast_own_sum_neigh =
673 orig_node_tmp->bcast_own_sum[if_incoming->if_num];
674 spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
675
676 if (bcast_own_sum_orig >= bcast_own_sum_neigh)
677 goto update_tt;
678 }
679
680 update_route(bat_priv, orig_node, neigh_node);
681
682update_tt:
683 /* I have to check for transtable changes only if the OGM has been
684 * sent through a primary interface */
685 if (((batman_ogm_packet->orig != ethhdr->h_source) &&
686 (batman_ogm_packet->ttl > 2)) ||
687 (batman_ogm_packet->flags & PRIMARIES_FIRST_HOP))
688 tt_update_orig(bat_priv, orig_node, tt_buff,
689 batman_ogm_packet->tt_num_changes,
690 batman_ogm_packet->ttvn,
691 batman_ogm_packet->tt_crc);
692
693 if (orig_node->gw_flags != batman_ogm_packet->gw_flags)
694 gw_node_update(bat_priv, orig_node,
695 batman_ogm_packet->gw_flags);
696
697 orig_node->gw_flags = batman_ogm_packet->gw_flags;
698
699 /* restart gateway selection if fast or late switching was enabled */
700 if ((orig_node->gw_flags) &&
701 (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) &&
702 (atomic_read(&bat_priv->gw_sel_class) > 2))
703 gw_check_election(bat_priv, orig_node);
704
705 goto out;
706
707unlock:
708 rcu_read_unlock();
709out:
710 if (neigh_node)
711 neigh_node_free_ref(neigh_node);
712 if (router)
713 neigh_node_free_ref(router);
714}
715
716static int bat_ogm_calc_tq(struct orig_node *orig_node,
717 struct orig_node *orig_neigh_node,
718 struct batman_ogm_packet *batman_ogm_packet,
719 struct hard_iface *if_incoming)
720{
721 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
722 struct neigh_node *neigh_node = NULL, *tmp_neigh_node;
723 struct hlist_node *node;
724 uint8_t total_count;
725 uint8_t orig_eq_count, neigh_rq_count, tq_own;
726 int tq_asym_penalty, ret = 0;
727
728 /* find corresponding one hop neighbor */
729 rcu_read_lock();
730 hlist_for_each_entry_rcu(tmp_neigh_node, node,
731 &orig_neigh_node->neigh_list, list) {
732
733 if (!compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig))
734 continue;
735
736 if (tmp_neigh_node->if_incoming != if_incoming)
737 continue;
738
739 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
740 continue;
741
742 neigh_node = tmp_neigh_node;
743 break;
744 }
745 rcu_read_unlock();
746
747 if (!neigh_node)
748 neigh_node = create_neighbor(orig_neigh_node,
749 orig_neigh_node,
750 orig_neigh_node->orig,
751 if_incoming);
752
753 if (!neigh_node)
754 goto out;
755
756 /* if orig_node is direct neighbor update neigh_node last_valid */
757 if (orig_node == orig_neigh_node)
758 neigh_node->last_valid = jiffies;
759
760 orig_node->last_valid = jiffies;
761
762 /* find packet count of corresponding one hop neighbor */
763 spin_lock_bh(&orig_node->ogm_cnt_lock);
764 orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num];
765 neigh_rq_count = neigh_node->real_packet_count;
766 spin_unlock_bh(&orig_node->ogm_cnt_lock);
767
768 /* pay attention to not get a value bigger than 100 % */
769 total_count = (orig_eq_count > neigh_rq_count ?
770 neigh_rq_count : orig_eq_count);
771
772 /* if we have too few packets (too less data) we set tq_own to zero */
773 /* if we receive too few packets it is not considered bidirectional */
774 if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) ||
775 (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM))
776 tq_own = 0;
777 else
778 /* neigh_node->real_packet_count is never zero as we
779 * only purge old information when getting new
780 * information */
781 tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count;
782
783 /*
784 * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does
785 * affect the nearly-symmetric links only a little, but
786 * punishes asymmetric links more. This will give a value
787 * between 0 and TQ_MAX_VALUE
788 */
789 tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE *
790 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
791 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
792 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) /
793 (TQ_LOCAL_WINDOW_SIZE *
794 TQ_LOCAL_WINDOW_SIZE *
795 TQ_LOCAL_WINDOW_SIZE);
796
797 batman_ogm_packet->tq = ((batman_ogm_packet->tq * tq_own
798 * tq_asym_penalty) /
799 (TQ_MAX_VALUE * TQ_MAX_VALUE));
800
801 bat_dbg(DBG_BATMAN, bat_priv,
802 "bidirectional: "
803 "orig = %-15pM neigh = %-15pM => own_bcast = %2i, "
804 "real recv = %2i, local tq: %3i, asym_penalty: %3i, "
805 "total tq: %3i\n",
806 orig_node->orig, orig_neigh_node->orig, total_count,
807 neigh_rq_count, tq_own, tq_asym_penalty, batman_ogm_packet->tq);
808
809 /* if link has the minimum required transmission quality
810 * consider it bidirectional */
811 if (batman_ogm_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT)
812 ret = 1;
813
814out:
815 if (neigh_node)
816 neigh_node_free_ref(neigh_node);
817 return ret;
818}
819
820/* processes a batman packet for all interfaces, adjusts the sequence number and
821 * finds out whether it is a duplicate.
822 * returns:
823 * 1 the packet is a duplicate
824 * 0 the packet has not yet been received
825 * -1 the packet is old and has been received while the seqno window
826 * was protected. Caller should drop it.
827 */
828static int bat_ogm_update_seqnos(const struct ethhdr *ethhdr,
829 const struct batman_ogm_packet
830 *batman_ogm_packet,
831 const struct hard_iface *if_incoming)
832{
833 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
834 struct orig_node *orig_node;
835 struct neigh_node *tmp_neigh_node;
836 struct hlist_node *node;
837 int is_duplicate = 0;
838 int32_t seq_diff;
839 int need_update = 0;
840 int set_mark, ret = -1;
841
842 orig_node = get_orig_node(bat_priv, batman_ogm_packet->orig);
843 if (!orig_node)
844 return 0;
845
846 spin_lock_bh(&orig_node->ogm_cnt_lock);
847 seq_diff = batman_ogm_packet->seqno - orig_node->last_real_seqno;
848
849 /* signalize caller that the packet is to be dropped. */
850 if (window_protected(bat_priv, seq_diff,
851 &orig_node->batman_seqno_reset))
852 goto out;
853
854 rcu_read_lock();
855 hlist_for_each_entry_rcu(tmp_neigh_node, node,
856 &orig_node->neigh_list, list) {
857
858 is_duplicate |= get_bit_status(tmp_neigh_node->real_bits,
859 orig_node->last_real_seqno,
860 batman_ogm_packet->seqno);
861
862 if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
863 (tmp_neigh_node->if_incoming == if_incoming))
864 set_mark = 1;
865 else
866 set_mark = 0;
867
868 /* if the window moved, set the update flag. */
869 need_update |= bit_get_packet(bat_priv,
870 tmp_neigh_node->real_bits,
871 seq_diff, set_mark);
872
873 tmp_neigh_node->real_packet_count =
874 bit_packet_count(tmp_neigh_node->real_bits);
875 }
876 rcu_read_unlock();
877
878 if (need_update) {
879 bat_dbg(DBG_BATMAN, bat_priv,
880 "updating last_seqno: old %d, new %d\n",
881 orig_node->last_real_seqno, batman_ogm_packet->seqno);
882 orig_node->last_real_seqno = batman_ogm_packet->seqno;
883 }
884
885 ret = is_duplicate;
886
887out:
888 spin_unlock_bh(&orig_node->ogm_cnt_lock);
889 orig_node_free_ref(orig_node);
890 return ret;
891}
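
The real_bits handling above relies on a sliding window over the last TQ_LOCAL_WINDOW_SIZE sequence numbers. The following is a deliberately simplified model, assuming the whole window fits in a single 64-bit word; the kernel spreads it over NUM_WORDS words (get_bit_status/bit_mark/bit_get_packet) and adds the window_protected() reset logic, so this only illustrates the idea.

#include <stdint.h>
#include <stdbool.h>

struct seqno_window {
	uint64_t bits;		/* bit i set: seqno (last_seqno - i) seen */
	int32_t last_seqno;
};

/* mark seqno as received; returns true if it was already in the window */
static bool window_mark(struct seqno_window *w, int32_t seqno)
{
	int32_t diff = seqno - w->last_seqno;
	bool dup;

	if (diff > 0) {
		/* window slides forward: old history shifts to higher bits */
		w->bits = diff < 64 ? w->bits << diff : 0;
		w->last_seqno = seqno;
		diff = 0;
	}

	if (diff <= -64)
		return true;	/* older than the window, treat as seen */

	dup = w->bits & (1ULL << -diff);
	w->bits |= 1ULL << -diff;
	return dup;
}
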
892
893static void bat_ogm_process(const struct ethhdr *ethhdr,
894 struct batman_ogm_packet *batman_ogm_packet,
895 const unsigned char *tt_buff,
896 struct hard_iface *if_incoming)
897{
898 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
899 struct hard_iface *hard_iface;
900 struct orig_node *orig_neigh_node, *orig_node;
901 struct neigh_node *router = NULL, *router_router = NULL;
902 struct neigh_node *orig_neigh_router = NULL;
903 int has_directlink_flag;
904 int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0;
905 int is_broadcast = 0, is_bidirectional, is_single_hop_neigh;
906 int is_duplicate;
907 uint32_t if_incoming_seqno;
908
909 /* Silently drop when the batman packet is actually not a
910 * correct packet.
911 *
912 * This might happen if a packet is padded (e.g. Ethernet has a
913	 * minimum frame length of 64 bytes) and the aggregation interprets
914 * it as an additional length.
915 *
916 * TODO: A more sane solution would be to have a bit in the
917 * batman_ogm_packet to detect whether the packet is the last
918 * packet in an aggregation. Here we expect that the padding
919 * is always zero (or not 0x01)
920 */
921 if (batman_ogm_packet->packet_type != BAT_OGM)
922 return;
923
924 /* could be changed by schedule_own_packet() */
925 if_incoming_seqno = atomic_read(&if_incoming->seqno);
926
927 has_directlink_flag = (batman_ogm_packet->flags & DIRECTLINK ? 1 : 0);
928
929 is_single_hop_neigh = (compare_eth(ethhdr->h_source,
930 batman_ogm_packet->orig) ? 1 : 0);
931
932 bat_dbg(DBG_BATMAN, bat_priv,
933 "Received BATMAN packet via NB: %pM, IF: %s [%pM] "
934 "(from OG: %pM, via prev OG: %pM, seqno %d, ttvn %u, "
935 "crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n",
936 ethhdr->h_source, if_incoming->net_dev->name,
937 if_incoming->net_dev->dev_addr, batman_ogm_packet->orig,
938 batman_ogm_packet->prev_sender, batman_ogm_packet->seqno,
939 batman_ogm_packet->ttvn, batman_ogm_packet->tt_crc,
940 batman_ogm_packet->tt_num_changes, batman_ogm_packet->tq,
941 batman_ogm_packet->ttl, batman_ogm_packet->version,
942 has_directlink_flag);
943
944 rcu_read_lock();
945 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
946 if (hard_iface->if_status != IF_ACTIVE)
947 continue;
948
949 if (hard_iface->soft_iface != if_incoming->soft_iface)
950 continue;
951
952 if (compare_eth(ethhdr->h_source,
953 hard_iface->net_dev->dev_addr))
954 is_my_addr = 1;
955
956 if (compare_eth(batman_ogm_packet->orig,
957 hard_iface->net_dev->dev_addr))
958 is_my_orig = 1;
959
960 if (compare_eth(batman_ogm_packet->prev_sender,
961 hard_iface->net_dev->dev_addr))
962 is_my_oldorig = 1;
963
964 if (is_broadcast_ether_addr(ethhdr->h_source))
965 is_broadcast = 1;
966 }
967 rcu_read_unlock();
968
969 if (batman_ogm_packet->version != COMPAT_VERSION) {
970 bat_dbg(DBG_BATMAN, bat_priv,
971 "Drop packet: incompatible batman version (%i)\n",
972 batman_ogm_packet->version);
973 return;
974 }
975
976 if (is_my_addr) {
977 bat_dbg(DBG_BATMAN, bat_priv,
978 "Drop packet: received my own broadcast (sender: %pM"
979 ")\n",
980 ethhdr->h_source);
981 return;
982 }
983
984 if (is_broadcast) {
985 bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
986 "ignoring all packets with broadcast source addr (sender: %pM"
987 ")\n", ethhdr->h_source);
988 return;
989 }
990
991 if (is_my_orig) {
992 unsigned long *word;
993 int offset;
994
995 orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source);
996 if (!orig_neigh_node)
997 return;
998
999 /* neighbor has to indicate direct link and it has to
1000 * come via the corresponding interface */
1001 /* save packet seqno for bidirectional check */
1002 if (has_directlink_flag &&
1003 compare_eth(if_incoming->net_dev->dev_addr,
1004 batman_ogm_packet->orig)) {
1005 offset = if_incoming->if_num * NUM_WORDS;
1006
1007 spin_lock_bh(&orig_neigh_node->ogm_cnt_lock);
1008 word = &(orig_neigh_node->bcast_own[offset]);
1009 bit_mark(word,
1010 if_incoming_seqno -
1011 batman_ogm_packet->seqno - 2);
1012 orig_neigh_node->bcast_own_sum[if_incoming->if_num] =
1013 bit_packet_count(word);
1014 spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock);
1015 }
1016
1017 bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
1018 "originator packet from myself (via neighbor)\n");
1019 orig_node_free_ref(orig_neigh_node);
1020 return;
1021 }
1022
1023 if (is_my_oldorig) {
1024 bat_dbg(DBG_BATMAN, bat_priv,
1025			"Drop packet: ignoring all rebroadcast echoes (sender: "
1026 "%pM)\n", ethhdr->h_source);
1027 return;
1028 }
1029
1030 orig_node = get_orig_node(bat_priv, batman_ogm_packet->orig);
1031 if (!orig_node)
1032 return;
1033
1034 is_duplicate = bat_ogm_update_seqnos(ethhdr, batman_ogm_packet,
1035 if_incoming);
1036
1037 if (is_duplicate == -1) {
1038 bat_dbg(DBG_BATMAN, bat_priv,
1039 "Drop packet: packet within seqno protection time "
1040 "(sender: %pM)\n", ethhdr->h_source);
1041 goto out;
1042 }
1043
1044 if (batman_ogm_packet->tq == 0) {
1045 bat_dbg(DBG_BATMAN, bat_priv,
1046 "Drop packet: originator packet with tq equal 0\n");
1047 goto out;
1048 }
1049
1050 router = orig_node_get_router(orig_node);
1051 if (router)
1052 router_router = orig_node_get_router(router->orig_node);
1053
1054 /* avoid temporary routing loops */
1055 if (router && router_router &&
1056 (compare_eth(router->addr, batman_ogm_packet->prev_sender)) &&
1057 !(compare_eth(batman_ogm_packet->orig,
1058 batman_ogm_packet->prev_sender)) &&
1059 (compare_eth(router->addr, router_router->addr))) {
1060 bat_dbg(DBG_BATMAN, bat_priv,
1061 "Drop packet: ignoring all rebroadcast packets that "
1062 "may make me loop (sender: %pM)\n", ethhdr->h_source);
1063 goto out;
1064 }
1065
1066 /* if sender is a direct neighbor the sender mac equals
1067 * originator mac */
1068 orig_neigh_node = (is_single_hop_neigh ?
1069 orig_node :
1070 get_orig_node(bat_priv, ethhdr->h_source));
1071 if (!orig_neigh_node)
1072 goto out;
1073
1074 orig_neigh_router = orig_node_get_router(orig_neigh_node);
1075
1076 /* drop packet if sender is not a direct neighbor and if we
1077 * don't route towards it */
1078 if (!is_single_hop_neigh && (!orig_neigh_router)) {
1079 bat_dbg(DBG_BATMAN, bat_priv,
1080 "Drop packet: OGM via unknown neighbor!\n");
1081 goto out_neigh;
1082 }
1083
1084 is_bidirectional = bat_ogm_calc_tq(orig_node, orig_neigh_node,
1085 batman_ogm_packet, if_incoming);
1086
1087 bonding_save_primary(orig_node, orig_neigh_node, batman_ogm_packet);
1088
1089 /* update ranking if it is not a duplicate or has the same
1090 * seqno and similar ttl as the non-duplicate */
1091 if (is_bidirectional &&
1092 (!is_duplicate ||
1093 ((orig_node->last_real_seqno == batman_ogm_packet->seqno) &&
1094 (orig_node->last_ttl - 3 <= batman_ogm_packet->ttl))))
1095 bat_ogm_orig_update(bat_priv, orig_node, ethhdr,
1096 batman_ogm_packet, if_incoming,
1097 tt_buff, is_duplicate);
1098
1099 /* is single hop (direct) neighbor */
1100 if (is_single_hop_neigh) {
1101
1102 /* mark direct link on incoming interface */
1103 bat_ogm_forward(orig_node, ethhdr, batman_ogm_packet,
1104 1, if_incoming);
1105
1106 bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: "
1107 "rebroadcast neighbor packet with direct link flag\n");
1108 goto out_neigh;
1109 }
1110
1111 /* multihop originator */
1112 if (!is_bidirectional) {
1113 bat_dbg(DBG_BATMAN, bat_priv,
1114 "Drop packet: not received via bidirectional link\n");
1115 goto out_neigh;
1116 }
1117
1118 if (is_duplicate) {
1119 bat_dbg(DBG_BATMAN, bat_priv,
1120 "Drop packet: duplicate packet received\n");
1121 goto out_neigh;
1122 }
1123
1124 bat_dbg(DBG_BATMAN, bat_priv,
1125 "Forwarding packet: rebroadcast originator packet\n");
1126 bat_ogm_forward(orig_node, ethhdr, batman_ogm_packet, 0, if_incoming);
1127
1128out_neigh:
1129 if ((orig_neigh_node) && (!is_single_hop_neigh))
1130 orig_node_free_ref(orig_neigh_node);
1131out:
1132 if (router)
1133 neigh_node_free_ref(router);
1134 if (router_router)
1135 neigh_node_free_ref(router_router);
1136 if (orig_neigh_router)
1137 neigh_node_free_ref(orig_neigh_router);
1138
1139 orig_node_free_ref(orig_node);
1140}
1141
1142void bat_ogm_receive(const struct ethhdr *ethhdr, unsigned char *packet_buff,
1143 int packet_len, struct hard_iface *if_incoming)
1144{
1145 struct batman_ogm_packet *batman_ogm_packet;
1146 int buff_pos = 0;
1147 unsigned char *tt_buff;
1148
1149 batman_ogm_packet = (struct batman_ogm_packet *)packet_buff;
1150
1151 /* unpack the aggregated packets and process them one by one */
1152 do {
1153		/* network to host order for our 32bit seqno and the
1154		   16bit tt_crc */
1155 batman_ogm_packet->seqno = ntohl(batman_ogm_packet->seqno);
1156 batman_ogm_packet->tt_crc = ntohs(batman_ogm_packet->tt_crc);
1157
1158 tt_buff = packet_buff + buff_pos + BATMAN_OGM_LEN;
1159
1160 bat_ogm_process(ethhdr, batman_ogm_packet,
1161 tt_buff, if_incoming);
1162
1163 buff_pos += BATMAN_OGM_LEN +
1164 tt_len(batman_ogm_packet->tt_num_changes);
1165
1166 batman_ogm_packet = (struct batman_ogm_packet *)
1167 (packet_buff + buff_pos);
1168 } while (bat_ogm_aggr_packet(buff_pos, packet_len,
1169 batman_ogm_packet->tt_num_changes));
1170}
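
The loop above walks a buffer of back-to-back OGMs, each header immediately followed by its tt changes. Below is a user-space sketch of the same walk; the header layout and the tt_change entry size are stand-ins for illustration, not the real definitions from packet.h.

#include <stdint.h>
#include <string.h>

#define TT_CHANGE_LEN	12	/* placeholder for sizeof(struct tt_change) */

struct ogm_sketch {		/* stand-in header, not the real wire format */
	uint8_t  packet_type;
	uint8_t  version;
	uint8_t  ttl;
	uint8_t  flags;
	uint32_t seqno;		/* network byte order on the wire */
	uint8_t  tt_num_changes;
} __attribute__((packed));

static int for_each_aggregated_ogm(const uint8_t *buff, int packet_len,
				   void (*cb)(const struct ogm_sketch *ogm,
					      const uint8_t *tt_buff))
{
	int buff_pos = 0, count = 0;
	struct ogm_sketch ogm;

	while (buff_pos + (int)sizeof(ogm) <= packet_len) {
		memcpy(&ogm, buff + buff_pos, sizeof(ogm));

		/* the tt changes (if any) directly follow the OGM header */
		cb(&ogm, buff + buff_pos + sizeof(ogm));
		count++;

		buff_pos += sizeof(ogm) + ogm.tt_num_changes * TT_CHANGE_LEN;
	}
	return count;
}
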
diff --git a/net/batman-adv/bat_ogm.h b/net/batman-adv/bat_ogm.h
new file mode 100644
index 000000000000..69329c107e28
--- /dev/null
+++ b/net/batman-adv/bat_ogm.h
@@ -0,0 +1,35 @@
1/*
2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner, Simon Wunderlich
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#ifndef _NET_BATMAN_ADV_OGM_H_
23#define _NET_BATMAN_ADV_OGM_H_
24
25#include "main.h"
26
27void bat_ogm_init(struct hard_iface *hard_iface);
28void bat_ogm_init_primary(struct hard_iface *hard_iface);
29void bat_ogm_update_mac(struct hard_iface *hard_iface);
30void bat_ogm_schedule(struct hard_iface *hard_iface, int tt_num_changes);
31void bat_ogm_emit(struct forw_packet *forw_packet);
32void bat_ogm_receive(const struct ethhdr *ethhdr, unsigned char *packet_buff,
33 int packet_len, struct hard_iface *if_incoming);
34
35#endif /* _NET_BATMAN_ADV_OGM_H_ */
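
For orientation, the hooks declared here map onto call sites roughly as sketched below. The init/init_primary/update_mac entries are visible in the hard-interface.c hunk further down; the schedule/emit/receive entries are inferred from the rest of the series and should be read as assumptions, not quotes.

/* rough call map (sketch, not kernel code):
 *
 *   hardif_enable_interface()         -> bat_ogm_init(hard_iface)
 *   primary_if_select()               -> bat_ogm_init_primary(new_hard_iface)
 *   interface activation / addr change-> bat_ogm_update_mac(hard_iface)
 *   OGM scheduling (send.c, assumed)  -> bat_ogm_schedule(hard_iface, ...)
 *   queued OGM transmission (assumed) -> bat_ogm_emit(forw_packet)
 *   recv_bat_ogm_packet() (routing.c) -> bat_ogm_receive(ethhdr, buff, len,
 *                                                        if_incoming)
 */
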
diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c
index cd15deba60a1..b8a7414c3571 100644
--- a/net/batman-adv/bat_sysfs.c
+++ b/net/batman-adv/bat_sysfs.c
@@ -380,6 +380,7 @@ static ssize_t store_gw_bwidth(struct kobject *kobj, struct attribute *attr,
380BAT_ATTR_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL); 380BAT_ATTR_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL);
381BAT_ATTR_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); 381BAT_ATTR_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
382BAT_ATTR_BOOL(fragmentation, S_IRUGO | S_IWUSR, update_min_mtu); 382BAT_ATTR_BOOL(fragmentation, S_IRUGO | S_IWUSR, update_min_mtu);
383BAT_ATTR_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
383static BAT_ATTR(vis_mode, S_IRUGO | S_IWUSR, show_vis_mode, store_vis_mode); 384static BAT_ATTR(vis_mode, S_IRUGO | S_IWUSR, show_vis_mode, store_vis_mode);
384static BAT_ATTR(gw_mode, S_IRUGO | S_IWUSR, show_gw_mode, store_gw_mode); 385static BAT_ATTR(gw_mode, S_IRUGO | S_IWUSR, show_gw_mode, store_gw_mode);
385BAT_ATTR_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * JITTER, INT_MAX, NULL); 386BAT_ATTR_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * JITTER, INT_MAX, NULL);
@@ -396,6 +397,7 @@ static struct bat_attribute *mesh_attrs[] = {
396 &bat_attr_aggregated_ogms, 397 &bat_attr_aggregated_ogms,
397 &bat_attr_bonding, 398 &bat_attr_bonding,
398 &bat_attr_fragmentation, 399 &bat_attr_fragmentation,
400 &bat_attr_ap_isolation,
399 &bat_attr_vis_mode, 401 &bat_attr_vis_mode,
400 &bat_attr_gw_mode, 402 &bat_attr_gw_mode,
401 &bat_attr_orig_interval, 403 &bat_attr_orig_interval,
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index c1f4bfc09cc3..0be9ff346fa0 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -97,12 +97,12 @@ static void bit_shift(unsigned long *seq_bits, int32_t n)
97 (seq_bits[i - word_num - 1] >> 97 (seq_bits[i - word_num - 1] >>
98 (WORD_BIT_SIZE-word_offset)); 98 (WORD_BIT_SIZE-word_offset));
99 /* and the upper part of the right half and shift it left to 99 /* and the upper part of the right half and shift it left to
100 * it's position */ 100 * its position */
101 /* for our example that would be: word[0] = 9800 + 0076 = 101 /* for our example that would be: word[0] = 9800 + 0076 =
102 * 9876 */ 102 * 9876 */
103 } 103 }
104 /* now for our last word, i==word_num, we only have the it's "left" 104 /* now for our last word, i==word_num, we only have its "left" half.
105 * half. that's the 1000 word in our example.*/ 105 * that's the 1000 word in our example.*/
106 106
107 seq_bits[i] = (seq_bits[i - word_num] << word_offset); 107 seq_bits[i] = (seq_bits[i - word_num] << word_offset);
108 108
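
The word_num/word_offset split used by bit_shift() can be tried outside the kernel. Here is a stand-alone sketch that shifts a multi-word bitmap left by n bits, with word[0] holding bits 0..63; the kernel's bitarray has its own word ordering, so only the two-part shift (whole words plus a sub-word carry) is being illustrated.

#include <stddef.h>
#include <stdint.h>

#define WORD_BITS 64

static void bitmap_shift_left(uint64_t *bits, size_t nwords, unsigned int n)
{
	size_t word_num = n / WORD_BITS;		/* whole words to move */
	unsigned int word_offset = n % WORD_BITS;	/* remaining bit offset */
	size_t i;

	for (i = nwords; i-- > 0; ) {
		/* lower half: the word that slides into position i */
		uint64_t lower = i >= word_num ? bits[i - word_num] : 0;
		/* upper half of the next lower word, carried across */
		uint64_t carry = (word_offset && i > word_num) ?
				 bits[i - word_num - 1] >>
				 (WORD_BITS - word_offset) : 0;

		bits[i] = (lower << word_offset) | carry;
	}
}
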
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 056180ef9e1a..619fb73b3b76 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -532,14 +532,14 @@ static bool is_type_dhcprequest(struct sk_buff *skb, int header_len)
532 pkt_len -= header_len + DHCP_OPTIONS_OFFSET + 1; 532 pkt_len -= header_len + DHCP_OPTIONS_OFFSET + 1;
533 533
534 /* Access the dhcp option lists. Each entry is made up by: 534 /* Access the dhcp option lists. Each entry is made up by:
535 * - octect 1: option type 535 * - octet 1: option type
536 * - octect 2: option data len (only if type != 255 and 0) 536 * - octet 2: option data len (only if type != 255 and 0)
537 * - octect 3: option data */ 537 * - octet 3: option data */
538 while (*p != 255 && !ret) { 538 while (*p != 255 && !ret) {
539 /* p now points to the first octect: option type */ 539 /* p now points to the first octet: option type */
540 if (*p == 53) { 540 if (*p == 53) {
541 /* type 53 is the message type option. 541 /* type 53 is the message type option.
542 * Jump the len octect and go to the data octect */ 542 * Jump the len octet and go to the data octet */
543 if (pkt_len < 2) 543 if (pkt_len < 2)
544 goto out; 544 goto out;
545 p += 2; 545 p += 2;
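
The option format spelled out in the corrected comments (type octet, length octet, data octets) can be parsed generically. Below is a user-space sketch of such a walk, with bounds handling simplified relative to is_type_dhcprequest().

#include <stdint.h>

#define DHCP_OPT_PAD		0	/* single octet, no length field */
#define DHCP_OPT_MSG_TYPE	53
#define DHCP_OPT_END		255

/* returns the DHCP message type option value, or -1 if absent */
static int dhcp_message_type(const uint8_t *opts, int len)
{
	int pos = 0;

	while (pos < len && opts[pos] != DHCP_OPT_END) {
		uint8_t type = opts[pos];

		if (type == DHCP_OPT_PAD) {
			pos++;
			continue;
		}

		if (pos + 2 > len)	/* need at least type + length */
			break;

		if (type == DHCP_OPT_MSG_TYPE) {
			if (pos + 3 > len)
				break;
			return opts[pos + 2];	/* first data octet */
		}

		pos += 2 + opts[pos + 1];	/* skip type, length, data */
	}
	return -1;
}
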
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index db7aacf1e095..7704df468e0b 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -28,6 +28,7 @@
28#include "bat_sysfs.h" 28#include "bat_sysfs.h"
29#include "originator.h" 29#include "originator.h"
30#include "hash.h" 30#include "hash.h"
31#include "bat_ogm.h"
31 32
32#include <linux/if_arp.h> 33#include <linux/if_arp.h>
33 34
@@ -131,7 +132,6 @@ static void primary_if_select(struct bat_priv *bat_priv,
131 struct hard_iface *new_hard_iface) 132 struct hard_iface *new_hard_iface)
132{ 133{
133 struct hard_iface *curr_hard_iface; 134 struct hard_iface *curr_hard_iface;
134 struct batman_packet *batman_packet;
135 135
136 ASSERT_RTNL(); 136 ASSERT_RTNL();
137 137
@@ -147,10 +147,7 @@ static void primary_if_select(struct bat_priv *bat_priv,
147 if (!new_hard_iface) 147 if (!new_hard_iface)
148 return; 148 return;
149 149
150 batman_packet = (struct batman_packet *)(new_hard_iface->packet_buff); 150 bat_ogm_init_primary(new_hard_iface);
151 batman_packet->flags = PRIMARIES_FIRST_HOP;
152 batman_packet->ttl = TTL;
153
154 primary_if_update_addr(bat_priv); 151 primary_if_update_addr(bat_priv);
155} 152}
156 153
@@ -162,14 +159,6 @@ static bool hardif_is_iface_up(const struct hard_iface *hard_iface)
162 return false; 159 return false;
163} 160}
164 161
165static void update_mac_addresses(struct hard_iface *hard_iface)
166{
167 memcpy(((struct batman_packet *)(hard_iface->packet_buff))->orig,
168 hard_iface->net_dev->dev_addr, ETH_ALEN);
169 memcpy(((struct batman_packet *)(hard_iface->packet_buff))->prev_sender,
170 hard_iface->net_dev->dev_addr, ETH_ALEN);
171}
172
173static void check_known_mac_addr(const struct net_device *net_dev) 162static void check_known_mac_addr(const struct net_device *net_dev)
174{ 163{
175 const struct hard_iface *hard_iface; 164 const struct hard_iface *hard_iface;
@@ -244,12 +233,12 @@ static void hardif_activate_interface(struct hard_iface *hard_iface)
244 233
245 bat_priv = netdev_priv(hard_iface->soft_iface); 234 bat_priv = netdev_priv(hard_iface->soft_iface);
246 235
247 update_mac_addresses(hard_iface); 236 bat_ogm_update_mac(hard_iface);
248 hard_iface->if_status = IF_TO_BE_ACTIVATED; 237 hard_iface->if_status = IF_TO_BE_ACTIVATED;
249 238
250 /** 239 /**
251 * the first active interface becomes our primary interface or 240 * the first active interface becomes our primary interface or
252 * the next active interface after the old primay interface was removed 241 * the next active interface after the old primary interface was removed
253 */ 242 */
254 primary_if = primary_if_get_selected(bat_priv); 243 primary_if = primary_if_get_selected(bat_priv);
255 if (!primary_if) 244 if (!primary_if)
@@ -283,7 +272,6 @@ int hardif_enable_interface(struct hard_iface *hard_iface,
283 const char *iface_name) 272 const char *iface_name)
284{ 273{
285 struct bat_priv *bat_priv; 274 struct bat_priv *bat_priv;
286 struct batman_packet *batman_packet;
287 struct net_device *soft_iface; 275 struct net_device *soft_iface;
288 int ret; 276 int ret;
289 277
@@ -318,8 +306,8 @@ int hardif_enable_interface(struct hard_iface *hard_iface,
318 306
319 hard_iface->soft_iface = soft_iface; 307 hard_iface->soft_iface = soft_iface;
320 bat_priv = netdev_priv(hard_iface->soft_iface); 308 bat_priv = netdev_priv(hard_iface->soft_iface);
321 hard_iface->packet_len = BAT_PACKET_LEN; 309
322 hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC); 310 bat_ogm_init(hard_iface);
323 311
324 if (!hard_iface->packet_buff) { 312 if (!hard_iface->packet_buff) {
325 bat_err(hard_iface->soft_iface, "Can't add interface packet " 313 bat_err(hard_iface->soft_iface, "Can't add interface packet "
@@ -328,15 +316,6 @@ int hardif_enable_interface(struct hard_iface *hard_iface,
328 goto err; 316 goto err;
329 } 317 }
330 318
331 batman_packet = (struct batman_packet *)(hard_iface->packet_buff);
332 batman_packet->packet_type = BAT_PACKET;
333 batman_packet->version = COMPAT_VERSION;
334 batman_packet->flags = NO_FLAGS;
335 batman_packet->ttl = 2;
336 batman_packet->tq = TQ_MAX_VALUE;
337 batman_packet->tt_num_changes = 0;
338 batman_packet->ttvn = 0;
339
340 hard_iface->if_num = bat_priv->num_ifaces; 319 hard_iface->if_num = bat_priv->num_ifaces;
341 bat_priv->num_ifaces++; 320 bat_priv->num_ifaces++;
342 hard_iface->if_status = IF_INACTIVE; 321 hard_iface->if_status = IF_INACTIVE;
@@ -381,7 +360,7 @@ int hardif_enable_interface(struct hard_iface *hard_iface,
381 hard_iface->net_dev->name); 360 hard_iface->net_dev->name);
382 361
383 /* begin scheduling originator messages on that interface */ 362 /* begin scheduling originator messages on that interface */
384 schedule_own_packet(hard_iface); 363 schedule_bat_ogm(hard_iface);
385 364
386out: 365out:
387 return 0; 366 return 0;
@@ -455,11 +434,8 @@ static struct hard_iface *hardif_add_interface(struct net_device *net_dev)
455 dev_hold(net_dev); 434 dev_hold(net_dev);
456 435
457 hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC); 436 hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC);
458 if (!hard_iface) { 437 if (!hard_iface)
459 pr_err("Can't add interface (%s): out of memory\n",
460 net_dev->name);
461 goto release_dev; 438 goto release_dev;
462 }
463 439
464 ret = sysfs_add_hardif(&hard_iface->hardif_obj, net_dev); 440 ret = sysfs_add_hardif(&hard_iface->hardif_obj, net_dev);
465 if (ret) 441 if (ret)
@@ -551,7 +527,7 @@ static int hard_if_event(struct notifier_block *this,
551 goto hardif_put; 527 goto hardif_put;
552 528
553 check_known_mac_addr(hard_iface->net_dev); 529 check_known_mac_addr(hard_iface->net_dev);
554 update_mac_addresses(hard_iface); 530 bat_ogm_update_mac(hard_iface);
555 531
556 bat_priv = netdev_priv(hard_iface->soft_iface); 532 bat_priv = netdev_priv(hard_iface->soft_iface);
557 primary_if = primary_if_get_selected(bat_priv); 533 primary_if = primary_if_get_selected(bat_priv);
@@ -573,14 +549,14 @@ out:
573 return NOTIFY_DONE; 549 return NOTIFY_DONE;
574} 550}
575 551
576/* receive a packet with the batman ethertype coming on a hard 552/* incoming packets with the batman ethertype received on any active hard
577 * interface */ 553 * interface */
578static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, 554static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
579 struct packet_type *ptype, 555 struct packet_type *ptype,
580 struct net_device *orig_dev) 556 struct net_device *orig_dev)
581{ 557{
582 struct bat_priv *bat_priv; 558 struct bat_priv *bat_priv;
583 struct batman_packet *batman_packet; 559 struct batman_ogm_packet *batman_ogm_packet;
584 struct hard_iface *hard_iface; 560 struct hard_iface *hard_iface;
585 int ret; 561 int ret;
586 562
@@ -612,22 +588,22 @@ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
612 if (hard_iface->if_status != IF_ACTIVE) 588 if (hard_iface->if_status != IF_ACTIVE)
613 goto err_free; 589 goto err_free;
614 590
615 batman_packet = (struct batman_packet *)skb->data; 591 batman_ogm_packet = (struct batman_ogm_packet *)skb->data;
616 592
617 if (batman_packet->version != COMPAT_VERSION) { 593 if (batman_ogm_packet->version != COMPAT_VERSION) {
618 bat_dbg(DBG_BATMAN, bat_priv, 594 bat_dbg(DBG_BATMAN, bat_priv,
619 "Drop packet: incompatible batman version (%i)\n", 595 "Drop packet: incompatible batman version (%i)\n",
620 batman_packet->version); 596 batman_ogm_packet->version);
621 goto err_free; 597 goto err_free;
622 } 598 }
623 599
624 /* all receive handlers return whether they received or reused 600 /* all receive handlers return whether they received or reused
625 * the supplied skb. if not, we have to free the skb. */ 601 * the supplied skb. if not, we have to free the skb. */
626 602
627 switch (batman_packet->packet_type) { 603 switch (batman_ogm_packet->packet_type) {
628 /* batman originator packet */ 604 /* batman originator packet */
629 case BAT_PACKET: 605 case BAT_OGM:
630 ret = recv_bat_packet(skb, hard_iface); 606 ret = recv_bat_ogm_packet(skb, hard_iface);
631 break; 607 break;
632 608
633 /* batman icmp packet */ 609 /* batman icmp packet */
@@ -681,6 +657,36 @@ err_out:
681 return NET_RX_DROP; 657 return NET_RX_DROP;
682} 658}
683 659
 660/* This function returns true if the interface represented by ifindex is an
661 * 802.11 wireless device */
662bool is_wifi_iface(int ifindex)
663{
664 struct net_device *net_device = NULL;
665 bool ret = false;
666
667 if (ifindex == NULL_IFINDEX)
668 goto out;
669
670 net_device = dev_get_by_index(&init_net, ifindex);
671 if (!net_device)
672 goto out;
673
674#ifdef CONFIG_WIRELESS_EXT
675 /* pre-cfg80211 drivers have to implement WEXT, so it is possible to
676 * check for wireless_handlers != NULL */
677 if (net_device->wireless_handlers)
678 ret = true;
679 else
680#endif
681 /* cfg80211 drivers have to set ieee80211_ptr */
682 if (net_device->ieee80211_ptr)
683 ret = true;
684out:
685 if (net_device)
686 dev_put(net_device);
687 return ret;
688}
689
684struct notifier_block hard_if_notifier = { 690struct notifier_block hard_if_notifier = {
685 .notifier_call = hard_if_event, 691 .notifier_call = hard_if_event,
686}; 692};
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 442eacbc9e3a..67f78d1a63b4 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -42,6 +42,7 @@ void hardif_remove_interfaces(void);
42int hardif_min_mtu(struct net_device *soft_iface); 42int hardif_min_mtu(struct net_device *soft_iface);
43void update_min_mtu(struct net_device *soft_iface); 43void update_min_mtu(struct net_device *soft_iface);
44void hardif_free_rcu(struct rcu_head *rcu); 44void hardif_free_rcu(struct rcu_head *rcu);
45bool is_wifi_iface(int ifindex);
45 46
46static inline void hardif_free_ref(struct hard_iface *hard_iface) 47static inline void hardif_free_ref(struct hard_iface *hard_iface)
47{ 48{
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index dd5c9fd7a905..d20aa71ba1e8 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -76,19 +76,30 @@ static inline void hash_delete(struct hashtable_t *hash,
76 hash_destroy(hash); 76 hash_destroy(hash);
77} 77}
78 78
79/* adds data to the hashtable. returns 0 on success, -1 on error */ 79/**
80 * hash_add - adds data to the hashtable
81 * @hash: storage hash table
82 * @compare: callback to determine if 2 hash elements are identical
83 * @choose: callback calculating the hash index
84 * @data: data passed to the aforementioned callbacks as argument
85 * @data_node: to be added element
86 *
87 * Returns 0 on success, 1 if the element already is in the hash
88 * and -1 on error.
89 */
90
80static inline int hash_add(struct hashtable_t *hash, 91static inline int hash_add(struct hashtable_t *hash,
81 hashdata_compare_cb compare, 92 hashdata_compare_cb compare,
82 hashdata_choose_cb choose, 93 hashdata_choose_cb choose,
83 const void *data, struct hlist_node *data_node) 94 const void *data, struct hlist_node *data_node)
84{ 95{
85 int index; 96 int index, ret = -1;
86 struct hlist_head *head; 97 struct hlist_head *head;
87 struct hlist_node *node; 98 struct hlist_node *node;
88 spinlock_t *list_lock; /* spinlock to protect write access */ 99 spinlock_t *list_lock; /* spinlock to protect write access */
89 100
90 if (!hash) 101 if (!hash)
91 goto err; 102 goto out;
92 103
93 index = choose(data, hash->size); 104 index = choose(data, hash->size);
94 head = &hash->table[index]; 105 head = &hash->table[index];
@@ -99,6 +110,7 @@ static inline int hash_add(struct hashtable_t *hash,
99 if (!compare(node, data)) 110 if (!compare(node, data))
100 continue; 111 continue;
101 112
113 ret = 1;
102 goto err_unlock; 114 goto err_unlock;
103 } 115 }
104 rcu_read_unlock(); 116 rcu_read_unlock();
@@ -108,12 +120,13 @@ static inline int hash_add(struct hashtable_t *hash,
108 hlist_add_head_rcu(data_node, head); 120 hlist_add_head_rcu(data_node, head);
109 spin_unlock_bh(list_lock); 121 spin_unlock_bh(list_lock);
110 122
111 return 0; 123 ret = 0;
124 goto out;
112 125
113err_unlock: 126err_unlock:
114 rcu_read_unlock(); 127 rcu_read_unlock();
115err: 128out:
116 return -1; 129 return ret;
117} 130}
118 131
119/* removes data from hash, if found. returns pointer do data on success, so you 132/* removes data from hash, if found. returns pointer do data on success, so you
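
A toy illustration of the new tri-state contract from the caller's point of view: 0 means the element was inserted, 1 means an equal element was already present (nothing was stored), -1 means the hash was invalid. The fake table below stands in for the real hash; the point is that a duplicate leaves the caller's element unstored, which is why get_orig_node() in originator.c now bails out on any non-zero return.

#include <stdio.h>

/* stand-in for hash_add(), not the real implementation */
static int fake_hash_add(int key, int *table, int nentries)
{
	int i;

	if (!table)
		return -1;			/* error */
	for (i = 0; i < nentries; i++)
		if (table[i] == key)
			return 1;		/* already present */
	/* real insertion elided in this toy */
	return 0;				/* added */
}

int main(void)
{
	int table[] = { 3, 7 };
	int ret = fake_hash_add(7, table, 2);

	if (ret == 1)
		printf("duplicate: caller stays responsible for its element\n");
	else if (ret < 0)
		printf("error\n");
	else
		printf("added\n");
	return 0;
}
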
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index b0f9068ade57..fb87bdc2ce9b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -107,7 +107,7 @@ int mesh_init(struct net_device *soft_iface)
107 if (tt_init(bat_priv) < 1) 107 if (tt_init(bat_priv) < 1)
108 goto err; 108 goto err;
109 109
110 tt_local_add(soft_iface, soft_iface->dev_addr); 110 tt_local_add(soft_iface, soft_iface->dev_addr, NULL_IFINDEX);
111 111
112 if (vis_init(bat_priv) < 1) 112 if (vis_init(bat_priv) < 1)
113 goto err; 113 goto err;
@@ -117,8 +117,6 @@ int mesh_init(struct net_device *soft_iface)
117 goto end; 117 goto end;
118 118
119err: 119err:
120 pr_err("Unable to allocate memory for mesh information structures: "
121 "out of mem ?\n");
122 mesh_free(soft_iface); 120 mesh_free(soft_iface);
123 return -1; 121 return -1;
124 122
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index a6df61a6933b..964ad4d8ba33 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -28,7 +28,7 @@
28#define DRIVER_DEVICE "batman-adv" 28#define DRIVER_DEVICE "batman-adv"
29 29
30#ifndef SOURCE_VERSION 30#ifndef SOURCE_VERSION
31#define SOURCE_VERSION "2011.3.0" 31#define SOURCE_VERSION "2011.4.0"
32#endif 32#endif
33 33
34/* B.A.T.M.A.N. parameters */ 34/* B.A.T.M.A.N. parameters */
@@ -44,7 +44,7 @@
44#define PURGE_TIMEOUT 200 44#define PURGE_TIMEOUT 200
45#define TT_LOCAL_TIMEOUT 3600 /* in seconds */ 45#define TT_LOCAL_TIMEOUT 3600 /* in seconds */
46#define TT_CLIENT_ROAM_TIMEOUT 600 46#define TT_CLIENT_ROAM_TIMEOUT 600
47/* sliding packet range of received originator messages in squence numbers 47/* sliding packet range of received originator messages in sequence numbers
48 * (should be a multiple of our word size) */ 48 * (should be a multiple of our word size) */
49#define TQ_LOCAL_WINDOW_SIZE 64 49#define TQ_LOCAL_WINDOW_SIZE 64
50#define TT_REQUEST_TIMEOUT 3 /* seconds we have to keep pending tt_req */ 50#define TT_REQUEST_TIMEOUT 3 /* seconds we have to keep pending tt_req */
@@ -62,6 +62,8 @@
62 62
63#define NO_FLAGS 0 63#define NO_FLAGS 0
64 64
65#define NULL_IFINDEX 0 /* dummy ifindex used to avoid iface checks */
66
65#define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) 67#define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE)
66 68
67#define LOG_BUF_LEN 8192 /* has to be a power of 2 */ 69#define LOG_BUF_LEN 8192 /* has to be a power of 2 */
@@ -133,7 +135,7 @@ enum dbg_level {
133#include <linux/mutex.h> /* mutex */ 135#include <linux/mutex.h> /* mutex */
134#include <linux/module.h> /* needed by all modules */ 136#include <linux/module.h> /* needed by all modules */
135#include <linux/netdevice.h> /* netdevice */ 137#include <linux/netdevice.h> /* netdevice */
136#include <linux/etherdevice.h> /* ethernet address classifaction */ 138#include <linux/etherdevice.h> /* ethernet address classification */
137#include <linux/if_ether.h> /* ethernet header */ 139#include <linux/if_ether.h> /* ethernet header */
138#include <linux/poll.h> /* poll_table */ 140#include <linux/poll.h> /* poll_table */
139#include <linux/kthread.h> /* kernel threads */ 141#include <linux/kthread.h> /* kernel threads */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index f3c3f620d195..0e5b77255d99 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -252,7 +252,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, const uint8_t *addr)
252 252
253 hash_added = hash_add(bat_priv->orig_hash, compare_orig, 253 hash_added = hash_add(bat_priv->orig_hash, compare_orig,
254 choose_orig, orig_node, &orig_node->hash_entry); 254 choose_orig, orig_node, &orig_node->hash_entry);
255 if (hash_added < 0) 255 if (hash_added != 0)
256 goto free_bcast_own_sum; 256 goto free_bcast_own_sum;
257 257
258 return orig_node; 258 return orig_node;
@@ -336,8 +336,7 @@ static bool purge_orig_node(struct bat_priv *bat_priv,
336 } else { 336 } else {
337 if (purge_orig_neighbors(bat_priv, orig_node, 337 if (purge_orig_neighbors(bat_priv, orig_node,
338 &best_neigh_node)) { 338 &best_neigh_node)) {
339 update_routes(bat_priv, orig_node, 339 update_route(bat_priv, orig_node, best_neigh_node);
340 best_neigh_node);
341 } 340 }
342 } 341 }
343 342
@@ -493,10 +492,8 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num)
493 492
494 data_ptr = kmalloc(max_if_num * sizeof(unsigned long) * NUM_WORDS, 493 data_ptr = kmalloc(max_if_num * sizeof(unsigned long) * NUM_WORDS,
495 GFP_ATOMIC); 494 GFP_ATOMIC);
496 if (!data_ptr) { 495 if (!data_ptr)
497 pr_err("Can't resize orig: out of memory\n");
498 return -1; 496 return -1;
499 }
500 497
501 memcpy(data_ptr, orig_node->bcast_own, 498 memcpy(data_ptr, orig_node->bcast_own,
502 (max_if_num - 1) * sizeof(unsigned long) * NUM_WORDS); 499 (max_if_num - 1) * sizeof(unsigned long) * NUM_WORDS);
@@ -504,10 +501,8 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num)
504 orig_node->bcast_own = data_ptr; 501 orig_node->bcast_own = data_ptr;
505 502
506 data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); 503 data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
507 if (!data_ptr) { 504 if (!data_ptr)
508 pr_err("Can't resize orig: out of memory\n");
509 return -1; 505 return -1;
510 }
511 506
512 memcpy(data_ptr, orig_node->bcast_own_sum, 507 memcpy(data_ptr, orig_node->bcast_own_sum,
513 (max_if_num - 1) * sizeof(uint8_t)); 508 (max_if_num - 1) * sizeof(uint8_t));
@@ -562,10 +557,8 @@ static int orig_node_del_if(struct orig_node *orig_node,
562 557
563 chunk_size = sizeof(unsigned long) * NUM_WORDS; 558 chunk_size = sizeof(unsigned long) * NUM_WORDS;
564 data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC); 559 data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC);
565 if (!data_ptr) { 560 if (!data_ptr)
566 pr_err("Can't resize orig: out of memory\n");
567 return -1; 561 return -1;
568 }
569 562
570 /* copy first part */ 563 /* copy first part */
571 memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size); 564 memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size);
@@ -583,10 +576,8 @@ free_bcast_own:
583 goto free_own_sum; 576 goto free_own_sum;
584 577
585 data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); 578 data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
586 if (!data_ptr) { 579 if (!data_ptr)
587 pr_err("Can't resize orig: out of memory\n");
588 return -1; 580 return -1;
589 }
590 581
591 memcpy(data_ptr, orig_node->bcast_own_sum, 582 memcpy(data_ptr, orig_node->bcast_own_sum,
592 del_if_num * sizeof(uint8_t)); 583 del_if_num * sizeof(uint8_t));
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index b76b4be10b92..4d9e54c57a36 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -25,14 +25,14 @@
25#define ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */ 25#define ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */
26 26
27enum bat_packettype { 27enum bat_packettype {
28 BAT_PACKET = 0x01, 28 BAT_OGM = 0x01,
29 BAT_ICMP = 0x02, 29 BAT_ICMP = 0x02,
30 BAT_UNICAST = 0x03, 30 BAT_UNICAST = 0x03,
31 BAT_BCAST = 0x04, 31 BAT_BCAST = 0x04,
32 BAT_VIS = 0x05, 32 BAT_VIS = 0x05,
33 BAT_UNICAST_FRAG = 0x06, 33 BAT_UNICAST_FRAG = 0x06,
34 BAT_TT_QUERY = 0x07, 34 BAT_TT_QUERY = 0x07,
35 BAT_ROAM_ADV = 0x08 35 BAT_ROAM_ADV = 0x08
36}; 36};
37 37
38/* this file is included by batctl which needs these defines */ 38/* this file is included by batctl which needs these defines */
@@ -84,12 +84,13 @@ enum tt_query_flags {
84enum tt_client_flags { 84enum tt_client_flags {
85 TT_CLIENT_DEL = 1 << 0, 85 TT_CLIENT_DEL = 1 << 0,
86 TT_CLIENT_ROAM = 1 << 1, 86 TT_CLIENT_ROAM = 1 << 1,
87 TT_CLIENT_WIFI = 1 << 2,
87 TT_CLIENT_NOPURGE = 1 << 8, 88 TT_CLIENT_NOPURGE = 1 << 8,
88 TT_CLIENT_NEW = 1 << 9, 89 TT_CLIENT_NEW = 1 << 9,
89 TT_CLIENT_PENDING = 1 << 10 90 TT_CLIENT_PENDING = 1 << 10
90}; 91};
91 92
92struct batman_packet { 93struct batman_ogm_packet {
93 uint8_t packet_type; 94 uint8_t packet_type;
94 uint8_t version; /* batman version field */ 95 uint8_t version; /* batman version field */
95 uint8_t ttl; 96 uint8_t ttl;
@@ -104,7 +105,7 @@ struct batman_packet {
104 uint16_t tt_crc; 105 uint16_t tt_crc;
105} __packed; 106} __packed;
106 107
107#define BAT_PACKET_LEN sizeof(struct batman_packet) 108#define BATMAN_OGM_LEN sizeof(struct batman_ogm_packet)
108 109
109struct icmp_packet { 110struct icmp_packet {
110 uint8_t packet_type; 111 uint8_t packet_type;
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 0f32c818874d..f961cc5eade5 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -22,18 +22,14 @@
22#include "main.h" 22#include "main.h"
23#include "routing.h" 23#include "routing.h"
24#include "send.h" 24#include "send.h"
25#include "hash.h"
26#include "soft-interface.h" 25#include "soft-interface.h"
27#include "hard-interface.h" 26#include "hard-interface.h"
28#include "icmp_socket.h" 27#include "icmp_socket.h"
29#include "translation-table.h" 28#include "translation-table.h"
30#include "originator.h" 29#include "originator.h"
31#include "ring_buffer.h"
32#include "vis.h" 30#include "vis.h"
33#include "aggregation.h"
34#include "gateway_common.h"
35#include "gateway_client.h"
36#include "unicast.h" 31#include "unicast.h"
32#include "bat_ogm.h"
37 33
38void slide_own_bcast_window(struct hard_iface *hard_iface) 34void slide_own_bcast_window(struct hard_iface *hard_iface)
39{ 35{
@@ -64,69 +60,9 @@ void slide_own_bcast_window(struct hard_iface *hard_iface)
64 } 60 }
65} 61}
66 62
67static void update_transtable(struct bat_priv *bat_priv, 63static void _update_route(struct bat_priv *bat_priv,
68 struct orig_node *orig_node, 64 struct orig_node *orig_node,
69 const unsigned char *tt_buff, 65 struct neigh_node *neigh_node)
70 uint8_t tt_num_changes, uint8_t ttvn,
71 uint16_t tt_crc)
72{
73 uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
74 bool full_table = true;
75
76 /* the ttvn increased by one -> we can apply the attached changes */
77 if (ttvn - orig_ttvn == 1) {
78 /* the OGM could not contain the changes because they were too
79 * many to fit in one frame or because they have already been
80 * sent TT_OGM_APPEND_MAX times. In this case send a tt
81 * request */
82 if (!tt_num_changes) {
83 full_table = false;
84 goto request_table;
85 }
86
87 tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn,
88 (struct tt_change *)tt_buff);
89
90 /* Even if we received the crc into the OGM, we prefer
91 * to recompute it to spot any possible inconsistency
92 * in the global table */
93 orig_node->tt_crc = tt_global_crc(bat_priv, orig_node);
94
95 /* The ttvn alone is not enough to guarantee consistency
96 * because a single value could repesent different states
97 * (due to the wrap around). Thus a node has to check whether
98 * the resulting table (after applying the changes) is still
99 * consistent or not. E.g. a node could disconnect while its
100 * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case
101 * checking the CRC value is mandatory to detect the
102 * inconsistency */
103 if (orig_node->tt_crc != tt_crc)
104 goto request_table;
105
106 /* Roaming phase is over: tables are in sync again. I can
107 * unset the flag */
108 orig_node->tt_poss_change = false;
109 } else {
110 /* if we missed more than one change or our tables are not
111 * in sync anymore -> request fresh tt data */
112 if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) {
113request_table:
114 bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. "
115 "Need to retrieve the correct information "
116 "(ttvn: %u last_ttvn: %u crc: %u last_crc: "
117 "%u num_changes: %u)\n", orig_node->orig, ttvn,
118 orig_ttvn, tt_crc, orig_node->tt_crc,
119 tt_num_changes);
120 send_tt_request(bat_priv, orig_node, ttvn, tt_crc,
121 full_table);
122 return;
123 }
124 }
125}
126
127static void update_route(struct bat_priv *bat_priv,
128 struct orig_node *orig_node,
129 struct neigh_node *neigh_node)
130{ 66{
131 struct neigh_node *curr_router; 67 struct neigh_node *curr_router;
132 68
@@ -170,8 +106,8 @@ static void update_route(struct bat_priv *bat_priv,
170 neigh_node_free_ref(curr_router); 106 neigh_node_free_ref(curr_router);
171} 107}
172 108
173void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, 109void update_route(struct bat_priv *bat_priv, struct orig_node *orig_node,
174 struct neigh_node *neigh_node) 110 struct neigh_node *neigh_node)
175{ 111{
176 struct neigh_node *router = NULL; 112 struct neigh_node *router = NULL;
177 113
@@ -181,116 +117,13 @@ void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
181 router = orig_node_get_router(orig_node); 117 router = orig_node_get_router(orig_node);
182 118
183 if (router != neigh_node) 119 if (router != neigh_node)
184 update_route(bat_priv, orig_node, neigh_node); 120 _update_route(bat_priv, orig_node, neigh_node);
185 121
186out: 122out:
187 if (router) 123 if (router)
188 neigh_node_free_ref(router); 124 neigh_node_free_ref(router);
189} 125}
190 126
191static int is_bidirectional_neigh(struct orig_node *orig_node,
192 struct orig_node *orig_neigh_node,
193 struct batman_packet *batman_packet,
194 struct hard_iface *if_incoming)
195{
196 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
197 struct neigh_node *neigh_node = NULL, *tmp_neigh_node;
198 struct hlist_node *node;
199 uint8_t total_count;
200 uint8_t orig_eq_count, neigh_rq_count, tq_own;
201 int tq_asym_penalty, ret = 0;
202
203 /* find corresponding one hop neighbor */
204 rcu_read_lock();
205 hlist_for_each_entry_rcu(tmp_neigh_node, node,
206 &orig_neigh_node->neigh_list, list) {
207
208 if (!compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig))
209 continue;
210
211 if (tmp_neigh_node->if_incoming != if_incoming)
212 continue;
213
214 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
215 continue;
216
217 neigh_node = tmp_neigh_node;
218 break;
219 }
220 rcu_read_unlock();
221
222 if (!neigh_node)
223 neigh_node = create_neighbor(orig_neigh_node,
224 orig_neigh_node,
225 orig_neigh_node->orig,
226 if_incoming);
227
228 if (!neigh_node)
229 goto out;
230
231 /* if orig_node is direct neighbour update neigh_node last_valid */
232 if (orig_node == orig_neigh_node)
233 neigh_node->last_valid = jiffies;
234
235 orig_node->last_valid = jiffies;
236
237 /* find packet count of corresponding one hop neighbor */
238 spin_lock_bh(&orig_node->ogm_cnt_lock);
239 orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num];
240 neigh_rq_count = neigh_node->real_packet_count;
241 spin_unlock_bh(&orig_node->ogm_cnt_lock);
242
243 /* pay attention to not get a value bigger than 100 % */
244 total_count = (orig_eq_count > neigh_rq_count ?
245 neigh_rq_count : orig_eq_count);
246
247 /* if we have too few packets (too less data) we set tq_own to zero */
248 /* if we receive too few packets it is not considered bidirectional */
249 if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) ||
250 (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM))
251 tq_own = 0;
252 else
253 /* neigh_node->real_packet_count is never zero as we
254 * only purge old information when getting new
255 * information */
256 tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count;
257
258 /*
259 * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does
260 * affect the nearly-symmetric links only a little, but
261 * punishes asymmetric links more. This will give a value
262 * between 0 and TQ_MAX_VALUE
263 */
264 tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE *
265 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
266 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) *
267 (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) /
268 (TQ_LOCAL_WINDOW_SIZE *
269 TQ_LOCAL_WINDOW_SIZE *
270 TQ_LOCAL_WINDOW_SIZE);
271
272 batman_packet->tq = ((batman_packet->tq * tq_own * tq_asym_penalty) /
273 (TQ_MAX_VALUE * TQ_MAX_VALUE));
274
275 bat_dbg(DBG_BATMAN, bat_priv,
276 "bidirectional: "
277 "orig = %-15pM neigh = %-15pM => own_bcast = %2i, "
278 "real recv = %2i, local tq: %3i, asym_penalty: %3i, "
279 "total tq: %3i\n",
280 orig_node->orig, orig_neigh_node->orig, total_count,
281 neigh_rq_count, tq_own, tq_asym_penalty, batman_packet->tq);
282
283 /* if link has the minimum required transmission quality
284 * consider it bidirectional */
285 if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT)
286 ret = 1;
287
288out:
289 if (neigh_node)
290 neigh_node_free_ref(neigh_node);
291 return ret;
292}
293
294/* caller must hold the neigh_list_lock */ 127/* caller must hold the neigh_list_lock */
295void bonding_candidate_del(struct orig_node *orig_node, 128void bonding_candidate_del(struct orig_node *orig_node,
296 struct neigh_node *neigh_node) 129 struct neigh_node *neigh_node)
@@ -308,8 +141,8 @@ out:
308 return; 141 return;
309} 142}
310 143
311static void bonding_candidate_add(struct orig_node *orig_node, 144void bonding_candidate_add(struct orig_node *orig_node,
312 struct neigh_node *neigh_node) 145 struct neigh_node *neigh_node)
313{ 146{
314 struct hlist_node *node; 147 struct hlist_node *node;
315 struct neigh_node *tmp_neigh_node, *router = NULL; 148 struct neigh_node *tmp_neigh_node, *router = NULL;
@@ -379,162 +212,23 @@ out:
379} 212}
380 213
381/* copy primary address for bonding */ 214/* copy primary address for bonding */
382static void bonding_save_primary(const struct orig_node *orig_node, 215void bonding_save_primary(const struct orig_node *orig_node,
383 struct orig_node *orig_neigh_node, 216 struct orig_node *orig_neigh_node,
384 const struct batman_packet *batman_packet) 217 const struct batman_ogm_packet *batman_ogm_packet)
385{ 218{
386 if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) 219 if (!(batman_ogm_packet->flags & PRIMARIES_FIRST_HOP))
387 return; 220 return;
388 221
389 memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN); 222 memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN);
390} 223}
391 224
392static void update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node,
393 const struct ethhdr *ethhdr,
394 const struct batman_packet *batman_packet,
395 struct hard_iface *if_incoming,
396 const unsigned char *tt_buff, int is_duplicate)
397{
398 struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
399 struct neigh_node *router = NULL;
400 struct orig_node *orig_node_tmp;
401 struct hlist_node *node;
402 uint8_t bcast_own_sum_orig, bcast_own_sum_neigh;
403
404 bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): "
405 "Searching and updating originator entry of received packet\n");
406
407 rcu_read_lock();
408 hlist_for_each_entry_rcu(tmp_neigh_node, node,
409 &orig_node->neigh_list, list) {
410 if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
411 (tmp_neigh_node->if_incoming == if_incoming) &&
412 atomic_inc_not_zero(&tmp_neigh_node->refcount)) {
413 if (neigh_node)
414 neigh_node_free_ref(neigh_node);
415 neigh_node = tmp_neigh_node;
416 continue;
417 }
418
419 if (is_duplicate)
420 continue;
421
422 spin_lock_bh(&tmp_neigh_node->tq_lock);
423 ring_buffer_set(tmp_neigh_node->tq_recv,
424 &tmp_neigh_node->tq_index, 0);
425 tmp_neigh_node->tq_avg =
426 ring_buffer_avg(tmp_neigh_node->tq_recv);
427 spin_unlock_bh(&tmp_neigh_node->tq_lock);
428 }
429
430 if (!neigh_node) {
431 struct orig_node *orig_tmp;
432
433 orig_tmp = get_orig_node(bat_priv, ethhdr->h_source);
434 if (!orig_tmp)
435 goto unlock;
436
437 neigh_node = create_neighbor(orig_node, orig_tmp,
438 ethhdr->h_source, if_incoming);
439
440 orig_node_free_ref(orig_tmp);
441 if (!neigh_node)
442 goto unlock;
443 } else
444 bat_dbg(DBG_BATMAN, bat_priv,
445 "Updating existing last-hop neighbor of originator\n");
446
447 rcu_read_unlock();
448
449 orig_node->flags = batman_packet->flags;
450 neigh_node->last_valid = jiffies;
451
452 spin_lock_bh(&neigh_node->tq_lock);
453 ring_buffer_set(neigh_node->tq_recv,
454 &neigh_node->tq_index,
455 batman_packet->tq);
456 neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv);
457 spin_unlock_bh(&neigh_node->tq_lock);
458
459 if (!is_duplicate) {
460 orig_node->last_ttl = batman_packet->ttl;
461 neigh_node->last_ttl = batman_packet->ttl;
462 }
463
464 bonding_candidate_add(orig_node, neigh_node);
465
466 /* if this neighbor already is our next hop there is nothing
467 * to change */
468 router = orig_node_get_router(orig_node);
469 if (router == neigh_node)
470 goto update_tt;
471
472 /* if this neighbor does not offer a better TQ we won't consider it */
473 if (router && (router->tq_avg > neigh_node->tq_avg))
474 goto update_tt;
475
476 /* if the TQ is the same and the link not more symetric we
477 * won't consider it either */
478 if (router && (neigh_node->tq_avg == router->tq_avg)) {
479 orig_node_tmp = router->orig_node;
480 spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
481 bcast_own_sum_orig =
482 orig_node_tmp->bcast_own_sum[if_incoming->if_num];
483 spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
484
485 orig_node_tmp = neigh_node->orig_node;
486 spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
487 bcast_own_sum_neigh =
488 orig_node_tmp->bcast_own_sum[if_incoming->if_num];
489 spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
490
491 if (bcast_own_sum_orig >= bcast_own_sum_neigh)
492 goto update_tt;
493 }
494
495 update_routes(bat_priv, orig_node, neigh_node);
496
497update_tt:
498 /* I have to check for transtable changes only if the OGM has been
499 * sent through a primary interface */
500 if (((batman_packet->orig != ethhdr->h_source) &&
501 (batman_packet->ttl > 2)) ||
502 (batman_packet->flags & PRIMARIES_FIRST_HOP))
503 update_transtable(bat_priv, orig_node, tt_buff,
504 batman_packet->tt_num_changes,
505 batman_packet->ttvn,
506 batman_packet->tt_crc);
507
508 if (orig_node->gw_flags != batman_packet->gw_flags)
509 gw_node_update(bat_priv, orig_node, batman_packet->gw_flags);
510
511 orig_node->gw_flags = batman_packet->gw_flags;
512
513 /* restart gateway selection if fast or late switching was enabled */
514 if ((orig_node->gw_flags) &&
515 (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) &&
516 (atomic_read(&bat_priv->gw_sel_class) > 2))
517 gw_check_election(bat_priv, orig_node);
518
519 goto out;
520
521unlock:
522 rcu_read_unlock();
523out:
524 if (neigh_node)
525 neigh_node_free_ref(neigh_node);
526 if (router)
527 neigh_node_free_ref(router);
528}
529
530/* checks whether the host restarted and is in the protection time. 225/* checks whether the host restarted and is in the protection time.
531 * returns: 226 * returns:
532 * 0 if the packet is to be accepted 227 * 0 if the packet is to be accepted
533 * 1 if the packet is to be ignored. 228 * 1 if the packet is to be ignored.
534 */ 229 */
535static int window_protected(struct bat_priv *bat_priv, 230int window_protected(struct bat_priv *bat_priv, int32_t seq_num_diff,
536 int32_t seq_num_diff, 231 unsigned long *last_reset)
537 unsigned long *last_reset)
538{ 232{
539 if ((seq_num_diff <= -TQ_LOCAL_WINDOW_SIZE) 233 if ((seq_num_diff <= -TQ_LOCAL_WINDOW_SIZE)
540 || (seq_num_diff >= EXPECTED_SEQNO_RANGE)) { 234 || (seq_num_diff >= EXPECTED_SEQNO_RANGE)) {
@@ -552,330 +246,12 @@ static int window_protected(struct bat_priv *bat_priv,
552 return 0; 246 return 0;
553} 247}
554 248
555/* processes a batman packet for all interfaces, adjusts the sequence number and 249int recv_bat_ogm_packet(struct sk_buff *skb, struct hard_iface *hard_iface)
556 * finds out whether it is a duplicate.
557 * returns:
558 * 1 the packet is a duplicate
559 * 0 the packet has not yet been received
560 * -1 the packet is old and has been received while the seqno window
561 * was protected. Caller should drop it.
562 */
563static int count_real_packets(const struct ethhdr *ethhdr,
564 const struct batman_packet *batman_packet,
565 const struct hard_iface *if_incoming)
566{
567 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
568 struct orig_node *orig_node;
569 struct neigh_node *tmp_neigh_node;
570 struct hlist_node *node;
571 int is_duplicate = 0;
572 int32_t seq_diff;
573 int need_update = 0;
574 int set_mark, ret = -1;
575
576 orig_node = get_orig_node(bat_priv, batman_packet->orig);
577 if (!orig_node)
578 return 0;
579
580 spin_lock_bh(&orig_node->ogm_cnt_lock);
581 seq_diff = batman_packet->seqno - orig_node->last_real_seqno;
582
583 /* signalize caller that the packet is to be dropped. */
584 if (window_protected(bat_priv, seq_diff,
585 &orig_node->batman_seqno_reset))
586 goto out;
587
588 rcu_read_lock();
589 hlist_for_each_entry_rcu(tmp_neigh_node, node,
590 &orig_node->neigh_list, list) {
591
592 is_duplicate |= get_bit_status(tmp_neigh_node->real_bits,
593 orig_node->last_real_seqno,
594 batman_packet->seqno);
595
596 if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) &&
597 (tmp_neigh_node->if_incoming == if_incoming))
598 set_mark = 1;
599 else
600 set_mark = 0;
601
602 /* if the window moved, set the update flag. */
603 need_update |= bit_get_packet(bat_priv,
604 tmp_neigh_node->real_bits,
605 seq_diff, set_mark);
606
607 tmp_neigh_node->real_packet_count =
608 bit_packet_count(tmp_neigh_node->real_bits);
609 }
610 rcu_read_unlock();
611
612 if (need_update) {
613 bat_dbg(DBG_BATMAN, bat_priv,
614 "updating last_seqno: old %d, new %d\n",
615 orig_node->last_real_seqno, batman_packet->seqno);
616 orig_node->last_real_seqno = batman_packet->seqno;
617 }
618
619 ret = is_duplicate;
620
621out:
622 spin_unlock_bh(&orig_node->ogm_cnt_lock);
623 orig_node_free_ref(orig_node);
624 return ret;
625}
626
627void receive_bat_packet(const struct ethhdr *ethhdr,
628 struct batman_packet *batman_packet,
629 const unsigned char *tt_buff,
630 struct hard_iface *if_incoming)
631{
632 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
633 struct hard_iface *hard_iface;
634 struct orig_node *orig_neigh_node, *orig_node;
635 struct neigh_node *router = NULL, *router_router = NULL;
636 struct neigh_node *orig_neigh_router = NULL;
637 int has_directlink_flag;
638 int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0;
639 int is_broadcast = 0, is_bidirectional, is_single_hop_neigh;
640 int is_duplicate;
641 uint32_t if_incoming_seqno;
642
643 /* Silently drop when the batman packet is actually not a
644 * correct packet.
645 *
646 * This might happen if a packet is padded (e.g. Ethernet has a
647 * minimum frame length of 64 byte) and the aggregation interprets
648 * it as an additional length.
649 *
650 * TODO: A more sane solution would be to have a bit in the
651 * batman_packet to detect whether the packet is the last
652 * packet in an aggregation. Here we expect that the padding
653 * is always zero (or not 0x01)
654 */
655 if (batman_packet->packet_type != BAT_PACKET)
656 return;
657
658 /* could be changed by schedule_own_packet() */
659 if_incoming_seqno = atomic_read(&if_incoming->seqno);
660
661 has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0);
662
663 is_single_hop_neigh = (compare_eth(ethhdr->h_source,
664 batman_packet->orig) ? 1 : 0);
665
666 bat_dbg(DBG_BATMAN, bat_priv,
667 "Received BATMAN packet via NB: %pM, IF: %s [%pM] "
668 "(from OG: %pM, via prev OG: %pM, seqno %d, ttvn %u, "
669 "crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n",
670 ethhdr->h_source, if_incoming->net_dev->name,
671 if_incoming->net_dev->dev_addr, batman_packet->orig,
672 batman_packet->prev_sender, batman_packet->seqno,
673 batman_packet->ttvn, batman_packet->tt_crc,
674 batman_packet->tt_num_changes, batman_packet->tq,
675 batman_packet->ttl, batman_packet->version,
676 has_directlink_flag);
677
678 rcu_read_lock();
679 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
680 if (hard_iface->if_status != IF_ACTIVE)
681 continue;
682
683 if (hard_iface->soft_iface != if_incoming->soft_iface)
684 continue;
685
686 if (compare_eth(ethhdr->h_source,
687 hard_iface->net_dev->dev_addr))
688 is_my_addr = 1;
689
690 if (compare_eth(batman_packet->orig,
691 hard_iface->net_dev->dev_addr))
692 is_my_orig = 1;
693
694 if (compare_eth(batman_packet->prev_sender,
695 hard_iface->net_dev->dev_addr))
696 is_my_oldorig = 1;
697
698 if (is_broadcast_ether_addr(ethhdr->h_source))
699 is_broadcast = 1;
700 }
701 rcu_read_unlock();
702
703 if (batman_packet->version != COMPAT_VERSION) {
704 bat_dbg(DBG_BATMAN, bat_priv,
705 "Drop packet: incompatible batman version (%i)\n",
706 batman_packet->version);
707 return;
708 }
709
710 if (is_my_addr) {
711 bat_dbg(DBG_BATMAN, bat_priv,
712 "Drop packet: received my own broadcast (sender: %pM"
713 ")\n",
714 ethhdr->h_source);
715 return;
716 }
717
718 if (is_broadcast) {
719 bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
720 "ignoring all packets with broadcast source addr (sender: %pM"
721 ")\n", ethhdr->h_source);
722 return;
723 }
724
725 if (is_my_orig) {
726 unsigned long *word;
727 int offset;
728
729 orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source);
730 if (!orig_neigh_node)
731 return;
732
733 /* neighbor has to indicate direct link and it has to
734 * come via the corresponding interface */
735 /* save packet seqno for bidirectional check */
736 if (has_directlink_flag &&
737 compare_eth(if_incoming->net_dev->dev_addr,
738 batman_packet->orig)) {
739 offset = if_incoming->if_num * NUM_WORDS;
740
741 spin_lock_bh(&orig_neigh_node->ogm_cnt_lock);
742 word = &(orig_neigh_node->bcast_own[offset]);
743 bit_mark(word,
744 if_incoming_seqno - batman_packet->seqno - 2);
745 orig_neigh_node->bcast_own_sum[if_incoming->if_num] =
746 bit_packet_count(word);
747 spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock);
748 }
749
750 bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
751 "originator packet from myself (via neighbor)\n");
752 orig_node_free_ref(orig_neigh_node);
753 return;
754 }
755
756 if (is_my_oldorig) {
757 bat_dbg(DBG_BATMAN, bat_priv,
758 "Drop packet: ignoring all rebroadcast echos (sender: "
759 "%pM)\n", ethhdr->h_source);
760 return;
761 }
762
763 orig_node = get_orig_node(bat_priv, batman_packet->orig);
764 if (!orig_node)
765 return;
766
767 is_duplicate = count_real_packets(ethhdr, batman_packet, if_incoming);
768
769 if (is_duplicate == -1) {
770 bat_dbg(DBG_BATMAN, bat_priv,
771 "Drop packet: packet within seqno protection time "
772 "(sender: %pM)\n", ethhdr->h_source);
773 goto out;
774 }
775
776 if (batman_packet->tq == 0) {
777 bat_dbg(DBG_BATMAN, bat_priv,
778 "Drop packet: originator packet with tq equal 0\n");
779 goto out;
780 }
781
782 router = orig_node_get_router(orig_node);
783 if (router)
784 router_router = orig_node_get_router(router->orig_node);
785
786 /* avoid temporary routing loops */
787 if (router && router_router &&
788 (compare_eth(router->addr, batman_packet->prev_sender)) &&
789 !(compare_eth(batman_packet->orig, batman_packet->prev_sender)) &&
790 (compare_eth(router->addr, router_router->addr))) {
791 bat_dbg(DBG_BATMAN, bat_priv,
792 "Drop packet: ignoring all rebroadcast packets that "
793 "may make me loop (sender: %pM)\n", ethhdr->h_source);
794 goto out;
795 }
796
797 /* if sender is a direct neighbor the sender mac equals
798 * originator mac */
799 orig_neigh_node = (is_single_hop_neigh ?
800 orig_node :
801 get_orig_node(bat_priv, ethhdr->h_source));
802 if (!orig_neigh_node)
803 goto out;
804
805 orig_neigh_router = orig_node_get_router(orig_neigh_node);
806
807 /* drop packet if sender is not a direct neighbor and if we
808 * don't route towards it */
809 if (!is_single_hop_neigh && (!orig_neigh_router)) {
810 bat_dbg(DBG_BATMAN, bat_priv,
811 "Drop packet: OGM via unknown neighbor!\n");
812 goto out_neigh;
813 }
814
815 is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node,
816 batman_packet, if_incoming);
817
818 bonding_save_primary(orig_node, orig_neigh_node, batman_packet);
819
820 /* update ranking if it is not a duplicate or has the same
821 * seqno and similar ttl as the non-duplicate */
822 if (is_bidirectional &&
823 (!is_duplicate ||
824 ((orig_node->last_real_seqno == batman_packet->seqno) &&
825 (orig_node->last_ttl - 3 <= batman_packet->ttl))))
826 update_orig(bat_priv, orig_node, ethhdr, batman_packet,
827 if_incoming, tt_buff, is_duplicate);
828
829 /* is single hop (direct) neighbor */
830 if (is_single_hop_neigh) {
831
832 /* mark direct link on incoming interface */
833 schedule_forward_packet(orig_node, ethhdr, batman_packet,
834 1, if_incoming);
835
836 bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: "
837 "rebroadcast neighbor packet with direct link flag\n");
838 goto out_neigh;
839 }
840
841 /* multihop originator */
842 if (!is_bidirectional) {
843 bat_dbg(DBG_BATMAN, bat_priv,
844 "Drop packet: not received via bidirectional link\n");
845 goto out_neigh;
846 }
847
848 if (is_duplicate) {
849 bat_dbg(DBG_BATMAN, bat_priv,
850 "Drop packet: duplicate packet received\n");
851 goto out_neigh;
852 }
853
854 bat_dbg(DBG_BATMAN, bat_priv,
855 "Forwarding packet: rebroadcast originator packet\n");
856 schedule_forward_packet(orig_node, ethhdr, batman_packet,
857 0, if_incoming);
858
859out_neigh:
860 if ((orig_neigh_node) && (!is_single_hop_neigh))
861 orig_node_free_ref(orig_neigh_node);
862out:
863 if (router)
864 neigh_node_free_ref(router);
865 if (router_router)
866 neigh_node_free_ref(router_router);
867 if (orig_neigh_router)
868 neigh_node_free_ref(orig_neigh_router);
869
870 orig_node_free_ref(orig_node);
871}
872
873int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface)
874{ 250{
875 struct ethhdr *ethhdr; 251 struct ethhdr *ethhdr;
876 252
877 /* drop packet if it has not necessary minimum size */ 253 /* drop packet if it has not necessary minimum size */
878 if (unlikely(!pskb_may_pull(skb, sizeof(struct batman_packet)))) 254 if (unlikely(!pskb_may_pull(skb, BATMAN_OGM_LEN)))
879 return NET_RX_DROP; 255 return NET_RX_DROP;
880 256
881 ethhdr = (struct ethhdr *)skb_mac_header(skb); 257 ethhdr = (struct ethhdr *)skb_mac_header(skb);
@@ -898,10 +274,7 @@ int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface)
898 274
899 ethhdr = (struct ethhdr *)skb_mac_header(skb); 275 ethhdr = (struct ethhdr *)skb_mac_header(skb);
900 276
901 receive_aggr_bat_packet(ethhdr, 277 bat_ogm_receive(ethhdr, skb->data, skb_headlen(skb), hard_iface);
902 skb->data,
903 skb_headlen(skb),
904 hard_iface);
905 278
906 kfree_skb(skb); 279 kfree_skb(skb);
907 return NET_RX_SUCCESS; 280 return NET_RX_SUCCESS;
@@ -1243,7 +616,7 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if)
1243 } 616 }
1244 break; 617 break;
1245 case TT_RESPONSE: 618 case TT_RESPONSE:
1246 /* packet needs to be linearised to access the TT changes */ 619 /* packet needs to be linearized to access the TT changes */
1247 if (skb_linearize(skb) < 0) 620 if (skb_linearize(skb) < 0)
1248 goto out; 621 goto out;
1249 622
@@ -1300,7 +673,7 @@ int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if)
1300 roam_adv_packet->client); 673 roam_adv_packet->client);
1301 674
1302 tt_global_add(bat_priv, orig_node, roam_adv_packet->client, 675 tt_global_add(bat_priv, orig_node, roam_adv_packet->client,
1303 atomic_read(&orig_node->last_ttvn) + 1, true); 676 atomic_read(&orig_node->last_ttvn) + 1, true, false);
1304 677
1305 /* Roaming phase starts: I have new information but the ttvn has not 678 /* Roaming phase starts: I have new information but the ttvn has not
1306 * been incremented yet. This flag will make me check all the incoming 679 * been incremented yet. This flag will make me check all the incoming
@@ -1536,7 +909,7 @@ static int check_unicast_ttvn(struct bat_priv *bat_priv,
1536 909
1537 ethhdr = (struct ethhdr *)(skb->data + 910 ethhdr = (struct ethhdr *)(skb->data +
1538 sizeof(struct unicast_packet)); 911 sizeof(struct unicast_packet));
1539 orig_node = transtable_search(bat_priv, ethhdr->h_dest); 912 orig_node = transtable_search(bat_priv, NULL, ethhdr->h_dest);
1540 913
1541 if (!orig_node) { 914 if (!orig_node) {
1542 if (!is_my_client(bat_priv, ethhdr->h_dest)) 915 if (!is_my_client(bat_priv, ethhdr->h_dest))
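For illustration, here is a minimal standalone sketch of the per-interface "own broadcast" window that the removed receive_bat_packet() updates via bit_mark() and bit_packet_count() above. The names (WINDOW_SIZE, struct seqno_window) and the single-word window are invented for this example and do not match the kernel's bitarray implementation; the point is only the mark-and-count idea behind the bidirectional link check.

/*
 * Sketch: mark which of our own recent OGM sequence numbers were echoed
 * back by a neighbor, then count them to estimate link symmetry.
 * Uses a GCC/Clang builtin for the population count.
 */
#include <stdint.h>
#include <stdio.h>

#define WINDOW_SIZE 64          /* one machine word, for brevity */

struct seqno_window {
	uint64_t bits;          /* bit i set => echo seen for (latest - i) */
};

/* record that our own OGM, offset packets back, came back on this iface */
static void window_mark(struct seqno_window *w, unsigned int offset)
{
	if (offset < WINDOW_SIZE)
		w->bits |= (uint64_t)1 << offset;
}

/* how many of the last WINDOW_SIZE own OGMs were echoed back */
static unsigned int window_count(const struct seqno_window *w)
{
	return (unsigned int)__builtin_popcountll(w->bits);
}

int main(void)
{
	struct seqno_window w = { 0 };

	/* pretend echoes of our seqnos n-2, n-3 and n-10 were received */
	window_mark(&w, 2);
	window_mark(&w, 3);
	window_mark(&w, 10);

	printf("echoed own OGMs in window: %u\n", window_count(&w));
	return 0;
}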
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index fb14e9579b19..7aaee0fb0fdc 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -23,19 +23,15 @@
23#define _NET_BATMAN_ADV_ROUTING_H_ 23#define _NET_BATMAN_ADV_ROUTING_H_
24 24
25void slide_own_bcast_window(struct hard_iface *hard_iface); 25void slide_own_bcast_window(struct hard_iface *hard_iface);
26void receive_bat_packet(const struct ethhdr *ethhdr, 26void update_route(struct bat_priv *bat_priv, struct orig_node *orig_node,
27 struct batman_packet *batman_packet, 27 struct neigh_node *neigh_node);
28 const unsigned char *tt_buff,
29 struct hard_iface *if_incoming);
30void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
31 struct neigh_node *neigh_node);
32int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); 28int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
33int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if); 29int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if);
34int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); 30int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
35int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if); 31int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if);
36int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if); 32int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if);
37int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if); 33int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if);
38int recv_bat_packet(struct sk_buff *skb, struct hard_iface *recv_if); 34int recv_bat_ogm_packet(struct sk_buff *skb, struct hard_iface *recv_if);
39int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if); 35int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if);
40int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if); 36int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if);
41struct neigh_node *find_router(struct bat_priv *bat_priv, 37struct neigh_node *find_router(struct bat_priv *bat_priv,
@@ -43,5 +39,12 @@ struct neigh_node *find_router(struct bat_priv *bat_priv,
43 const struct hard_iface *recv_if); 39 const struct hard_iface *recv_if);
44void bonding_candidate_del(struct orig_node *orig_node, 40void bonding_candidate_del(struct orig_node *orig_node,
45 struct neigh_node *neigh_node); 41 struct neigh_node *neigh_node);
42void bonding_candidate_add(struct orig_node *orig_node,
43 struct neigh_node *neigh_node);
44void bonding_save_primary(const struct orig_node *orig_node,
45 struct orig_node *orig_neigh_node,
46 const struct batman_ogm_packet *batman_ogm_packet);
47int window_protected(struct bat_priv *bat_priv, int32_t seq_num_diff,
48 unsigned long *last_reset);
46 49
47#endif /* _NET_BATMAN_ADV_ROUTING_H_ */ 50#endif /* _NET_BATMAN_ADV_ROUTING_H_ */
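The header above now also exports window_protected(). As a rough illustration of the idea behind such a check, the sketch below drops packets whose sequence-number jump falls outside a plausible range until a protection timeout expires; the constants and the use of plain seconds instead of jiffies are simplifying assumptions, not the kernel's implementation.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define EXPECTED_SEQNO_RANGE	65536
#define RESET_PROTECTION_SECS	30

static bool seqno_window_protected(int32_t seq_num_diff, time_t *last_reset)
{
	time_t now = time(NULL);

	/* inside the plausible range: nothing to protect against */
	if (seq_num_diff > -EXPECTED_SEQNO_RANGE &&
	    seq_num_diff < EXPECTED_SEQNO_RANGE)
		return false;

	/* large jump: keep dropping until the protection time has passed,
	 * then accept the new sequence number space and restart */
	if (now - *last_reset < RESET_PROTECTION_SECS)
		return true;	/* still protected -> caller drops packet */

	*last_reset = now;
	return false;
}

int main(void)
{
	time_t last_reset = time(NULL);

	printf("diff 10 protected:     %d\n",
	       seqno_window_protected(10, &last_reset));
	printf("diff 100000 protected: %d\n",
	       seqno_window_protected(100000, &last_reset));
	return 0;
}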
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 58d14472068c..8a684eb738ad 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -26,33 +26,12 @@
26#include "soft-interface.h" 26#include "soft-interface.h"
27#include "hard-interface.h" 27#include "hard-interface.h"
28#include "vis.h" 28#include "vis.h"
29#include "aggregation.h"
30#include "gateway_common.h" 29#include "gateway_common.h"
31#include "originator.h" 30#include "originator.h"
31#include "bat_ogm.h"
32 32
33static void send_outstanding_bcast_packet(struct work_struct *work); 33static void send_outstanding_bcast_packet(struct work_struct *work);
34 34
35/* apply hop penalty for a normal link */
36static uint8_t hop_penalty(uint8_t tq, const struct bat_priv *bat_priv)
37{
38 int hop_penalty = atomic_read(&bat_priv->hop_penalty);
39 return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE);
40}
41
42/* when do we schedule our own packet to be sent */
43static unsigned long own_send_time(const struct bat_priv *bat_priv)
44{
45 return jiffies + msecs_to_jiffies(
46 atomic_read(&bat_priv->orig_interval) -
47 JITTER + (random32() % 2*JITTER));
48}
49
50/* when do we schedule a forwarded packet to be sent */
51static unsigned long forward_send_time(void)
52{
53 return jiffies + msecs_to_jiffies(random32() % (JITTER/2));
54}
55
56/* send out an already prepared packet to the given address via the 35/* send out an already prepared packet to the given address via the
57 * specified batman interface */ 36 * specified batman interface */
58int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface, 37int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface,
@@ -99,141 +78,17 @@ send_skb_err:
99 return NET_XMIT_DROP; 78 return NET_XMIT_DROP;
100} 79}
101 80
102/* Send a packet to a given interface */
103static void send_packet_to_if(struct forw_packet *forw_packet,
104 struct hard_iface *hard_iface)
105{
106 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
107 char *fwd_str;
108 uint8_t packet_num;
109 int16_t buff_pos;
110 struct batman_packet *batman_packet;
111 struct sk_buff *skb;
112
113 if (hard_iface->if_status != IF_ACTIVE)
114 return;
115
116 packet_num = 0;
117 buff_pos = 0;
118 batman_packet = (struct batman_packet *)forw_packet->skb->data;
119
120 /* adjust all flags and log packets */
121 while (aggregated_packet(buff_pos,
122 forw_packet->packet_len,
123 batman_packet->tt_num_changes)) {
124
125 /* we might have aggregated direct link packets with an
126 * ordinary base packet */
127 if ((forw_packet->direct_link_flags & (1 << packet_num)) &&
128 (forw_packet->if_incoming == hard_iface))
129 batman_packet->flags |= DIRECTLINK;
130 else
131 batman_packet->flags &= ~DIRECTLINK;
132
133 fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ?
134 "Sending own" :
135 "Forwarding"));
136 bat_dbg(DBG_BATMAN, bat_priv,
137 "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d,"
138 " IDF %s, hvn %d) on interface %s [%pM]\n",
139 fwd_str, (packet_num > 0 ? "aggregated " : ""),
140 batman_packet->orig, ntohl(batman_packet->seqno),
141 batman_packet->tq, batman_packet->ttl,
142 (batman_packet->flags & DIRECTLINK ?
143 "on" : "off"),
144 batman_packet->ttvn, hard_iface->net_dev->name,
145 hard_iface->net_dev->dev_addr);
146
147 buff_pos += sizeof(*batman_packet) +
148 tt_len(batman_packet->tt_num_changes);
149 packet_num++;
150 batman_packet = (struct batman_packet *)
151 (forw_packet->skb->data + buff_pos);
152 }
153
154 /* create clone because function is called more than once */
155 skb = skb_clone(forw_packet->skb, GFP_ATOMIC);
156 if (skb)
157 send_skb_packet(skb, hard_iface, broadcast_addr);
158}
159
160/* send a batman packet */
161static void send_packet(struct forw_packet *forw_packet)
162{
163 struct hard_iface *hard_iface;
164 struct net_device *soft_iface;
165 struct bat_priv *bat_priv;
166 struct hard_iface *primary_if = NULL;
167 struct batman_packet *batman_packet =
168 (struct batman_packet *)(forw_packet->skb->data);
169 int directlink = (batman_packet->flags & DIRECTLINK ? 1 : 0);
170
171 if (!forw_packet->if_incoming) {
172 pr_err("Error - can't forward packet: incoming iface not "
173 "specified\n");
174 goto out;
175 }
176
177 soft_iface = forw_packet->if_incoming->soft_iface;
178 bat_priv = netdev_priv(soft_iface);
179
180 if (forw_packet->if_incoming->if_status != IF_ACTIVE)
181 goto out;
182
183 primary_if = primary_if_get_selected(bat_priv);
184 if (!primary_if)
185 goto out;
186
187 /* multihomed peer assumed */
188 /* non-primary OGMs are only broadcasted on their interface */
189 if ((directlink && (batman_packet->ttl == 1)) ||
190 (forw_packet->own && (forw_packet->if_incoming != primary_if))) {
191
192 /* FIXME: what about aggregated packets ? */
193 bat_dbg(DBG_BATMAN, bat_priv,
194 "%s packet (originator %pM, seqno %d, TTL %d) "
195 "on interface %s [%pM]\n",
196 (forw_packet->own ? "Sending own" : "Forwarding"),
197 batman_packet->orig, ntohl(batman_packet->seqno),
198 batman_packet->ttl,
199 forw_packet->if_incoming->net_dev->name,
200 forw_packet->if_incoming->net_dev->dev_addr);
201
202 /* skb is only used once and than forw_packet is free'd */
203 send_skb_packet(forw_packet->skb, forw_packet->if_incoming,
204 broadcast_addr);
205 forw_packet->skb = NULL;
206
207 goto out;
208 }
209
210 /* broadcast on every interface */
211 rcu_read_lock();
212 list_for_each_entry_rcu(hard_iface, &hardif_list, list) {
213 if (hard_iface->soft_iface != soft_iface)
214 continue;
215
216 send_packet_to_if(forw_packet, hard_iface);
217 }
218 rcu_read_unlock();
219
220out:
221 if (primary_if)
222 hardif_free_ref(primary_if);
223}
224
225static void realloc_packet_buffer(struct hard_iface *hard_iface, 81static void realloc_packet_buffer(struct hard_iface *hard_iface,
226 int new_len) 82 int new_len)
227{ 83{
228 unsigned char *new_buff; 84 unsigned char *new_buff;
229 struct batman_packet *batman_packet;
230 85
231 new_buff = kmalloc(new_len, GFP_ATOMIC); 86 new_buff = kmalloc(new_len, GFP_ATOMIC);
232 87
233 /* keep old buffer if kmalloc should fail */ 88 /* keep old buffer if kmalloc should fail */
234 if (new_buff) { 89 if (new_buff) {
235 memcpy(new_buff, hard_iface->packet_buff, 90 memcpy(new_buff, hard_iface->packet_buff,
236 sizeof(*batman_packet)); 91 BATMAN_OGM_LEN);
237 92
238 kfree(hard_iface->packet_buff); 93 kfree(hard_iface->packet_buff);
239 hard_iface->packet_buff = new_buff; 94 hard_iface->packet_buff = new_buff;
@@ -242,60 +97,48 @@ static void realloc_packet_buffer(struct hard_iface *hard_iface,
242} 97}
243 98
244/* when calling this function (hard_iface == primary_if) has to be true */ 99/* when calling this function (hard_iface == primary_if) has to be true */
245static void prepare_packet_buffer(struct bat_priv *bat_priv, 100static int prepare_packet_buffer(struct bat_priv *bat_priv,
246 struct hard_iface *hard_iface) 101 struct hard_iface *hard_iface)
247{ 102{
248 int new_len; 103 int new_len;
249 struct batman_packet *batman_packet;
250 104
251 new_len = BAT_PACKET_LEN + 105 new_len = BATMAN_OGM_LEN +
252 tt_len((uint8_t)atomic_read(&bat_priv->tt_local_changes)); 106 tt_len((uint8_t)atomic_read(&bat_priv->tt_local_changes));
253 107
254 /* if we have too many changes for one packet don't send any 108 /* if we have too many changes for one packet don't send any
255 * and wait for the tt table request which will be fragmented */ 109 * and wait for the tt table request which will be fragmented */
256 if (new_len > hard_iface->soft_iface->mtu) 110 if (new_len > hard_iface->soft_iface->mtu)
257 new_len = BAT_PACKET_LEN; 111 new_len = BATMAN_OGM_LEN;
258 112
259 realloc_packet_buffer(hard_iface, new_len); 113 realloc_packet_buffer(hard_iface, new_len);
260 batman_packet = (struct batman_packet *)hard_iface->packet_buff;
261 114
262 atomic_set(&bat_priv->tt_crc, tt_local_crc(bat_priv)); 115 atomic_set(&bat_priv->tt_crc, tt_local_crc(bat_priv));
263 116
264 /* reset the sending counter */ 117 /* reset the sending counter */
265 atomic_set(&bat_priv->tt_ogm_append_cnt, TT_OGM_APPEND_MAX); 118 atomic_set(&bat_priv->tt_ogm_append_cnt, TT_OGM_APPEND_MAX);
266 119
267 batman_packet->tt_num_changes = tt_changes_fill_buffer(bat_priv, 120 return tt_changes_fill_buffer(bat_priv,
268 hard_iface->packet_buff + BAT_PACKET_LEN, 121 hard_iface->packet_buff + BATMAN_OGM_LEN,
269 hard_iface->packet_len - BAT_PACKET_LEN); 122 hard_iface->packet_len - BATMAN_OGM_LEN);
270
271} 123}
272 124
273static void reset_packet_buffer(struct bat_priv *bat_priv, 125static int reset_packet_buffer(struct bat_priv *bat_priv,
274 struct hard_iface *hard_iface) 126 struct hard_iface *hard_iface)
275{ 127{
276 struct batman_packet *batman_packet; 128 realloc_packet_buffer(hard_iface, BATMAN_OGM_LEN);
277 129 return 0;
278 realloc_packet_buffer(hard_iface, BAT_PACKET_LEN);
279
280 batman_packet = (struct batman_packet *)hard_iface->packet_buff;
281 batman_packet->tt_num_changes = 0;
282} 130}
283 131
284void schedule_own_packet(struct hard_iface *hard_iface) 132void schedule_bat_ogm(struct hard_iface *hard_iface)
285{ 133{
286 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); 134 struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
287 struct hard_iface *primary_if; 135 struct hard_iface *primary_if;
288 unsigned long send_time; 136 int tt_num_changes = -1;
289 struct batman_packet *batman_packet;
290 int vis_server;
291 137
292 if ((hard_iface->if_status == IF_NOT_IN_USE) || 138 if ((hard_iface->if_status == IF_NOT_IN_USE) ||
293 (hard_iface->if_status == IF_TO_BE_REMOVED)) 139 (hard_iface->if_status == IF_TO_BE_REMOVED))
294 return; 140 return;
295 141
296 vis_server = atomic_read(&bat_priv->vis_mode);
297 primary_if = primary_if_get_selected(bat_priv);
298
299 /** 142 /**
300 * the interface gets activated here to avoid race conditions between 143 * the interface gets activated here to avoid race conditions between
301 * the moment of activating the interface in 144 * the moment of activating the interface in
@@ -306,124 +149,26 @@ void schedule_own_packet(struct hard_iface *hard_iface)
306 if (hard_iface->if_status == IF_TO_BE_ACTIVATED) 149 if (hard_iface->if_status == IF_TO_BE_ACTIVATED)
307 hard_iface->if_status = IF_ACTIVE; 150 hard_iface->if_status = IF_ACTIVE;
308 151
152 primary_if = primary_if_get_selected(bat_priv);
153
309 if (hard_iface == primary_if) { 154 if (hard_iface == primary_if) {
310 /* if at least one change happened */ 155 /* if at least one change happened */
311 if (atomic_read(&bat_priv->tt_local_changes) > 0) { 156 if (atomic_read(&bat_priv->tt_local_changes) > 0) {
312 tt_commit_changes(bat_priv); 157 tt_commit_changes(bat_priv);
313 prepare_packet_buffer(bat_priv, hard_iface); 158 tt_num_changes = prepare_packet_buffer(bat_priv,
159 hard_iface);
314 } 160 }
315 161
316 /* if the changes have been sent enough times */ 162 /* if the changes have been sent often enough */
317 if (!atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt)) 163 if (!atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt))
318 reset_packet_buffer(bat_priv, hard_iface); 164 tt_num_changes = reset_packet_buffer(bat_priv,
165 hard_iface);
319 } 166 }
320 167
321 /**
322 * NOTE: packet_buff might just have been re-allocated in
323 * prepare_packet_buffer() or in reset_packet_buffer()
324 */
325 batman_packet = (struct batman_packet *)hard_iface->packet_buff;
326
327 /* change sequence number to network order */
328 batman_packet->seqno =
329 htonl((uint32_t)atomic_read(&hard_iface->seqno));
330
331 batman_packet->ttvn = atomic_read(&bat_priv->ttvn);
332 batman_packet->tt_crc = htons((uint16_t)atomic_read(&bat_priv->tt_crc));
333
334 if (vis_server == VIS_TYPE_SERVER_SYNC)
335 batman_packet->flags |= VIS_SERVER;
336 else
337 batman_packet->flags &= ~VIS_SERVER;
338
339 if ((hard_iface == primary_if) &&
340 (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER))
341 batman_packet->gw_flags =
342 (uint8_t)atomic_read(&bat_priv->gw_bandwidth);
343 else
344 batman_packet->gw_flags = NO_FLAGS;
345
346 atomic_inc(&hard_iface->seqno);
347
348 slide_own_bcast_window(hard_iface);
349 send_time = own_send_time(bat_priv);
350 add_bat_packet_to_list(bat_priv,
351 hard_iface->packet_buff,
352 hard_iface->packet_len,
353 hard_iface, 1, send_time);
354
355 if (primary_if) 168 if (primary_if)
356 hardif_free_ref(primary_if); 169 hardif_free_ref(primary_if);
357}
358
359void schedule_forward_packet(struct orig_node *orig_node,
360 const struct ethhdr *ethhdr,
361 struct batman_packet *batman_packet,
362 int directlink,
363 struct hard_iface *if_incoming)
364{
365 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
366 struct neigh_node *router;
367 uint8_t in_tq, in_ttl, tq_avg = 0;
368 unsigned long send_time;
369 uint8_t tt_num_changes;
370
371 if (batman_packet->ttl <= 1) {
372 bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n");
373 return;
374 }
375
376 router = orig_node_get_router(orig_node);
377
378 in_tq = batman_packet->tq;
379 in_ttl = batman_packet->ttl;
380 tt_num_changes = batman_packet->tt_num_changes;
381
382 batman_packet->ttl--;
383 memcpy(batman_packet->prev_sender, ethhdr->h_source, ETH_ALEN);
384
385 /* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast
386 * of our best tq value */
387 if (router && router->tq_avg != 0) {
388
389 /* rebroadcast ogm of best ranking neighbor as is */
390 if (!compare_eth(router->addr, ethhdr->h_source)) {
391 batman_packet->tq = router->tq_avg;
392
393 if (router->last_ttl)
394 batman_packet->ttl = router->last_ttl - 1;
395 }
396
397 tq_avg = router->tq_avg;
398 }
399
400 if (router)
401 neigh_node_free_ref(router);
402
403 /* apply hop penalty */
404 batman_packet->tq = hop_penalty(batman_packet->tq, bat_priv);
405
406 bat_dbg(DBG_BATMAN, bat_priv,
407 "Forwarding packet: tq_orig: %i, tq_avg: %i, "
408 "tq_forw: %i, ttl_orig: %i, ttl_forw: %i\n",
409 in_tq, tq_avg, batman_packet->tq, in_ttl - 1,
410 batman_packet->ttl);
411
412 batman_packet->seqno = htonl(batman_packet->seqno);
413 batman_packet->tt_crc = htons(batman_packet->tt_crc);
414
415 /* switch of primaries first hop flag when forwarding */
416 batman_packet->flags &= ~PRIMARIES_FIRST_HOP;
417 if (directlink)
418 batman_packet->flags |= DIRECTLINK;
419 else
420 batman_packet->flags &= ~DIRECTLINK;
421 170
422 send_time = forward_send_time(); 171 bat_ogm_schedule(hard_iface, tt_num_changes);
423 add_bat_packet_to_list(bat_priv,
424 (unsigned char *)batman_packet,
425 sizeof(*batman_packet) + tt_len(tt_num_changes),
426 if_incoming, 0, send_time);
427} 172}
428 173
429static void forw_packet_free(struct forw_packet *forw_packet) 174static void forw_packet_free(struct forw_packet *forw_packet)
@@ -454,7 +199,7 @@ static void _add_bcast_packet_to_list(struct bat_priv *bat_priv,
454} 199}
455 200
456/* add a broadcast packet to the queue and setup timers. broadcast packets 201/* add a broadcast packet to the queue and setup timers. broadcast packets
457 * are sent multiple times to increase probability for beeing received. 202 * are sent multiple times to increase probability for being received.
458 * 203 *
459 * This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on 204 * This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on
460 * errors. 205 * errors.
@@ -557,7 +302,7 @@ out:
557 atomic_inc(&bat_priv->bcast_queue_left); 302 atomic_inc(&bat_priv->bcast_queue_left);
558} 303}
559 304
560void send_outstanding_bat_packet(struct work_struct *work) 305void send_outstanding_bat_ogm_packet(struct work_struct *work)
561{ 306{
562 struct delayed_work *delayed_work = 307 struct delayed_work *delayed_work =
563 container_of(work, struct delayed_work, work); 308 container_of(work, struct delayed_work, work);
@@ -573,7 +318,7 @@ void send_outstanding_bat_packet(struct work_struct *work)
573 if (atomic_read(&bat_priv->mesh_state) == MESH_DEACTIVATING) 318 if (atomic_read(&bat_priv->mesh_state) == MESH_DEACTIVATING)
574 goto out; 319 goto out;
575 320
576 send_packet(forw_packet); 321 bat_ogm_emit(forw_packet);
577 322
578 /** 323 /**
579 * we have to have at least one packet in the queue 324 * we have to have at least one packet in the queue
@@ -581,7 +326,7 @@ void send_outstanding_bat_packet(struct work_struct *work)
581 * shutting down 326 * shutting down
582 */ 327 */
583 if (forw_packet->own) 328 if (forw_packet->own)
584 schedule_own_packet(forw_packet->if_incoming); 329 schedule_bat_ogm(forw_packet->if_incoming);
585 330
586out: 331out:
587 /* don't count own packet */ 332 /* don't count own packet */
@@ -612,7 +357,7 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
612 &bat_priv->forw_bcast_list, list) { 357 &bat_priv->forw_bcast_list, list) {
613 358
614 /** 359 /**
615 * if purge_outstanding_packets() was called with an argmument 360 * if purge_outstanding_packets() was called with an argument
616 * we delete only packets belonging to the given interface 361 * we delete only packets belonging to the given interface
617 */ 362 */
618 if ((hard_iface) && 363 if ((hard_iface) &&
@@ -641,7 +386,7 @@ void purge_outstanding_packets(struct bat_priv *bat_priv,
641 &bat_priv->forw_bat_list, list) { 386 &bat_priv->forw_bat_list, list) {
642 387
643 /** 388 /**
644 * if purge_outstanding_packets() was called with an argmument 389 * if purge_outstanding_packets() was called with an argument
645 * we delete only packets belonging to the given interface 390 * we delete only packets belonging to the given interface
646 */ 391 */
647 if ((hard_iface) && 392 if ((hard_iface) &&
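Two small scheduling ideas are visible in the code this patch moves out of send.c: the TQ hop penalty and the jittered send times. The standalone sketch below restates them; TQ_MAX_VALUE, JITTER and ORIG_INTERVAL_MS are assumed example values, plain milliseconds stand in for jiffies, and rand() stands in for the kernel's random32().

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TQ_MAX_VALUE	255
#define JITTER		20	/* ms */
#define ORIG_INTERVAL_MS 1000

/* degrade the transmit quality once per traversed hop */
static uint8_t hop_penalty(uint8_t tq, int penalty)
{
	return (uint8_t)((tq * (TQ_MAX_VALUE - penalty)) / TQ_MAX_VALUE);
}

/* own OGMs: roughly one interval from now, jittered to avoid sync */
static unsigned int own_send_delay_ms(void)
{
	return ORIG_INTERVAL_MS - JITTER + (rand() % (2 * JITTER));
}

/* forwarded OGMs: small random delay only */
static unsigned int forward_send_delay_ms(void)
{
	return rand() % (JITTER / 2);
}

int main(void)
{
	printf("tq 255 after one hop (penalty 30): %u\n", hop_penalty(255, 30));
	printf("own OGM delay: %u ms, forward delay: %u ms\n",
	       own_send_delay_ms(), forward_send_delay_ms());
	return 0;
}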
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 1f2d1e877663..c8ca3ef7385b 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -24,15 +24,10 @@
24 24
25int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface, 25int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface,
26 const uint8_t *dst_addr); 26 const uint8_t *dst_addr);
27void schedule_own_packet(struct hard_iface *hard_iface); 27void schedule_bat_ogm(struct hard_iface *hard_iface);
28void schedule_forward_packet(struct orig_node *orig_node,
29 const struct ethhdr *ethhdr,
30 struct batman_packet *batman_packet,
31 int directlink,
32 struct hard_iface *if_outgoing);
33int add_bcast_packet_to_list(struct bat_priv *bat_priv, 28int add_bcast_packet_to_list(struct bat_priv *bat_priv,
34 const struct sk_buff *skb, unsigned long delay); 29 const struct sk_buff *skb, unsigned long delay);
35void send_outstanding_bat_packet(struct work_struct *work); 30void send_outstanding_bat_ogm_packet(struct work_struct *work);
36void purge_outstanding_packets(struct bat_priv *bat_priv, 31void purge_outstanding_packets(struct bat_priv *bat_priv,
37 const struct hard_iface *hard_iface); 32 const struct hard_iface *hard_iface);
38 33
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 3e2f91ffa4e2..f9cc95728989 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -445,30 +445,31 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev,
445{ 445{
446 struct bat_priv *bat_priv = netdev_priv(dev); 446 struct bat_priv *bat_priv = netdev_priv(dev);
447 struct ethhdr *ethhdr = (struct ethhdr *)skb->data; 447 struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
448 struct batman_packet *batman_packet; 448 struct batman_ogm_packet *batman_ogm_packet;
449 struct softif_neigh *softif_neigh = NULL; 449 struct softif_neigh *softif_neigh = NULL;
450 struct hard_iface *primary_if = NULL; 450 struct hard_iface *primary_if = NULL;
451 struct softif_neigh *curr_softif_neigh = NULL; 451 struct softif_neigh *curr_softif_neigh = NULL;
452 452
453 if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) 453 if (ntohs(ethhdr->h_proto) == ETH_P_8021Q)
454 batman_packet = (struct batman_packet *) 454 batman_ogm_packet = (struct batman_ogm_packet *)
455 (skb->data + ETH_HLEN + VLAN_HLEN); 455 (skb->data + ETH_HLEN + VLAN_HLEN);
456 else 456 else
457 batman_packet = (struct batman_packet *)(skb->data + ETH_HLEN); 457 batman_ogm_packet = (struct batman_ogm_packet *)
458 (skb->data + ETH_HLEN);
458 459
459 if (batman_packet->version != COMPAT_VERSION) 460 if (batman_ogm_packet->version != COMPAT_VERSION)
460 goto out; 461 goto out;
461 462
462 if (batman_packet->packet_type != BAT_PACKET) 463 if (batman_ogm_packet->packet_type != BAT_OGM)
463 goto out; 464 goto out;
464 465
465 if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) 466 if (!(batman_ogm_packet->flags & PRIMARIES_FIRST_HOP))
466 goto out; 467 goto out;
467 468
468 if (is_my_mac(batman_packet->orig)) 469 if (is_my_mac(batman_ogm_packet->orig))
469 goto out; 470 goto out;
470 471
471 softif_neigh = softif_neigh_get(bat_priv, batman_packet->orig, vid); 472 softif_neigh = softif_neigh_get(bat_priv, batman_ogm_packet->orig, vid);
472 if (!softif_neigh) 473 if (!softif_neigh)
473 goto out; 474 goto out;
474 475
@@ -532,11 +533,11 @@ static int interface_set_mac_addr(struct net_device *dev, void *p)
532 if (!is_valid_ether_addr(addr->sa_data)) 533 if (!is_valid_ether_addr(addr->sa_data))
533 return -EADDRNOTAVAIL; 534 return -EADDRNOTAVAIL;
534 535
535 /* only modify transtable if it has been initialised before */ 536 /* only modify transtable if it has been initialized before */
536 if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) { 537 if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) {
537 tt_local_remove(bat_priv, dev->dev_addr, 538 tt_local_remove(bat_priv, dev->dev_addr,
538 "mac address changed", false); 539 "mac address changed", false);
539 tt_local_add(dev, addr->sa_data); 540 tt_local_add(dev, addr->sa_data, NULL_IFINDEX);
540 } 541 }
541 542
542 memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); 543 memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
@@ -565,7 +566,7 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
565 struct orig_node *orig_node = NULL; 566 struct orig_node *orig_node = NULL;
566 int data_len = skb->len, ret; 567 int data_len = skb->len, ret;
567 short vid = -1; 568 short vid = -1;
568 bool do_bcast = false; 569 bool do_bcast;
569 570
570 if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) 571 if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
571 goto dropped; 572 goto dropped;
@@ -595,18 +596,19 @@ static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
595 goto dropped; 596 goto dropped;
596 597
597 /* Register the client MAC in the transtable */ 598 /* Register the client MAC in the transtable */
598 tt_local_add(soft_iface, ethhdr->h_source); 599 tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif);
599 600
600 orig_node = transtable_search(bat_priv, ethhdr->h_dest); 601 orig_node = transtable_search(bat_priv, ethhdr->h_source,
601 if (is_multicast_ether_addr(ethhdr->h_dest) || 602 ethhdr->h_dest);
602 (orig_node && orig_node->gw_flags)) { 603 do_bcast = is_multicast_ether_addr(ethhdr->h_dest);
604 if (do_bcast || (orig_node && orig_node->gw_flags)) {
603 ret = gw_is_target(bat_priv, skb, orig_node); 605 ret = gw_is_target(bat_priv, skb, orig_node);
604 606
605 if (ret < 0) 607 if (ret < 0)
606 goto dropped; 608 goto dropped;
607 609
608 if (ret == 0) 610 if (ret)
609 do_bcast = true; 611 do_bcast = false;
610 } 612 }
611 613
612 /* ethernet packet should be broadcasted */ 614 /* ethernet packet should be broadcasted */
@@ -739,6 +741,9 @@ void interface_rx(struct net_device *soft_iface,
739 741
740 soft_iface->last_rx = jiffies; 742 soft_iface->last_rx = jiffies;
741 743
744 if (is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest))
745 goto dropped;
746
742 netif_rx(skb); 747 netif_rx(skb);
743 goto out; 748 goto out;
744 749
@@ -796,10 +801,8 @@ struct net_device *softif_create(const char *name)
796 801
797 soft_iface = alloc_netdev(sizeof(*bat_priv), name, interface_setup); 802 soft_iface = alloc_netdev(sizeof(*bat_priv), name, interface_setup);
798 803
799 if (!soft_iface) { 804 if (!soft_iface)
800 pr_err("Unable to allocate the batman interface: %s\n", name);
801 goto out; 805 goto out;
802 }
803 806
804 ret = register_netdevice(soft_iface); 807 ret = register_netdevice(soft_iface);
805 if (ret < 0) { 808 if (ret < 0) {
@@ -812,6 +815,7 @@ struct net_device *softif_create(const char *name)
812 815
813 atomic_set(&bat_priv->aggregated_ogms, 1); 816 atomic_set(&bat_priv->aggregated_ogms, 1);
814 atomic_set(&bat_priv->bonding, 0); 817 atomic_set(&bat_priv->bonding, 0);
818 atomic_set(&bat_priv->ap_isolation, 0);
815 atomic_set(&bat_priv->vis_mode, VIS_TYPE_CLIENT_UPDATE); 819 atomic_set(&bat_priv->vis_mode, VIS_TYPE_CLIENT_UPDATE);
816 atomic_set(&bat_priv->gw_mode, GW_MODE_OFF); 820 atomic_set(&bat_priv->gw_mode, GW_MODE_OFF);
817 atomic_set(&bat_priv->gw_sel_class, 20); 821 atomic_set(&bat_priv->gw_sel_class, 20);
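The interface_tx() hunk above reworks the broadcast decision: start from "broadcast if the destination is multicast", then let the gateway code claim the frame and turn it back into unicast. A tiny sketch of that decision follows; gw_wants_packet() is a made-up stand-in for the real gateway check, not a batman-adv function.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_multicast_ether(const uint8_t *addr)
{
	return addr[0] & 0x01;	/* group bit of the destination MAC */
}

/* placeholder: would ask the gateway code whether it handles this frame */
static int gw_wants_packet(const uint8_t *dst)
{
	(void)dst;
	return 1;
}

int main(void)
{
	const uint8_t dst[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	bool do_bcast = is_multicast_ether(dst);

	/* a gateway that takes the packet overrides the broadcast decision */
	if (do_bcast && gw_wants_packet(dst))
		do_bcast = false;

	printf("broadcast the frame: %s\n", do_bcast ? "yes" : "no");
	return 0;
}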
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index fb6931d00cd7..cc53f78e448c 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -183,7 +183,8 @@ static int tt_local_init(struct bat_priv *bat_priv)
183 return 1; 183 return 1;
184} 184}
185 185
186void tt_local_add(struct net_device *soft_iface, const uint8_t *addr) 186void tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
187 int ifindex)
187{ 188{
188 struct bat_priv *bat_priv = netdev_priv(soft_iface); 189 struct bat_priv *bat_priv = netdev_priv(soft_iface);
189 struct tt_local_entry *tt_local_entry = NULL; 190 struct tt_local_entry *tt_local_entry = NULL;
@@ -207,6 +208,8 @@ void tt_local_add(struct net_device *soft_iface, const uint8_t *addr)
207 memcpy(tt_local_entry->addr, addr, ETH_ALEN); 208 memcpy(tt_local_entry->addr, addr, ETH_ALEN);
208 tt_local_entry->last_seen = jiffies; 209 tt_local_entry->last_seen = jiffies;
209 tt_local_entry->flags = NO_FLAGS; 210 tt_local_entry->flags = NO_FLAGS;
211 if (is_wifi_iface(ifindex))
212 tt_local_entry->flags |= TT_CLIENT_WIFI;
210 atomic_set(&tt_local_entry->refcount, 2); 213 atomic_set(&tt_local_entry->refcount, 2);
211 214
212 /* the batman interface mac address should never be purged */ 215 /* the batman interface mac address should never be purged */
@@ -329,7 +332,7 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset)
329 332
330 rcu_read_lock(); 333 rcu_read_lock();
331 __hlist_for_each_rcu(node, head) 334 __hlist_for_each_rcu(node, head)
332 buf_size += 21; 335 buf_size += 29;
333 rcu_read_unlock(); 336 rcu_read_unlock();
334 } 337 }
335 338
@@ -348,8 +351,19 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset)
348 rcu_read_lock(); 351 rcu_read_lock();
349 hlist_for_each_entry_rcu(tt_local_entry, node, 352 hlist_for_each_entry_rcu(tt_local_entry, node,
350 head, hash_entry) { 353 head, hash_entry) {
351 pos += snprintf(buff + pos, 22, " * %pM\n", 354 pos += snprintf(buff + pos, 30, " * %pM "
352 tt_local_entry->addr); 355 "[%c%c%c%c%c]\n",
356 tt_local_entry->addr,
357 (tt_local_entry->flags &
358 TT_CLIENT_ROAM ? 'R' : '.'),
359 (tt_local_entry->flags &
360 TT_CLIENT_NOPURGE ? 'P' : '.'),
361 (tt_local_entry->flags &
362 TT_CLIENT_NEW ? 'N' : '.'),
363 (tt_local_entry->flags &
364 TT_CLIENT_PENDING ? 'X' : '.'),
365 (tt_local_entry->flags &
366 TT_CLIENT_WIFI ? 'W' : '.'));
353 } 367 }
354 rcu_read_unlock(); 368 rcu_read_unlock();
355 } 369 }
@@ -369,8 +383,8 @@ static void tt_local_set_pending(struct bat_priv *bat_priv,
369 tt_local_event(bat_priv, tt_local_entry->addr, 383 tt_local_event(bat_priv, tt_local_entry->addr,
370 tt_local_entry->flags | flags); 384 tt_local_entry->flags | flags);
371 385
372 /* The local client has to be merked as "pending to be removed" but has 386 /* The local client has to be marked as "pending to be removed" but has
373 * to be kept in the table in order to send it in an full tables 387 * to be kept in the table in order to send it in a full table
374 * response issued before the net ttvn increment (consistency check) */ 388 * response issued before the net ttvn increment (consistency check) */
375 tt_local_entry->flags |= TT_CLIENT_PENDING; 389 tt_local_entry->flags |= TT_CLIENT_PENDING;
376} 390}
@@ -495,7 +509,8 @@ static void tt_changes_list_free(struct bat_priv *bat_priv)
495 509
496/* caller must hold orig_node refcount */ 510/* caller must hold orig_node refcount */
497int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, 511int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
498 const unsigned char *tt_addr, uint8_t ttvn, bool roaming) 512 const unsigned char *tt_addr, uint8_t ttvn, bool roaming,
513 bool wifi)
499{ 514{
500 struct tt_global_entry *tt_global_entry; 515 struct tt_global_entry *tt_global_entry;
501 struct orig_node *orig_node_tmp; 516 struct orig_node *orig_node_tmp;
@@ -537,6 +552,9 @@ int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
537 tt_global_entry->roam_at = 0; 552 tt_global_entry->roam_at = 0;
538 } 553 }
539 554
555 if (wifi)
556 tt_global_entry->flags |= TT_CLIENT_WIFI;
557
540 bat_dbg(DBG_TT, bat_priv, 558 bat_dbg(DBG_TT, bat_priv,
541 "Creating new global tt entry: %pM (via %pM)\n", 559 "Creating new global tt entry: %pM (via %pM)\n",
542 tt_global_entry->addr, orig_node->orig); 560 tt_global_entry->addr, orig_node->orig);
@@ -582,8 +600,8 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset)
582 seq_printf(seq, 600 seq_printf(seq,
583 "Globally announced TT entries received via the mesh %s\n", 601 "Globally announced TT entries received via the mesh %s\n",
584 net_dev->name); 602 net_dev->name);
585 seq_printf(seq, " %-13s %s %-15s %s\n", 603 seq_printf(seq, " %-13s %s %-15s %s %s\n",
586 "Client", "(TTVN)", "Originator", "(Curr TTVN)"); 604 "Client", "(TTVN)", "Originator", "(Curr TTVN)", "Flags");
587 605
588 buf_size = 1; 606 buf_size = 1;
589 /* Estimate length for: " * xx:xx:xx:xx:xx:xx (ttvn) via 607 /* Estimate length for: " * xx:xx:xx:xx:xx:xx (ttvn) via
@@ -593,7 +611,7 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset)
593 611
594 rcu_read_lock(); 612 rcu_read_lock();
595 __hlist_for_each_rcu(node, head) 613 __hlist_for_each_rcu(node, head)
596 buf_size += 59; 614 buf_size += 67;
597 rcu_read_unlock(); 615 rcu_read_unlock();
598 } 616 }
599 617
@@ -612,14 +630,20 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset)
612 rcu_read_lock(); 630 rcu_read_lock();
613 hlist_for_each_entry_rcu(tt_global_entry, node, 631 hlist_for_each_entry_rcu(tt_global_entry, node,
614 head, hash_entry) { 632 head, hash_entry) {
615 pos += snprintf(buff + pos, 61, 633 pos += snprintf(buff + pos, 69,
616 " * %pM (%3u) via %pM (%3u)\n", 634 " * %pM (%3u) via %pM (%3u) "
617 tt_global_entry->addr, 635 "[%c%c%c]\n", tt_global_entry->addr,
618 tt_global_entry->ttvn, 636 tt_global_entry->ttvn,
619 tt_global_entry->orig_node->orig, 637 tt_global_entry->orig_node->orig,
620 (uint8_t) atomic_read( 638 (uint8_t) atomic_read(
621 &tt_global_entry->orig_node-> 639 &tt_global_entry->orig_node->
622 last_ttvn)); 640 last_ttvn),
641 (tt_global_entry->flags &
642 TT_CLIENT_ROAM ? 'R' : '.'),
643 (tt_global_entry->flags &
644 TT_CLIENT_PENDING ? 'X' : '.'),
645 (tt_global_entry->flags &
646 TT_CLIENT_WIFI ? 'W' : '.'));
623 } 647 }
624 rcu_read_unlock(); 648 rcu_read_unlock();
625 } 649 }
@@ -774,30 +798,56 @@ static void tt_global_table_free(struct bat_priv *bat_priv)
774 bat_priv->tt_global_hash = NULL; 798 bat_priv->tt_global_hash = NULL;
775} 799}
776 800
801static bool _is_ap_isolated(struct tt_local_entry *tt_local_entry,
802 struct tt_global_entry *tt_global_entry)
803{
804 bool ret = false;
805
806 if (tt_local_entry->flags & TT_CLIENT_WIFI &&
807 tt_global_entry->flags & TT_CLIENT_WIFI)
808 ret = true;
809
810 return ret;
811}
812
777struct orig_node *transtable_search(struct bat_priv *bat_priv, 813struct orig_node *transtable_search(struct bat_priv *bat_priv,
778 const uint8_t *addr) 814 const uint8_t *src, const uint8_t *addr)
779{ 815{
780 struct tt_global_entry *tt_global_entry; 816 struct tt_local_entry *tt_local_entry = NULL;
817 struct tt_global_entry *tt_global_entry = NULL;
781 struct orig_node *orig_node = NULL; 818 struct orig_node *orig_node = NULL;
782 819
783 tt_global_entry = tt_global_hash_find(bat_priv, addr); 820 if (src && atomic_read(&bat_priv->ap_isolation)) {
821 tt_local_entry = tt_local_hash_find(bat_priv, src);
822 if (!tt_local_entry)
823 goto out;
824 }
784 825
826 tt_global_entry = tt_global_hash_find(bat_priv, addr);
785 if (!tt_global_entry) 827 if (!tt_global_entry)
786 goto out; 828 goto out;
787 829
830 /* check whether the clients should not communicate due to AP
831 * isolation */
832 if (tt_local_entry && _is_ap_isolated(tt_local_entry, tt_global_entry))
833 goto out;
834
788 if (!atomic_inc_not_zero(&tt_global_entry->orig_node->refcount)) 835 if (!atomic_inc_not_zero(&tt_global_entry->orig_node->refcount))
789 goto free_tt; 836 goto out;
790 837
791 /* A global client marked as PENDING has already moved from that 838 /* A global client marked as PENDING has already moved from that
792 * originator */ 839 * originator */
793 if (tt_global_entry->flags & TT_CLIENT_PENDING) 840 if (tt_global_entry->flags & TT_CLIENT_PENDING)
794 goto free_tt; 841 goto out;
795 842
796 orig_node = tt_global_entry->orig_node; 843 orig_node = tt_global_entry->orig_node;
797 844
798free_tt:
799 tt_global_entry_free_ref(tt_global_entry);
800out: 845out:
846 if (tt_global_entry)
847 tt_global_entry_free_ref(tt_global_entry);
848 if (tt_local_entry)
849 tt_local_entry_free_ref(tt_local_entry);
850
801 return orig_node; 851 return orig_node;
802} 852}
803 853
@@ -1029,8 +1079,9 @@ out:
1029 return skb; 1079 return skb;
1030} 1080}
1031 1081
1032int send_tt_request(struct bat_priv *bat_priv, struct orig_node *dst_orig_node, 1082static int send_tt_request(struct bat_priv *bat_priv,
1033 uint8_t ttvn, uint16_t tt_crc, bool full_table) 1083 struct orig_node *dst_orig_node,
1084 uint8_t ttvn, uint16_t tt_crc, bool full_table)
1034{ 1085{
1035 struct sk_buff *skb = NULL; 1086 struct sk_buff *skb = NULL;
1036 struct tt_query_packet *tt_request; 1087 struct tt_query_packet *tt_request;
@@ -1137,12 +1188,12 @@ static bool send_other_tt_response(struct bat_priv *bat_priv,
1137 orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); 1188 orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);
1138 req_ttvn = tt_request->ttvn; 1189 req_ttvn = tt_request->ttvn;
1139 1190
1140 /* I have not the requested data */ 1191 /* I don't have the requested data */
1141 if (orig_ttvn != req_ttvn || 1192 if (orig_ttvn != req_ttvn ||
1142 tt_request->tt_data != req_dst_orig_node->tt_crc) 1193 tt_request->tt_data != req_dst_orig_node->tt_crc)
1143 goto out; 1194 goto out;
1144 1195
1145 /* If it has explicitly been requested the full table */ 1196 /* If the full table has been explicitly requested */
1146 if (tt_request->flags & TT_FULL_TABLE || 1197 if (tt_request->flags & TT_FULL_TABLE ||
1147 !req_dst_orig_node->tt_buff) 1198 !req_dst_orig_node->tt_buff)
1148 full_table = true; 1199 full_table = true;
@@ -1363,7 +1414,9 @@ static void _tt_update_changes(struct bat_priv *bat_priv,
1363 (tt_change + i)->flags & TT_CLIENT_ROAM); 1414 (tt_change + i)->flags & TT_CLIENT_ROAM);
1364 else 1415 else
1365 if (!tt_global_add(bat_priv, orig_node, 1416 if (!tt_global_add(bat_priv, orig_node,
1366 (tt_change + i)->addr, ttvn, false)) 1417 (tt_change + i)->addr, ttvn, false,
1418 (tt_change + i)->flags &
1419 TT_CLIENT_WIFI))
1367 /* In case of problem while storing a 1420 /* In case of problem while storing a
1368 * global_entry, we stop the updating 1421 * global_entry, we stop the updating
1369 * procedure without committing the 1422 * procedure without committing the
@@ -1403,9 +1456,10 @@ out:
1403 orig_node_free_ref(orig_node); 1456 orig_node_free_ref(orig_node);
1404} 1457}
1405 1458
1406void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node, 1459static void tt_update_changes(struct bat_priv *bat_priv,
1407 uint16_t tt_num_changes, uint8_t ttvn, 1460 struct orig_node *orig_node,
1408 struct tt_change *tt_change) 1461 uint16_t tt_num_changes, uint8_t ttvn,
1462 struct tt_change *tt_change)
1409{ 1463{
1410 _tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes, 1464 _tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes,
1411 ttvn); 1465 ttvn);
@@ -1720,3 +1774,90 @@ void tt_commit_changes(struct bat_priv *bat_priv)
1720 atomic_inc(&bat_priv->ttvn); 1774 atomic_inc(&bat_priv->ttvn);
1721 bat_priv->tt_poss_change = false; 1775 bat_priv->tt_poss_change = false;
1722} 1776}
1777
1778bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst)
1779{
1780 struct tt_local_entry *tt_local_entry = NULL;
1781 struct tt_global_entry *tt_global_entry = NULL;
1782 bool ret = true;
1783
1784 if (!atomic_read(&bat_priv->ap_isolation))
1785 return false;
1786
1787 tt_local_entry = tt_local_hash_find(bat_priv, dst);
1788 if (!tt_local_entry)
1789 goto out;
1790
1791 tt_global_entry = tt_global_hash_find(bat_priv, src);
1792 if (!tt_global_entry)
1793 goto out;
1794
1795 if (_is_ap_isolated(tt_local_entry, tt_global_entry))
1796 goto out;
1797
1798 ret = false;
1799
1800out:
1801 if (tt_global_entry)
1802 tt_global_entry_free_ref(tt_global_entry);
1803 if (tt_local_entry)
1804 tt_local_entry_free_ref(tt_local_entry);
1805 return ret;
1806}
1807
1808void tt_update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node,
1809 const unsigned char *tt_buff, uint8_t tt_num_changes,
1810 uint8_t ttvn, uint16_t tt_crc)
1811{
1812 uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
1813 bool full_table = true;
1814
1815 /* the ttvn increased by one -> we can apply the attached changes */
1816 if (ttvn - orig_ttvn == 1) {
1817 /* the OGM could not contain the changes due to their size or
1818 * because they have already been sent TT_OGM_APPEND_MAX times.
1819 * In this case send a tt request */
1820 if (!tt_num_changes) {
1821 full_table = false;
1822 goto request_table;
1823 }
1824
1825 tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn,
1826 (struct tt_change *)tt_buff);
1827
1828 /* Even if we received the precomputed crc with the OGM, we
1829 * prefer to recompute it to spot any possible inconsistency
1830 * in the global table */
1831 orig_node->tt_crc = tt_global_crc(bat_priv, orig_node);
1832
1833 /* The ttvn alone is not enough to guarantee consistency
1834 * because a single value could represent different states
1835 * (due to the wrap around). Thus a node has to check whether
1836 * the resulting table (after applying the changes) is still
1837 * consistent or not. E.g. a node could disconnect while its
1838 * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case
1839 * checking the CRC value is mandatory to detect the
1840 * inconsistency */
1841 if (orig_node->tt_crc != tt_crc)
1842 goto request_table;
1843
1844 /* Roaming phase is over: tables are in sync again. I can
1845 * unset the flag */
1846 orig_node->tt_poss_change = false;
1847 } else {
1848 /* if we missed more than one change or our tables are not
1849 * in sync anymore -> request fresh tt data */
1850 if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) {
1851request_table:
1852 bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. "
1853 "Need to retrieve the correct information "
1854 "(ttvn: %u last_ttvn: %u crc: %u last_crc: "
1855 "%u num_changes: %u)\n", orig_node->orig, ttvn,
1856 orig_ttvn, tt_crc, orig_node->tt_crc,
1857 tt_num_changes);
1858 send_tt_request(bat_priv, orig_node, ttvn, tt_crc,
1859 full_table);
1860 return;
1861 }
1862 }
1863}
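The AP-isolation test added above keeps two clients apart only when both carry the Wi-Fi flag. A minimal sketch of that predicate is shown below; the flag value and the struct layout are simplified for the example and are not the kernel definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TT_CLIENT_WIFI	(1 << 4)	/* example value only */

struct tt_entry {
	uint16_t flags;
};

/* isolate only if both endpoints were learned behind a Wi-Fi interface */
static bool ap_isolated(const struct tt_entry *src, const struct tt_entry *dst)
{
	return (src->flags & TT_CLIENT_WIFI) && (dst->flags & TT_CLIENT_WIFI);
}

int main(void)
{
	struct tt_entry wifi_client = { .flags = TT_CLIENT_WIFI };
	struct tt_entry wired_client = { .flags = 0 };

	printf("wifi<->wifi isolated:  %d\n",
	       ap_isolated(&wifi_client, &wifi_client));
	printf("wifi<->wired isolated: %d\n",
	       ap_isolated(&wifi_client, &wired_client));
	return 0;
}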
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index d4122cba53b8..30efd49881a3 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -26,15 +26,16 @@ int tt_len(int changes_num);
26int tt_changes_fill_buffer(struct bat_priv *bat_priv, 26int tt_changes_fill_buffer(struct bat_priv *bat_priv,
27 unsigned char *buff, int buff_len); 27 unsigned char *buff, int buff_len);
28int tt_init(struct bat_priv *bat_priv); 28int tt_init(struct bat_priv *bat_priv);
29void tt_local_add(struct net_device *soft_iface, const uint8_t *addr); 29void tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
30 int ifindex);
30void tt_local_remove(struct bat_priv *bat_priv, 31void tt_local_remove(struct bat_priv *bat_priv,
31 const uint8_t *addr, const char *message, bool roaming); 32 const uint8_t *addr, const char *message, bool roaming);
32int tt_local_seq_print_text(struct seq_file *seq, void *offset); 33int tt_local_seq_print_text(struct seq_file *seq, void *offset);
33void tt_global_add_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, 34void tt_global_add_orig(struct bat_priv *bat_priv, struct orig_node *orig_node,
34 const unsigned char *tt_buff, int tt_buff_len); 35 const unsigned char *tt_buff, int tt_buff_len);
35int tt_global_add(struct bat_priv *bat_priv, 36int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
36 struct orig_node *orig_node, const unsigned char *addr, 37 const unsigned char *addr, uint8_t ttvn, bool roaming,
37 uint8_t ttvn, bool roaming); 38 bool wifi);
38int tt_global_seq_print_text(struct seq_file *seq, void *offset); 39int tt_global_seq_print_text(struct seq_file *seq, void *offset);
39void tt_global_del_orig(struct bat_priv *bat_priv, 40void tt_global_del_orig(struct bat_priv *bat_priv,
40 struct orig_node *orig_node, const char *message); 41 struct orig_node *orig_node, const char *message);
@@ -42,25 +43,23 @@ void tt_global_del(struct bat_priv *bat_priv,
42 struct orig_node *orig_node, const unsigned char *addr, 43 struct orig_node *orig_node, const unsigned char *addr,
43 const char *message, bool roaming); 44 const char *message, bool roaming);
44struct orig_node *transtable_search(struct bat_priv *bat_priv, 45struct orig_node *transtable_search(struct bat_priv *bat_priv,
45 const uint8_t *addr); 46 const uint8_t *src, const uint8_t *addr);
46void tt_save_orig_buffer(struct bat_priv *bat_priv, struct orig_node *orig_node, 47void tt_save_orig_buffer(struct bat_priv *bat_priv, struct orig_node *orig_node,
47 const unsigned char *tt_buff, uint8_t tt_num_changes); 48 const unsigned char *tt_buff, uint8_t tt_num_changes);
48uint16_t tt_local_crc(struct bat_priv *bat_priv); 49uint16_t tt_local_crc(struct bat_priv *bat_priv);
49uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node); 50uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node);
50void tt_free(struct bat_priv *bat_priv); 51void tt_free(struct bat_priv *bat_priv);
51int send_tt_request(struct bat_priv *bat_priv,
52 struct orig_node *dst_orig_node, uint8_t hvn,
53 uint16_t tt_crc, bool full_table);
54bool send_tt_response(struct bat_priv *bat_priv, 52bool send_tt_response(struct bat_priv *bat_priv,
55 struct tt_query_packet *tt_request); 53 struct tt_query_packet *tt_request);
56void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node,
57 uint16_t tt_num_changes, uint8_t ttvn,
58 struct tt_change *tt_change);
59bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr); 54bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr);
60void handle_tt_response(struct bat_priv *bat_priv, 55void handle_tt_response(struct bat_priv *bat_priv,
61 struct tt_query_packet *tt_response); 56 struct tt_query_packet *tt_response);
62void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client, 57void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client,
63 struct orig_node *orig_node); 58 struct orig_node *orig_node);
64void tt_commit_changes(struct bat_priv *bat_priv); 59void tt_commit_changes(struct bat_priv *bat_priv);
60bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst);
61void tt_update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node,
62 const unsigned char *tt_buff, uint8_t tt_num_changes,
63 uint8_t ttvn, uint16_t tt_crc);
65 64
66#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ 65#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
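tt_update_orig(), declared above, decides between applying attached TT changes and requesting a fresh table. The sketch below restates that decision in isolation; the enum and helper names are invented for the example, and the caller is assumed to recompute the CRC after applying changes, as the routing.c comment describes.

#include <stdint.h>
#include <stdio.h>

enum tt_action {
	TT_APPLY_CHANGES,	/* incremental update is safe */
	TT_REQUEST_TABLE,	/* state unclear -> ask for fresh data */
	TT_NOTHING		/* already in sync */
};

static enum tt_action tt_decide(uint8_t my_ttvn, uint8_t their_ttvn,
				uint16_t my_crc, uint16_t their_crc,
				uint8_t num_changes)
{
	if ((uint8_t)(their_ttvn - my_ttvn) == 1) {
		/* the OGM could not carry the changes -> need the table */
		if (!num_changes)
			return TT_REQUEST_TABLE;
		/* apply the changes; the caller recomputes the CRC
		 * afterwards and requests the full table on a mismatch */
		return TT_APPLY_CHANGES;
	}

	/* more than one version missed, or the tables drifted apart */
	if (their_ttvn != my_ttvn || my_crc != their_crc)
		return TT_REQUEST_TABLE;

	return TT_NOTHING;
}

int main(void)
{
	printf("%d\n", tt_decide(4, 5, 0xbeef, 0xbeef, 3)); /* apply */
	printf("%d\n", tt_decide(4, 7, 0xbeef, 0xdead, 3)); /* request */
	return 0;
}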
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 25bd1db35370..1ae355750511 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -57,7 +57,7 @@ struct hard_iface {
57 * @batman_seqno_reset: time when the batman seqno window was reset 57 * @batman_seqno_reset: time when the batman seqno window was reset
58 * @gw_flags: flags related to gateway class 58 * @gw_flags: flags related to gateway class
59 * @flags: for now only VIS_SERVER flag 59 * @flags: for now only VIS_SERVER flag
60 * @last_real_seqno: last and best known squence number 60 * @last_real_seqno: last and best known sequence number
61 * @last_ttl: ttl of last received packet 61 * @last_ttl: ttl of last received packet
62 * @last_bcast_seqno: last broadcast sequence number received by this host 62 * @last_bcast_seqno: last broadcast sequence number received by this host
63 * 63 *
@@ -146,6 +146,7 @@ struct bat_priv {
146 atomic_t aggregated_ogms; /* boolean */ 146 atomic_t aggregated_ogms; /* boolean */
147 atomic_t bonding; /* boolean */ 147 atomic_t bonding; /* boolean */
148 atomic_t fragmentation; /* boolean */ 148 atomic_t fragmentation; /* boolean */
149 atomic_t ap_isolation; /* boolean */
149 atomic_t vis_mode; /* VIS_TYPE_* */ 150 atomic_t vis_mode; /* VIS_TYPE_* */
150 atomic_t gw_mode; /* GW_MODE_* */ 151 atomic_t gw_mode; /* GW_MODE_* */
151 atomic_t gw_sel_class; /* uint */ 152 atomic_t gw_sel_class; /* uint */
@@ -156,7 +157,7 @@ struct bat_priv {
156 atomic_t bcast_seqno; 157 atomic_t bcast_seqno;
157 atomic_t bcast_queue_left; 158 atomic_t bcast_queue_left;
158 atomic_t batman_queue_left; 159 atomic_t batman_queue_left;
159 atomic_t ttvn; /* tranlation table version number */ 160 atomic_t ttvn; /* translation table version number */
160 atomic_t tt_ogm_append_cnt; 161 atomic_t tt_ogm_append_cnt;
161 atomic_t tt_local_changes; /* changes registered in a OGM interval */ 162 atomic_t tt_local_changes; /* changes registered in a OGM interval */
162 /* The tt_poss_change flag is used to detect an ongoing roaming phase. 163 /* The tt_poss_change flag is used to detect an ongoing roaming phase.
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 32b125fb3d3b..07d1c1da89dd 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -299,8 +299,10 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv)
299 goto find_router; 299 goto find_router;
300 } 300 }
301 301
302 /* check for tt host - increases orig_node refcount */ 302 /* check for tt host - increases orig_node refcount.
303 orig_node = transtable_search(bat_priv, ethhdr->h_dest); 303 * returns NULL in case of AP isolation */
304 orig_node = transtable_search(bat_priv, ethhdr->h_source,
305 ethhdr->h_dest);
304 306
305find_router: 307find_router:
306 /** 308 /**
diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h
index 62f54b954625..8fd5535544b9 100644
--- a/net/batman-adv/unicast.h
+++ b/net/batman-adv/unicast.h
@@ -24,7 +24,7 @@
24 24
25#include "packet.h" 25#include "packet.h"
26 26
27#define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */ 27#define FRAG_TIMEOUT 10000 /* purge frag list entries after time in ms */
28#define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */ 28#define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */
29 29
30int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, 30int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 8a1b98589d76..f81a6b668b0c 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -131,7 +131,7 @@ static void vis_data_insert_interface(const uint8_t *interface,
131 return; 131 return;
132 } 132 }
133 133
134 /* its a new address, add it to the list */ 134 /* it's a new address, add it to the list */
135 entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 135 entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
136 if (!entry) 136 if (!entry)
137 return; 137 return;
@@ -465,7 +465,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv,
465 /* try to add it */ 465 /* try to add it */
466 hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, 466 hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
467 info, &info->hash_entry); 467 info, &info->hash_entry);
468 if (hash_added < 0) { 468 if (hash_added != 0) {
469 /* did not work (for some reason) */ 469 /* did not work (for some reason) */
470 kref_put(&info->refcount, free_info); 470 kref_put(&info->refcount, free_info);
471 info = NULL; 471 info = NULL;
@@ -887,10 +887,8 @@ int vis_init(struct bat_priv *bat_priv)
887 } 887 }
888 888
889 bat_priv->my_vis_info = kmalloc(MAX_VIS_PACKET_SIZE, GFP_ATOMIC); 889 bat_priv->my_vis_info = kmalloc(MAX_VIS_PACKET_SIZE, GFP_ATOMIC);
890 if (!bat_priv->my_vis_info) { 890 if (!bat_priv->my_vis_info)
891 pr_err("Can't initialize vis packet\n");
892 goto err; 891 goto err;
893 }
894 892
895 bat_priv->my_vis_info->skb_packet = dev_alloc_skb(sizeof(*packet) + 893 bat_priv->my_vis_info->skb_packet = dev_alloc_skb(sizeof(*packet) +
896 MAX_VIS_PACKET_SIZE + 894 MAX_VIS_PACKET_SIZE +
@@ -920,7 +918,7 @@ int vis_init(struct bat_priv *bat_priv)
920 hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, 918 hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
921 bat_priv->my_vis_info, 919 bat_priv->my_vis_info,
922 &bat_priv->my_vis_info->hash_entry); 920 &bat_priv->my_vis_info->hash_entry);
923 if (hash_added < 0) { 921 if (hash_added != 0) {
924 pr_err("Can't add own vis packet into hash\n"); 922 pr_err("Can't add own vis packet into hash\n");
925 /* not in hash, need to remove it manually. */ 923 /* not in hash, need to remove it manually. */
926 kref_put(&bat_priv->my_vis_info->refcount, free_info); 924 kref_put(&bat_priv->my_vis_info->refcount, free_info);
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index d4f5dff7c955..bc4086480d97 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -217,7 +217,7 @@ static const struct net_device_ops bnep_netdev_ops = {
217 .ndo_stop = bnep_net_close, 217 .ndo_stop = bnep_net_close,
218 .ndo_start_xmit = bnep_net_xmit, 218 .ndo_start_xmit = bnep_net_xmit,
219 .ndo_validate_addr = eth_validate_addr, 219 .ndo_validate_addr = eth_validate_addr,
220 .ndo_set_multicast_list = bnep_net_set_mc_list, 220 .ndo_set_rx_mode = bnep_net_set_mc_list,
221 .ndo_set_mac_address = bnep_net_set_mac_addr, 221 .ndo_set_mac_address = bnep_net_set_mac_addr,
222 .ndo_tx_timeout = bnep_net_timeout, 222 .ndo_tx_timeout = bnep_net_timeout,
223 .ndo_change_mtu = eth_change_mtu, 223 .ndo_change_mtu = eth_change_mtu,
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 32b8f9f7f79e..feb77ea7b58e 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -91,7 +91,6 @@ static int br_dev_open(struct net_device *dev)
91{ 91{
92 struct net_bridge *br = netdev_priv(dev); 92 struct net_bridge *br = netdev_priv(dev);
93 93
94 netif_carrier_off(dev);
95 netdev_update_features(dev); 94 netdev_update_features(dev);
96 netif_start_queue(dev); 95 netif_start_queue(dev);
97 br_stp_enable_bridge(br); 96 br_stp_enable_bridge(br);
@@ -108,8 +107,6 @@ static int br_dev_stop(struct net_device *dev)
108{ 107{
109 struct net_bridge *br = netdev_priv(dev); 108 struct net_bridge *br = netdev_priv(dev);
110 109
111 netif_carrier_off(dev);
112
113 br_stp_disable_bridge(br); 110 br_stp_disable_bridge(br);
114 br_multicast_stop(br); 111 br_multicast_stop(br);
115 112
@@ -304,7 +301,7 @@ static const struct net_device_ops br_netdev_ops = {
304 .ndo_start_xmit = br_dev_xmit, 301 .ndo_start_xmit = br_dev_xmit,
305 .ndo_get_stats64 = br_get_stats64, 302 .ndo_get_stats64 = br_get_stats64,
306 .ndo_set_mac_address = br_set_mac_address, 303 .ndo_set_mac_address = br_set_mac_address,
307 .ndo_set_multicast_list = br_dev_set_multicast_list, 304 .ndo_set_rx_mode = br_dev_set_multicast_list,
308 .ndo_change_mtu = br_change_mtu, 305 .ndo_change_mtu = br_change_mtu,
309 .ndo_do_ioctl = br_dev_ioctl, 306 .ndo_do_ioctl = br_dev_ioctl,
310#ifdef CONFIG_NET_POLL_CONTROLLER 307#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -361,6 +358,8 @@ void br_dev_setup(struct net_device *dev)
361 memcpy(br->group_addr, br_group_address, ETH_ALEN); 358 memcpy(br->group_addr, br_group_address, ETH_ALEN);
362 359
363 br->stp_enabled = BR_NO_STP; 360 br->stp_enabled = BR_NO_STP;
361 br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
362
364 br->designated_root = br->bridge_id; 363 br->designated_root = br->bridge_id;
365 br->bridge_max_age = br->max_age = 20 * HZ; 364 br->bridge_max_age = br->max_age = 20 * HZ;
366 br->bridge_hello_time = br->hello_time = 2 * HZ; 365 br->bridge_hello_time = br->hello_time = 2 * HZ;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 68def3b7fb49..c8e7861b88b0 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -558,19 +558,28 @@ skip:
558 558
559/* Create new static fdb entry */ 559/* Create new static fdb entry */
560static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, 560static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
561 __u16 state) 561 __u16 state, __u16 flags)
562{ 562{
563 struct net_bridge *br = source->br; 563 struct net_bridge *br = source->br;
564 struct hlist_head *head = &br->hash[br_mac_hash(addr)]; 564 struct hlist_head *head = &br->hash[br_mac_hash(addr)];
565 struct net_bridge_fdb_entry *fdb; 565 struct net_bridge_fdb_entry *fdb;
566 566
567 fdb = fdb_find(head, addr); 567 fdb = fdb_find(head, addr);
568 if (fdb) 568 if (fdb == NULL) {
569 return -EEXIST; 569 if (!(flags & NLM_F_CREATE))
570 return -ENOENT;
570 571
571 fdb = fdb_create(head, source, addr); 572 fdb = fdb_create(head, source, addr);
572 if (!fdb) 573 if (!fdb)
573 return -ENOMEM; 574 return -ENOMEM;
575 } else {
576 if (flags & NLM_F_EXCL)
577 return -EEXIST;
578
579 if (flags & NLM_F_REPLACE)
580 fdb->updated = fdb->used = jiffies;
581 fdb->is_local = fdb->is_static = 0;
582 }
574 583
575 if (state & NUD_PERMANENT) 584 if (state & NUD_PERMANENT)
576 fdb->is_local = fdb->is_static = 1; 585 fdb->is_local = fdb->is_static = 1;
@@ -626,7 +635,7 @@ int br_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
626 } 635 }
627 636
628 spin_lock_bh(&p->br->hash_lock); 637 spin_lock_bh(&p->br->hash_lock);
629 err = fdb_add_entry(p, addr, ndm->ndm_state); 638 err = fdb_add_entry(p, addr, ndm->ndm_state, nlh->nlmsg_flags);
630 spin_unlock_bh(&p->br->hash_lock); 639 spin_unlock_bh(&p->br->hash_lock);
631 640
632 return err; 641 return err;
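For reference, a minimal standalone sketch of the flag handling that fdb_add_entry() gains in the hunk above, assuming the usual NLM_F_* semantics from linux/netlink.h; fdb_add_decision() and enum fdb_outcome are invented names used only for illustration, not part of the patch:

	#include <stdbool.h>
	#include <stdio.h>

	#define NLM_F_REPLACE 0x100
	#define NLM_F_EXCL    0x200
	#define NLM_F_CREATE  0x400

	enum fdb_outcome { FDB_CREATED, FDB_UPDATED, FDB_ENOENT, FDB_EEXIST };

	/* Mirrors the flag handling added to fdb_add_entry(): a missing entry
	 * is only created with NLM_F_CREATE, an existing one fails with
	 * NLM_F_EXCL, and NLM_F_REPLACE refreshes it before it is re-marked
	 * according to ndm_state. */
	static enum fdb_outcome fdb_add_decision(bool entry_exists, unsigned int flags)
	{
		if (!entry_exists)
			return (flags & NLM_F_CREATE) ? FDB_CREATED : FDB_ENOENT;
		if (flags & NLM_F_EXCL)
			return FDB_EEXIST;
		return FDB_UPDATED;
	}

	int main(void)
	{
		printf("%d\n", fdb_add_decision(false, NLM_F_CREATE)); /* FDB_CREATED */
		printf("%d\n", fdb_add_decision(true, NLM_F_EXCL));    /* FDB_EEXIST  */
		return 0;
	}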
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 3176e2e13d9b..c3b77dceb937 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -13,6 +13,7 @@
13 13
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/netdevice.h> 15#include <linux/netdevice.h>
16#include <linux/etherdevice.h>
16#include <linux/netpoll.h> 17#include <linux/netpoll.h>
17#include <linux/ethtool.h> 18#include <linux/ethtool.h>
18#include <linux/if_arp.h> 19#include <linux/if_arp.h>
@@ -33,20 +34,18 @@
33 */ 34 */
34static int port_cost(struct net_device *dev) 35static int port_cost(struct net_device *dev)
35{ 36{
36 if (dev->ethtool_ops && dev->ethtool_ops->get_settings) { 37 struct ethtool_cmd ecmd;
37 struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET, }; 38
38 39 if (!__ethtool_get_settings(dev, &ecmd)) {
39 if (!dev_ethtool_get_settings(dev, &ecmd)) { 40 switch (ethtool_cmd_speed(&ecmd)) {
40 switch (ethtool_cmd_speed(&ecmd)) { 41 case SPEED_10000:
41 case SPEED_10000: 42 return 2;
42 return 2; 43 case SPEED_1000:
43 case SPEED_1000: 44 return 4;
44 return 4; 45 case SPEED_100:
45 case SPEED_100: 46 return 19;
46 return 19; 47 case SPEED_10:
47 case SPEED_10: 48 return 100;
48 return 100;
49 }
50 } 49 }
51 } 50 }
52 51
@@ -231,6 +230,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
231int br_add_bridge(struct net *net, const char *name) 230int br_add_bridge(struct net *net, const char *name)
232{ 231{
233 struct net_device *dev; 232 struct net_device *dev;
233 int res;
234 234
235 dev = alloc_netdev(sizeof(struct net_bridge), name, 235 dev = alloc_netdev(sizeof(struct net_bridge), name,
236 br_dev_setup); 236 br_dev_setup);
@@ -240,7 +240,10 @@ int br_add_bridge(struct net *net, const char *name)
240 240
241 dev_net_set(dev, net); 241 dev_net_set(dev, net);
242 242
243 return register_netdev(dev); 243 res = register_netdev(dev);
244 if (res)
245 free_netdev(dev);
246 return res;
244} 247}
245 248
246int br_del_bridge(struct net *net, const char *name) 249int br_del_bridge(struct net *net, const char *name)
@@ -320,7 +323,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
320 323
321 /* Don't allow bridging non-ethernet like devices */ 324 /* Don't allow bridging non-ethernet like devices */
322 if ((dev->flags & IFF_LOOPBACK) || 325 if ((dev->flags & IFF_LOOPBACK) ||
323 dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN) 326 dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
327 !is_valid_ether_addr(dev->dev_addr))
324 return -EINVAL; 328 return -EINVAL;
325 329
326 /* No bridging of bridges */ 330 /* No bridging of bridges */
@@ -348,10 +352,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
348 err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj), 352 err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
349 SYSFS_BRIDGE_PORT_ATTR); 353 SYSFS_BRIDGE_PORT_ATTR);
350 if (err) 354 if (err)
351 goto err0;
352
353 err = br_fdb_insert(br, p, dev->dev_addr);
354 if (err)
355 goto err1; 355 goto err1;
356 356
357 err = br_sysfs_addif(p); 357 err = br_sysfs_addif(p);
@@ -392,6 +392,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
392 392
393 dev_set_mtu(br->dev, br_min_mtu(br)); 393 dev_set_mtu(br->dev, br_min_mtu(br));
394 394
395 if (br_fdb_insert(br, p, dev->dev_addr))
396 netdev_err(dev, "failed insert local address bridge forwarding table\n");
397
395 kobject_uevent(&p->kobj, KOBJ_ADD); 398 kobject_uevent(&p->kobj, KOBJ_ADD);
396 399
397 return 0; 400 return 0;
@@ -401,11 +404,9 @@ err4:
401err3: 404err3:
402 sysfs_remove_link(br->ifobj, p->dev->name); 405 sysfs_remove_link(br->ifobj, p->dev->name);
403err2: 406err2:
404 br_fdb_delete_by_port(br, p, 1);
405err1:
406 kobject_put(&p->kobj); 407 kobject_put(&p->kobj);
407 p = NULL; /* kobject_put frees */ 408 p = NULL; /* kobject_put frees */
408err0: 409err1:
409 dev_set_promiscuity(dev, -1); 410 dev_set_promiscuity(dev, -1);
410put_back: 411put_back:
411 dev_put(dev); 412 dev_put(dev);
@@ -417,6 +418,7 @@ put_back:
417int br_del_if(struct net_bridge *br, struct net_device *dev) 418int br_del_if(struct net_bridge *br, struct net_device *dev)
418{ 419{
419 struct net_bridge_port *p; 420 struct net_bridge_port *p;
421 bool changed_addr;
420 422
421 p = br_port_get_rtnl(dev); 423 p = br_port_get_rtnl(dev);
422 if (!p || p->br != br) 424 if (!p || p->br != br)
@@ -425,9 +427,12 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
425 del_nbp(p); 427 del_nbp(p);
426 428
427 spin_lock_bh(&br->lock); 429 spin_lock_bh(&br->lock);
428 br_stp_recalculate_bridge_id(br); 430 changed_addr = br_stp_recalculate_bridge_id(br);
429 spin_unlock_bh(&br->lock); 431 spin_unlock_bh(&br->lock);
430 432
433 if (changed_addr)
434 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
435
431 netdev_update_features(br->dev); 436 netdev_update_features(br->dev);
432 437
433 return 0; 438 return 0;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index f06ee39c73fd..6f9f8c014725 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -162,14 +162,37 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
162 p = br_port_get_rcu(skb->dev); 162 p = br_port_get_rcu(skb->dev);
163 163
164 if (unlikely(is_link_local(dest))) { 164 if (unlikely(is_link_local(dest))) {
165 /* Pause frames shouldn't be passed up by driver anyway */ 165 /*
166 if (skb->protocol == htons(ETH_P_PAUSE)) 166 * See IEEE 802.1D Table 7-10 Reserved addresses
167 *
168 * Assignment Value
169 * Bridge Group Address 01-80-C2-00-00-00
170 * (MAC Control) 802.3 01-80-C2-00-00-01
171 * (Link Aggregation) 802.3 01-80-C2-00-00-02
172 * 802.1X PAE address 01-80-C2-00-00-03
173 *
174 * 802.1AB LLDP 01-80-C2-00-00-0E
175 *
176 * Others reserved for future standardization
177 */
178 switch (dest[5]) {
179 case 0x00: /* Bridge Group Address */
180 /* If STP is turned off,
181 then must forward to keep loop detection */
182 if (p->br->stp_enabled == BR_NO_STP)
183 goto forward;
184 break;
185
186 case 0x01: /* IEEE MAC (Pause) */
167 goto drop; 187 goto drop;
168 188
169 /* If STP is turned off, then forward */ 189 default:
170 if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) 190 /* Allow selective forwarding for most other protocols */
171 goto forward; 191 if (p->br->group_fwd_mask & (1u << dest[5]))
192 goto forward;
193 }
172 194
195 /* Deliver packet to local host only */
173 if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, 196 if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
174 NULL, br_handle_local_finish)) { 197 NULL, br_handle_local_finish)) {
175 return RX_HANDLER_CONSUMED; /* consumed by filter */ 198 return RX_HANDLER_CONSUMED; /* consumed by filter */
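The reworked link-local branch above can be condensed into a small decision function. The sketch below uses hypothetical stand-ins (link_local_verdict, enum ll_verdict) for the in-kernel types and assumes dest[5] has already been limited to the 01-80-C2-00-00-0X range, as is_link_local() guarantees:

	#include <stdbool.h>
	#include <stdint.h>

	enum ll_verdict { LL_FORWARD, LL_DROP, LL_LOCAL_ONLY };

	/* Condensed from the new br_handle_frame() logic: dest_last is dest[5]
	 * (0x00..0x0F), group_fwd_mask is the new per-bridge bitmask and
	 * stp_off mirrors (stp_enabled == BR_NO_STP). */
	static enum ll_verdict link_local_verdict(uint8_t dest_last,
						  uint16_t group_fwd_mask,
						  bool stp_off)
	{
		switch (dest_last) {
		case 0x00:		/* Bridge Group Address (STP) */
			return stp_off ? LL_FORWARD : LL_LOCAL_ONLY;
		case 0x01:		/* 802.3 MAC Control (pause) */
			return LL_DROP;
		default:		/* selective forwarding per mask bit */
			if (group_fwd_mask & (1u << dest_last))
				return LL_FORWARD;
			return LL_LOCAL_ONLY;
		}
	}

	int main(void)
	{
		/* bit 3 set: 802.1X PAE (01-80-C2-00-00-03) may be forwarded */
		return link_local_verdict(0x03, 0x8, false) == LL_FORWARD ? 0 : 1;
	}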
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2d85ca7111d3..995cbe0ac0b2 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1456,7 +1456,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1456{ 1456{
1457 struct sk_buff *skb2; 1457 struct sk_buff *skb2;
1458 const struct ipv6hdr *ip6h; 1458 const struct ipv6hdr *ip6h;
1459 struct icmp6hdr *icmp6h; 1459 u8 icmp6_type;
1460 u8 nexthdr; 1460 u8 nexthdr;
1461 unsigned len; 1461 unsigned len;
1462 int offset; 1462 int offset;
@@ -1502,9 +1502,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1502 __skb_pull(skb2, offset); 1502 __skb_pull(skb2, offset);
1503 skb_reset_transport_header(skb2); 1503 skb_reset_transport_header(skb2);
1504 1504
1505 icmp6h = icmp6_hdr(skb2); 1505 icmp6_type = icmp6_hdr(skb2)->icmp6_type;
1506 1506
1507 switch (icmp6h->icmp6_type) { 1507 switch (icmp6_type) {
1508 case ICMPV6_MGM_QUERY: 1508 case ICMPV6_MGM_QUERY:
1509 case ICMPV6_MGM_REPORT: 1509 case ICMPV6_MGM_REPORT:
1510 case ICMPV6_MGM_REDUCTION: 1510 case ICMPV6_MGM_REDUCTION:
@@ -1520,16 +1520,23 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1520 err = pskb_trim_rcsum(skb2, len); 1520 err = pskb_trim_rcsum(skb2, len);
1521 if (err) 1521 if (err)
1522 goto out; 1522 goto out;
1523 err = -EINVAL;
1523 } 1524 }
1524 1525
1526 ip6h = ipv6_hdr(skb2);
1527
1525 switch (skb2->ip_summed) { 1528 switch (skb2->ip_summed) {
1526 case CHECKSUM_COMPLETE: 1529 case CHECKSUM_COMPLETE:
1527 if (!csum_fold(skb2->csum)) 1530 if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len,
1531 IPPROTO_ICMPV6, skb2->csum))
1528 break; 1532 break;
1529 /*FALLTHROUGH*/ 1533 /*FALLTHROUGH*/
1530 case CHECKSUM_NONE: 1534 case CHECKSUM_NONE:
1531 skb2->csum = 0; 1535 skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
1532 if (skb_checksum_complete(skb2)) 1536 &ip6h->daddr,
1537 skb2->len,
1538 IPPROTO_ICMPV6, 0));
1539 if (__skb_checksum_complete(skb2))
1533 goto out; 1540 goto out;
1534 } 1541 }
1535 1542
@@ -1537,7 +1544,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
1537 1544
1538 BR_INPUT_SKB_CB(skb)->igmp = 1; 1545 BR_INPUT_SKB_CB(skb)->igmp = 1;
1539 1546
1540 switch (icmp6h->icmp6_type) { 1547 switch (icmp6_type) {
1541 case ICMPV6_MGM_REPORT: 1548 case ICMPV6_MGM_REPORT:
1542 { 1549 {
1543 struct mld_msg *mld; 1550 struct mld_msg *mld;
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 6545ee9591d1..a76b62135558 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -34,6 +34,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
34 struct net_device *dev = ptr; 34 struct net_device *dev = ptr;
35 struct net_bridge_port *p; 35 struct net_bridge_port *p;
36 struct net_bridge *br; 36 struct net_bridge *br;
37 bool changed_addr;
37 int err; 38 int err;
38 39
39 /* register of bridge completed, add sysfs entries */ 40 /* register of bridge completed, add sysfs entries */
@@ -57,8 +58,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
57 case NETDEV_CHANGEADDR: 58 case NETDEV_CHANGEADDR:
58 spin_lock_bh(&br->lock); 59 spin_lock_bh(&br->lock);
59 br_fdb_changeaddr(p, dev->dev_addr); 60 br_fdb_changeaddr(p, dev->dev_addr);
60 br_stp_recalculate_bridge_id(br); 61 changed_addr = br_stp_recalculate_bridge_id(br);
61 spin_unlock_bh(&br->lock); 62 spin_unlock_bh(&br->lock);
63
64 if (changed_addr)
65 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
66
62 break; 67 break;
63 68
64 case NETDEV_CHANGE: 69 case NETDEV_CHANGE:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 78cc364997d9..a248fe65b29a 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -29,6 +29,11 @@
29 29
30#define BR_VERSION "2.3" 30#define BR_VERSION "2.3"
31 31
32/* Control of forwarding link local multicast */
33#define BR_GROUPFWD_DEFAULT 0
34/* Don't allow forwarding control protocols like STP and LLDP */
35#define BR_GROUPFWD_RESTRICTED 0x4007u
36
32/* Path to usermode spanning tree program */ 37/* Path to usermode spanning tree program */
33#define BR_STP_PROG "/sbin/bridge-stp" 38#define BR_STP_PROG "/sbin/bridge-stp"
34 39
@@ -193,6 +198,8 @@ struct net_bridge
193 unsigned long flags; 198 unsigned long flags;
194#define BR_SET_MAC_ADDR 0x00000001 199#define BR_SET_MAC_ADDR 0x00000001
195 200
201 u16 group_fwd_mask;
202
196 /* STP */ 203 /* STP */
197 bridge_id designated_root; 204 bridge_id designated_root;
198 bridge_id bridge_id; 205 bridge_id bridge_id;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 68b893ea8c3a..c236c0e43984 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -149,6 +149,39 @@ static ssize_t store_stp_state(struct device *d,
149static DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, 149static DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state,
150 store_stp_state); 150 store_stp_state);
151 151
152static ssize_t show_group_fwd_mask(struct device *d,
153 struct device_attribute *attr, char *buf)
154{
155 struct net_bridge *br = to_bridge(d);
156 return sprintf(buf, "%#x\n", br->group_fwd_mask);
157}
158
159
160static ssize_t store_group_fwd_mask(struct device *d,
161 struct device_attribute *attr, const char *buf,
162 size_t len)
163{
164 struct net_bridge *br = to_bridge(d);
165 char *endp;
166 unsigned long val;
167
168 if (!capable(CAP_NET_ADMIN))
169 return -EPERM;
170
171 val = simple_strtoul(buf, &endp, 0);
172 if (endp == buf)
173 return -EINVAL;
174
175 if (val & BR_GROUPFWD_RESTRICTED)
176 return -EINVAL;
177
178 br->group_fwd_mask = val;
179
180 return len;
181}
182static DEVICE_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
183 store_group_fwd_mask);
184
152static ssize_t show_priority(struct device *d, struct device_attribute *attr, 185static ssize_t show_priority(struct device *d, struct device_attribute *attr,
153 char *buf) 186 char *buf)
154{ 187{
@@ -652,6 +685,7 @@ static struct attribute *bridge_attrs[] = {
652 &dev_attr_max_age.attr, 685 &dev_attr_max_age.attr,
653 &dev_attr_ageing_time.attr, 686 &dev_attr_ageing_time.attr,
654 &dev_attr_stp_state.attr, 687 &dev_attr_stp_state.attr,
688 &dev_attr_group_fwd_mask.attr,
655 &dev_attr_priority.attr, 689 &dev_attr_priority.attr,
656 &dev_attr_bridge_id.attr, 690 &dev_attr_bridge_id.attr,
657 &dev_attr_root_id.attr, 691 &dev_attr_root_id.attr,
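A small runnable sketch of the check store_group_fwd_mask() performs, and of what BR_GROUPFWD_RESTRICTED (bits 0, 1, 2 and 14) keeps protected: STP, 802.3 pause, slow protocols and LLDP frames can never be opted into forwarding. group_fwd_mask_valid() is a hypothetical helper written only for illustration:

	#include <stdio.h>

	#define BR_GROUPFWD_RESTRICTED 0x4007u	/* bits 0, 1, 2 and 14 */

	/* Mirrors the validation in store_group_fwd_mask(): reject any mask
	 * that would forward 01-80-C2-00-00-{00,01,02,0E}. */
	static int group_fwd_mask_valid(unsigned long mask)
	{
		return !(mask & BR_GROUPFWD_RESTRICTED);
	}

	int main(void)
	{
		printf("0x8    -> %d\n", group_fwd_mask_valid(0x8));	/* 1: 802.1X PAE allowed */
		printf("0x4000 -> %d\n", group_fwd_mask_valid(0x4000));	/* 0: LLDP stays blocked */
		return 0;
	}

On a running system the mask would be written through the new bridge sysfs attribute, e.g. /sys/class/net/br0/bridge/group_fwd_mask (path assumed from the existing bridge sysfs layout).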
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index ba6f73eb06c6..a9aff9c7d027 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -4,7 +4,7 @@
4 4
5menuconfig BRIDGE_NF_EBTABLES 5menuconfig BRIDGE_NF_EBTABLES
6 tristate "Ethernet Bridge tables (ebtables) support" 6 tristate "Ethernet Bridge tables (ebtables) support"
7 depends on BRIDGE && BRIDGE_NETFILTER 7 depends on BRIDGE && NETFILTER
8 select NETFILTER_XTABLES 8 select NETFILTER_XTABLES
9 help 9 help
10 ebtables is a general, extensible frame/packet identification 10 ebtables is a general, extensible frame/packet identification
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 1bcaf36ad612..40d8258bf74f 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -87,14 +87,14 @@ static int __init ebtable_broute_init(void)
87 if (ret < 0) 87 if (ret < 0)
88 return ret; 88 return ret;
89 /* see br_input.c */ 89 /* see br_input.c */
90 rcu_assign_pointer(br_should_route_hook, 90 RCU_INIT_POINTER(br_should_route_hook,
91 (br_should_route_hook_t *)ebt_broute); 91 (br_should_route_hook_t *)ebt_broute);
92 return 0; 92 return 0;
93} 93}
94 94
95static void __exit ebtable_broute_fini(void) 95static void __exit ebtable_broute_fini(void)
96{ 96{
97 rcu_assign_pointer(br_should_route_hook, NULL); 97 RCU_INIT_POINTER(br_should_route_hook, NULL);
98 synchronize_net(); 98 synchronize_net();
99 unregister_pernet_subsys(&broute_net_ops); 99 unregister_pernet_subsys(&broute_net_ops);
100} 100}
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 2b5ca1a0054d..5864cc491369 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1198,7 +1198,8 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table)
1198 1198
1199 if (table->check && table->check(newinfo, table->valid_hooks)) { 1199 if (table->check && table->check(newinfo, table->valid_hooks)) {
1200 BUGPRINT("The table doesn't like its own initial data, lol\n"); 1200 BUGPRINT("The table doesn't like its own initial data, lol\n");
1201 return ERR_PTR(-EINVAL); 1201 ret = -EINVAL;
1202 goto free_chainstack;
1202 } 1203 }
1203 1204
1204 table->private = newinfo; 1205 table->private = newinfo;
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 7c2fa0a08148..7f9ac0742d19 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -93,10 +93,14 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev)
93 caifdevs = caif_device_list(dev_net(dev)); 93 caifdevs = caif_device_list(dev_net(dev));
94 BUG_ON(!caifdevs); 94 BUG_ON(!caifdevs);
95 95
96 caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC); 96 caifd = kzalloc(sizeof(*caifd), GFP_KERNEL);
97 if (!caifd) 97 if (!caifd)
98 return NULL; 98 return NULL;
99 caifd->pcpu_refcnt = alloc_percpu(int); 99 caifd->pcpu_refcnt = alloc_percpu(int);
100 if (!caifd->pcpu_refcnt) {
101 kfree(caifd);
102 return NULL;
103 }
100 caifd->netdev = dev; 104 caifd->netdev = dev;
101 dev_hold(dev); 105 dev_hold(dev);
102 return caifd; 106 return caifd;
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 52fe33bee029..00523ecc4ced 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -78,10 +78,8 @@ struct cfcnfg *cfcnfg_create(void)
78 78
79 /* Initiate this layer */ 79 /* Initiate this layer */
80 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC); 80 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
81 if (!this) { 81 if (!this)
82 pr_warn("Out of memory\n");
83 return NULL; 82 return NULL;
84 }
85 this->mux = cfmuxl_create(); 83 this->mux = cfmuxl_create();
86 if (!this->mux) 84 if (!this->mux)
87 goto out_of_mem; 85 goto out_of_mem;
@@ -108,8 +106,6 @@ struct cfcnfg *cfcnfg_create(void)
108 106
109 return this; 107 return this;
110out_of_mem: 108out_of_mem:
111 pr_warn("Out of memory\n");
112
113 synchronize_rcu(); 109 synchronize_rcu();
114 110
115 kfree(this->mux); 111 kfree(this->mux);
@@ -448,10 +444,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
448 "- unknown channel type\n"); 444 "- unknown channel type\n");
449 goto unlock; 445 goto unlock;
450 } 446 }
451 if (!servicel) { 447 if (!servicel)
452 pr_warn("Out of memory\n");
453 goto unlock; 448 goto unlock;
454 }
455 layer_set_dn(servicel, cnfg->mux); 449 layer_set_dn(servicel, cnfg->mux);
456 cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id); 450 cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id);
457 layer_set_up(servicel, adapt_layer); 451 layer_set_up(servicel, adapt_layer);
@@ -473,7 +467,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
473{ 467{
474 struct cflayer *frml; 468 struct cflayer *frml;
475 struct cflayer *phy_driver = NULL; 469 struct cflayer *phy_driver = NULL;
476 struct cfcnfg_phyinfo *phyinfo; 470 struct cfcnfg_phyinfo *phyinfo = NULL;
477 int i; 471 int i;
478 u8 phyid; 472 u8 phyid;
479 473
@@ -488,25 +482,25 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
488 goto got_phyid; 482 goto got_phyid;
489 } 483 }
490 pr_warn("Too many CAIF Link Layers (max 6)\n"); 484 pr_warn("Too many CAIF Link Layers (max 6)\n");
491 goto out; 485 goto out_err;
492 486
493got_phyid: 487got_phyid:
494 phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC); 488 phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
489 if (!phyinfo)
490 goto out_err;
495 491
496 switch (phy_type) { 492 switch (phy_type) {
497 case CFPHYTYPE_FRAG: 493 case CFPHYTYPE_FRAG:
498 phy_driver = 494 phy_driver =
499 cfserl_create(CFPHYTYPE_FRAG, phyid, stx); 495 cfserl_create(CFPHYTYPE_FRAG, phyid, stx);
500 if (!phy_driver) { 496 if (!phy_driver)
501 pr_warn("Out of memory\n"); 497 goto out_err;
502 goto out;
503 }
504 break; 498 break;
505 case CFPHYTYPE_CAIF: 499 case CFPHYTYPE_CAIF:
506 phy_driver = NULL; 500 phy_driver = NULL;
507 break; 501 break;
508 default: 502 default:
509 goto out; 503 goto out_err;
510 } 504 }
511 phy_layer->id = phyid; 505 phy_layer->id = phyid;
512 phyinfo->pref = pref; 506 phyinfo->pref = pref;
@@ -520,11 +514,8 @@ got_phyid:
520 514
521 frml = cffrml_create(phyid, fcs); 515 frml = cffrml_create(phyid, fcs);
522 516
523 if (!frml) { 517 if (!frml)
524 pr_warn("Out of memory\n"); 518 goto out_err;
525 kfree(phyinfo);
526 goto out;
527 }
528 phyinfo->frm_layer = frml; 519 phyinfo->frm_layer = frml;
529 layer_set_up(frml, cnfg->mux); 520 layer_set_up(frml, cnfg->mux);
530 521
@@ -540,7 +531,12 @@ got_phyid:
540 } 531 }
541 532
542 list_add_rcu(&phyinfo->node, &cnfg->phys); 533 list_add_rcu(&phyinfo->node, &cnfg->phys);
543out: 534 mutex_unlock(&cnfg->lock);
535 return;
536
537out_err:
538 kfree(phy_driver);
539 kfree(phyinfo);
544 mutex_unlock(&cnfg->lock); 540 mutex_unlock(&cnfg->lock);
545} 541}
546EXPORT_SYMBOL(cfcnfg_add_phy_layer); 542EXPORT_SYMBOL(cfcnfg_add_phy_layer);
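The reworked error path above funnels every failure through a single out_err label and frees phy_driver and phyinfo unconditionally, which is safe because kfree(NULL) is a no-op. A userspace sketch of the same single-exit idiom (setup_two_resources() is invented for illustration; free() plays the role of kfree()):

	#include <stdlib.h>
	#include <string.h>

	/* Initialise every resource pointer to NULL, jump to one error label,
	 * free unconditionally: free(NULL), like kfree(NULL), does nothing. */
	static int setup_two_resources(void)
	{
		char *a = NULL, *b = NULL;

		a = malloc(32);
		if (!a)
			goto out_err;

		b = malloc(64);
		if (!b)
			goto out_err;

		memset(a, 0, 32);
		memset(b, 0, 64);
		/* ... hand both resources over on success ... */
		return 0;

	out_err:
		free(b);
		free(a);
		return -1;
	}

	int main(void) { return setup_two_resources() ? 1 : 0; }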
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index e22671bed669..5cf52225692e 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -35,15 +35,12 @@ struct cflayer *cfctrl_create(void)
35{ 35{
36 struct dev_info dev_info; 36 struct dev_info dev_info;
37 struct cfctrl *this = 37 struct cfctrl *this =
38 kmalloc(sizeof(struct cfctrl), GFP_ATOMIC); 38 kzalloc(sizeof(struct cfctrl), GFP_ATOMIC);
39 if (!this) { 39 if (!this)
40 pr_warn("Out of memory\n");
41 return NULL; 40 return NULL;
42 }
43 caif_assert(offsetof(struct cfctrl, serv.layer) == 0); 41 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
44 memset(&dev_info, 0, sizeof(dev_info)); 42 memset(&dev_info, 0, sizeof(dev_info));
45 dev_info.id = 0xff; 43 dev_info.id = 0xff;
46 memset(this, 0, sizeof(*this));
47 cfsrvl_init(&this->serv, 0, &dev_info, false); 44 cfsrvl_init(&this->serv, 0, &dev_info, false);
48 atomic_set(&this->req_seq_no, 1); 45 atomic_set(&this->req_seq_no, 1);
49 atomic_set(&this->rsp_seq_no, 1); 46 atomic_set(&this->rsp_seq_no, 1);
@@ -180,10 +177,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
180 struct cfctrl *cfctrl = container_obj(layer); 177 struct cfctrl *cfctrl = container_obj(layer);
181 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 178 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
182 struct cflayer *dn = cfctrl->serv.layer.dn; 179 struct cflayer *dn = cfctrl->serv.layer.dn;
183 if (!pkt) { 180 if (!pkt)
184 pr_warn("Out of memory\n");
185 return; 181 return;
186 }
187 if (!dn) { 182 if (!dn) {
188 pr_debug("not able to send enum request\n"); 183 pr_debug("not able to send enum request\n");
189 return; 184 return;
@@ -224,10 +219,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
224 } 219 }
225 220
226 pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 221 pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
227 if (!pkt) { 222 if (!pkt)
228 pr_warn("Out of memory\n");
229 return -ENOMEM; 223 return -ENOMEM;
230 }
231 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); 224 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
232 cfpkt_addbdy(pkt, (param->chtype << 4) | param->linktype); 225 cfpkt_addbdy(pkt, (param->chtype << 4) | param->linktype);
233 cfpkt_addbdy(pkt, (param->priority << 3) | param->phyid); 226 cfpkt_addbdy(pkt, (param->priority << 3) | param->phyid);
@@ -275,10 +268,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
275 return -EINVAL; 268 return -EINVAL;
276 } 269 }
277 req = kzalloc(sizeof(*req), GFP_KERNEL); 270 req = kzalloc(sizeof(*req), GFP_KERNEL);
278 if (!req) { 271 if (!req)
279 pr_warn("Out of memory\n");
280 return -ENOMEM; 272 return -ENOMEM;
281 }
282 req->client_layer = user_layer; 273 req->client_layer = user_layer;
283 req->cmd = CFCTRL_CMD_LINK_SETUP; 274 req->cmd = CFCTRL_CMD_LINK_SETUP;
284 req->param = *param; 275 req->param = *param;
@@ -312,10 +303,8 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
312 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 303 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
313 struct cflayer *dn = cfctrl->serv.layer.dn; 304 struct cflayer *dn = cfctrl->serv.layer.dn;
314 305
315 if (!pkt) { 306 if (!pkt)
316 pr_warn("Out of memory\n");
317 return -ENOMEM; 307 return -ENOMEM;
318 }
319 308
320 if (!dn) { 309 if (!dn) {
321 pr_debug("not able to send link-down request\n"); 310 pr_debug("not able to send link-down request\n");
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 11a2af4c162a..65d6ef3cf9aa 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -19,13 +19,10 @@ static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
19 19
20struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info) 20struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
21{ 21{
22 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 22 struct cfsrvl *dbg = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
23 if (!dbg) { 23 if (!dbg)
24 pr_warn("Out of memory\n");
25 return NULL; 24 return NULL;
26 }
27 caif_assert(offsetof(struct cfsrvl, layer) == 0); 25 caif_assert(offsetof(struct cfsrvl, layer) == 0);
28 memset(dbg, 0, sizeof(struct cfsrvl));
29 cfsrvl_init(dbg, channel_id, dev_info, false); 26 cfsrvl_init(dbg, channel_id, dev_info, false);
30 dbg->layer.receive = cfdbgl_receive; 27 dbg->layer.receive = cfdbgl_receive;
31 dbg->layer.transmit = cfdbgl_transmit; 28 dbg->layer.transmit = cfdbgl_transmit;
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index 0382dec84fdc..0f5ff27aa41c 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -26,13 +26,10 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt);
26 26
27struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info) 27struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
28{ 28{
29 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *dgm = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
30 if (!dgm) { 30 if (!dgm)
31 pr_warn("Out of memory\n");
32 return NULL; 31 return NULL;
33 }
34 caif_assert(offsetof(struct cfsrvl, layer) == 0); 32 caif_assert(offsetof(struct cfsrvl, layer) == 0);
35 memset(dgm, 0, sizeof(struct cfsrvl));
36 cfsrvl_init(dgm, channel_id, dev_info, true); 33 cfsrvl_init(dgm, channel_id, dev_info, true);
37 dgm->layer.receive = cfdgml_receive; 34 dgm->layer.receive = cfdgml_receive;
38 dgm->layer.transmit = cfdgml_transmit; 35 dgm->layer.transmit = cfdgml_transmit;
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index 04204b202718..f39921171d0d 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -34,11 +34,9 @@ static u32 cffrml_rcv_error;
34static u32 cffrml_rcv_checsum_error; 34static u32 cffrml_rcv_checsum_error;
35struct cflayer *cffrml_create(u16 phyid, bool use_fcs) 35struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
36{ 36{
37 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC); 37 struct cffrml *this = kzalloc(sizeof(struct cffrml), GFP_ATOMIC);
38 if (!this) { 38 if (!this)
39 pr_warn("Out of memory\n");
40 return NULL; 39 return NULL;
41 }
42 this->pcpu_refcnt = alloc_percpu(int); 40 this->pcpu_refcnt = alloc_percpu(int);
43 if (this->pcpu_refcnt == NULL) { 41 if (this->pcpu_refcnt == NULL) {
44 kfree(this); 42 kfree(this);
@@ -47,7 +45,6 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
47 45
48 caif_assert(offsetof(struct cffrml, layer) == 0); 46 caif_assert(offsetof(struct cffrml, layer) == 0);
49 47
50 memset(this, 0, sizeof(struct cflayer));
51 this->layer.receive = cffrml_receive; 48 this->layer.receive = cffrml_receive;
52 this->layer.transmit = cffrml_transmit; 49 this->layer.transmit = cffrml_transmit;
53 this->layer.ctrlcmd = cffrml_ctrlcmd; 50 this->layer.ctrlcmd = cffrml_ctrlcmd;
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index c23979e79dfa..b36f24a4c8e7 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -108,7 +108,7 @@ struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid)
108 int idx = phyid % DN_CACHE_SIZE; 108 int idx = phyid % DN_CACHE_SIZE;
109 109
110 spin_lock_bh(&muxl->transmit_lock); 110 spin_lock_bh(&muxl->transmit_lock);
111 rcu_assign_pointer(muxl->dn_cache[idx], NULL); 111 RCU_INIT_POINTER(muxl->dn_cache[idx], NULL);
112 dn = get_from_id(&muxl->frml_list, phyid); 112 dn = get_from_id(&muxl->frml_list, phyid);
113 if (dn == NULL) 113 if (dn == NULL)
114 goto out; 114 goto out;
@@ -164,7 +164,7 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id)
164 if (up == NULL) 164 if (up == NULL)
165 goto out; 165 goto out;
166 166
167 rcu_assign_pointer(muxl->up_cache[idx], NULL); 167 RCU_INIT_POINTER(muxl->up_cache[idx], NULL);
168 list_del_rcu(&up->node); 168 list_del_rcu(&up->node);
169out: 169out:
170 spin_unlock_bh(&muxl->receive_lock); 170 spin_unlock_bh(&muxl->receive_lock);
@@ -261,7 +261,7 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
261 261
262 idx = layer->id % UP_CACHE_SIZE; 262 idx = layer->id % UP_CACHE_SIZE;
263 spin_lock_bh(&muxl->receive_lock); 263 spin_lock_bh(&muxl->receive_lock);
264 rcu_assign_pointer(muxl->up_cache[idx], NULL); 264 RCU_INIT_POINTER(muxl->up_cache[idx], NULL);
265 list_del_rcu(&layer->node); 265 list_del_rcu(&layer->node);
266 spin_unlock_bh(&muxl->receive_lock); 266 spin_unlock_bh(&muxl->receive_lock);
267 } 267 }
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 0deabb440051..81660f809713 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -46,13 +46,10 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
46 int mtu_size) 46 int mtu_size)
47{ 47{
48 int tmp; 48 int tmp;
49 struct cfrfml *this = 49 struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
50 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
51 50
52 if (!this) { 51 if (!this)
53 pr_warn("Out of memory\n");
54 return NULL; 52 return NULL;
55 }
56 53
57 cfsrvl_init(&this->serv, channel_id, dev_info, false); 54 cfsrvl_init(&this->serv, channel_id, dev_info, false);
58 this->serv.release = cfrfml_release; 55 this->serv.release = cfrfml_release;
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 2715c84cfa87..797c8d165993 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -33,13 +33,10 @@ static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
33 33
34struct cflayer *cfserl_create(int type, int instance, bool use_stx) 34struct cflayer *cfserl_create(int type, int instance, bool use_stx)
35{ 35{
36 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC); 36 struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC);
37 if (!this) { 37 if (!this)
38 pr_warn("Out of memory\n");
39 return NULL; 38 return NULL;
40 }
41 caif_assert(offsetof(struct cfserl, layer) == 0); 39 caif_assert(offsetof(struct cfserl, layer) == 0);
42 memset(this, 0, sizeof(struct cfserl));
43 this->layer.receive = cfserl_receive; 40 this->layer.receive = cfserl_receive;
44 this->layer.transmit = cfserl_transmit; 41 this->layer.transmit = cfserl_transmit;
45 this->layer.ctrlcmd = cfserl_ctrlcmd; 42 this->layer.ctrlcmd = cfserl_ctrlcmd;
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 535a1e72b366..b99f5b22689d 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -108,10 +108,8 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
108 struct caif_payload_info *info; 108 struct caif_payload_info *info;
109 u8 flow_on = SRVL_FLOW_ON; 109 u8 flow_on = SRVL_FLOW_ON;
110 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 110 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
111 if (!pkt) { 111 if (!pkt)
112 pr_warn("Out of memory\n");
113 return -ENOMEM; 112 return -ENOMEM;
114 }
115 113
116 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { 114 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) {
117 pr_err("Packet is erroneous!\n"); 115 pr_err("Packet is erroneous!\n");
@@ -130,10 +128,8 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
130 struct caif_payload_info *info; 128 struct caif_payload_info *info;
131 u8 flow_off = SRVL_FLOW_OFF; 129 u8 flow_off = SRVL_FLOW_OFF;
132 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 130 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
133 if (!pkt) { 131 if (!pkt)
134 pr_warn("Out of memory\n");
135 return -ENOMEM; 132 return -ENOMEM;
136 }
137 133
138 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { 134 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
139 pr_err("Packet is erroneous!\n"); 135 pr_err("Packet is erroneous!\n");
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 98e027db18ed..53e49f3e3af3 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -26,13 +26,10 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
26 26
27struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info) 27struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
28{ 28{
29 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *util = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
30 if (!util) { 30 if (!util)
31 pr_warn("Out of memory\n");
32 return NULL; 31 return NULL;
33 }
34 caif_assert(offsetof(struct cfsrvl, layer) == 0); 32 caif_assert(offsetof(struct cfsrvl, layer) == 0);
35 memset(util, 0, sizeof(struct cfsrvl));
36 cfsrvl_init(util, channel_id, dev_info, true); 33 cfsrvl_init(util, channel_id, dev_info, true);
37 util->layer.receive = cfutill_receive; 34 util->layer.receive = cfutill_receive;
38 util->layer.transmit = cfutill_transmit; 35 util->layer.transmit = cfutill_transmit;
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 3ec83fbc2887..910ab0661f66 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -25,13 +25,10 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt);
25 25
26struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info) 26struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
27{ 27{
28 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 28 struct cfsrvl *vei = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
29 if (!vei) { 29 if (!vei)
30 pr_warn("Out of memory\n");
31 return NULL; 30 return NULL;
32 }
33 caif_assert(offsetof(struct cfsrvl, layer) == 0); 31 caif_assert(offsetof(struct cfsrvl, layer) == 0);
34 memset(vei, 0, sizeof(struct cfsrvl));
35 cfsrvl_init(vei, channel_id, dev_info, true); 32 cfsrvl_init(vei, channel_id, dev_info, true);
36 vei->layer.receive = cfvei_receive; 33 vei->layer.receive = cfvei_receive;
37 vei->layer.transmit = cfvei_transmit; 34 vei->layer.transmit = cfvei_transmit;
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index b2f5989ad455..e3f37db40ac3 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -21,14 +21,11 @@ static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt);
21 21
22struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info) 22struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
23{ 23{
24 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 24 struct cfsrvl *vid = kzalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
25 if (!vid) { 25 if (!vid)
26 pr_warn("Out of memory\n");
27 return NULL; 26 return NULL;
28 }
29 caif_assert(offsetof(struct cfsrvl, layer) == 0); 27 caif_assert(offsetof(struct cfsrvl, layer) == 0);
30 28
31 memset(vid, 0, sizeof(struct cfsrvl));
32 cfsrvl_init(vid, channel_id, dev_info, false); 29 cfsrvl_init(vid, channel_id, dev_info, false);
33 vid->layer.receive = cfvidl_receive; 30 vid->layer.receive = cfvidl_receive;
34 vid->layer.transmit = cfvidl_transmit; 31 vid->layer.transmit = cfvidl_transmit;
diff --git a/net/can/Kconfig b/net/can/Kconfig
index 89395b2c8bca..03200699d274 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -40,5 +40,16 @@ config CAN_BCM
40 CAN messages are used on the bus (e.g. in automotive environments). 40 CAN messages are used on the bus (e.g. in automotive environments).
41 To use the Broadcast Manager, use AF_CAN with protocol CAN_BCM. 41 To use the Broadcast Manager, use AF_CAN with protocol CAN_BCM.
42 42
43config CAN_GW
44 tristate "CAN Gateway/Router (with netlink configuration)"
45 depends on CAN
46 default N
47 ---help---
48 The CAN Gateway/Router is used to route (and modify) CAN frames.
49 It is based on the PF_CAN core infrastructure for msg filtering and
50 msg sending and can optionally modify routed CAN frames on the fly.
51 CAN frames can be routed between CAN network interfaces (one hop).
52 They can be modified with AND/OR/XOR/SET operations as configured
53 by the netlink configuration interface known e.g. from iptables.
43 54
44source "drivers/net/can/Kconfig" 55source "drivers/net/can/Kconfig"
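The help text says routed frames "can be modified with AND/OR/XOR/SET operations"; gw.c implements this with the MODFUNC helpers further down in this patch. A minimal standalone sketch of what such a modification does to a frame (struct can_frame is re-declared here only as a simplified stand-in for the linux/can.h definition, and the data bytes are combined with a loop rather than the 64-bit access the kernel code uses):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	struct can_frame {
		uint32_t can_id;
		uint8_t  can_dlc;
		uint8_t  data[8];
	};

	int main(void)
	{
		struct can_frame cf  = { .can_id = 0x123, .can_dlc = 8 };
		struct can_frame and = { .can_id = 0x7FF };
		struct can_frame set = { .can_id = 0x456 };
		int i;

		memset(cf.data, 0xAA, 8);
		memset(and.data, 0x0F, 8);

		/* corresponds to mod_and_id / mod_and_data in gw.c */
		cf.can_id &= and.can_id;
		for (i = 0; i < 8; i++)
			cf.data[i] &= and.data[i];

		/* corresponds to mod_set_id in gw.c */
		cf.can_id = set.can_id;

		printf("id=0x%03x data[0]=0x%02x\n", cf.can_id, cf.data[0]);
		return 0;
	}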
diff --git a/net/can/Makefile b/net/can/Makefile
index 2d3894b32742..cef49eb1f5c7 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -10,3 +10,6 @@ can-raw-y := raw.o
10 10
11obj-$(CONFIG_CAN_BCM) += can-bcm.o 11obj-$(CONFIG_CAN_BCM) += can-bcm.o
12can-bcm-y := bcm.o 12can-bcm-y := bcm.o
13
14obj-$(CONFIG_CAN_GW) += can-gw.o
15can-gw-y := gw.o
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 8ce926d3b2cb..d1ff5152c657 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -719,7 +719,7 @@ int can_proto_register(const struct can_proto *cp)
719 proto); 719 proto);
720 err = -EBUSY; 720 err = -EBUSY;
721 } else 721 } else
722 rcu_assign_pointer(proto_tab[proto], cp); 722 RCU_INIT_POINTER(proto_tab[proto], cp);
723 723
724 mutex_unlock(&proto_tab_lock); 724 mutex_unlock(&proto_tab_lock);
725 725
@@ -740,7 +740,7 @@ void can_proto_unregister(const struct can_proto *cp)
740 740
741 mutex_lock(&proto_tab_lock); 741 mutex_lock(&proto_tab_lock);
742 BUG_ON(proto_tab[proto] != cp); 742 BUG_ON(proto_tab[proto] != cp);
743 rcu_assign_pointer(proto_tab[proto], NULL); 743 RCU_INIT_POINTER(proto_tab[proto], NULL);
744 mutex_unlock(&proto_tab_lock); 744 mutex_unlock(&proto_tab_lock);
745 745
746 synchronize_rcu(); 746 synchronize_rcu();
@@ -857,7 +857,7 @@ static __exit void can_exit(void)
857 struct net_device *dev; 857 struct net_device *dev;
858 858
859 if (stats_timer) 859 if (stats_timer)
860 del_timer(&can_stattimer); 860 del_timer_sync(&can_stattimer);
861 861
862 can_remove_proc(); 862 can_remove_proc();
863 863
diff --git a/net/can/bcm.c b/net/can/bcm.c
index d6c8ae5b2e6a..c84963d2dee6 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -344,6 +344,18 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
344 } 344 }
345} 345}
346 346
347static void bcm_tx_start_timer(struct bcm_op *op)
348{
349 if (op->kt_ival1.tv64 && op->count)
350 hrtimer_start(&op->timer,
351 ktime_add(ktime_get(), op->kt_ival1),
352 HRTIMER_MODE_ABS);
353 else if (op->kt_ival2.tv64)
354 hrtimer_start(&op->timer,
355 ktime_add(ktime_get(), op->kt_ival2),
356 HRTIMER_MODE_ABS);
357}
358
347static void bcm_tx_timeout_tsklet(unsigned long data) 359static void bcm_tx_timeout_tsklet(unsigned long data)
348{ 360{
349 struct bcm_op *op = (struct bcm_op *)data; 361 struct bcm_op *op = (struct bcm_op *)data;
@@ -365,26 +377,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
365 377
366 bcm_send_to_user(op, &msg_head, NULL, 0); 378 bcm_send_to_user(op, &msg_head, NULL, 0);
367 } 379 }
368 }
369
370 if (op->kt_ival1.tv64 && (op->count > 0)) {
371
372 /* send (next) frame */
373 bcm_can_tx(op); 380 bcm_can_tx(op);
374 hrtimer_start(&op->timer,
375 ktime_add(ktime_get(), op->kt_ival1),
376 HRTIMER_MODE_ABS);
377 381
378 } else { 382 } else if (op->kt_ival2.tv64)
379 if (op->kt_ival2.tv64) { 383 bcm_can_tx(op);
380 384
381 /* send (next) frame */ 385 bcm_tx_start_timer(op);
382 bcm_can_tx(op);
383 hrtimer_start(&op->timer,
384 ktime_add(ktime_get(), op->kt_ival2),
385 HRTIMER_MODE_ABS);
386 }
387 }
388} 386}
389 387
390/* 388/*
@@ -964,23 +962,20 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
964 hrtimer_cancel(&op->timer); 962 hrtimer_cancel(&op->timer);
965 } 963 }
966 964
967 if ((op->flags & STARTTIMER) && 965 if (op->flags & STARTTIMER) {
968 ((op->kt_ival1.tv64 && op->count) || op->kt_ival2.tv64)) { 966 hrtimer_cancel(&op->timer);
969
970 /* spec: send can_frame when starting timer */ 967 /* spec: send can_frame when starting timer */
971 op->flags |= TX_ANNOUNCE; 968 op->flags |= TX_ANNOUNCE;
972
973 if (op->kt_ival1.tv64 && (op->count > 0)) {
974 /* op->count-- is done in bcm_tx_timeout_handler */
975 hrtimer_start(&op->timer, op->kt_ival1,
976 HRTIMER_MODE_REL);
977 } else
978 hrtimer_start(&op->timer, op->kt_ival2,
979 HRTIMER_MODE_REL);
980 } 969 }
981 970
982 if (op->flags & TX_ANNOUNCE) 971 if (op->flags & TX_ANNOUNCE) {
983 bcm_can_tx(op); 972 bcm_can_tx(op);
973 if (op->count)
974 op->count--;
975 }
976
977 if (op->flags & STARTTIMER)
978 bcm_tx_start_timer(op);
984 979
985 return msg_head->nframes * CFSIZ + MHSIZ; 980 return msg_head->nframes * CFSIZ + MHSIZ;
986} 981}
diff --git a/net/can/gw.c b/net/can/gw.c
new file mode 100644
index 000000000000..ac11407d3b54
--- /dev/null
+++ b/net/can/gw.c
@@ -0,0 +1,959 @@
1/*
2 * gw.c - CAN frame Gateway/Router/Bridge with netlink interface
3 *
4 * Copyright (c) 2011 Volkswagen Group Electronic Research
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of Volkswagen nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * Alternatively, provided that this notice is retained in full, this
20 * software may be distributed under the terms of the GNU General
21 * Public License ("GPL") version 2, in which case the provisions of the
22 * GPL apply INSTEAD OF those given above.
23 *
24 * The provided data structures and external interfaces from this code
25 * are not restricted to be used by modules with a GPL compatible license.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38 * DAMAGE.
39 *
40 * Send feedback to <socketcan-users@lists.berlios.de>
41 *
42 */
43
44#include <linux/module.h>
45#include <linux/init.h>
46#include <linux/types.h>
47#include <linux/list.h>
48#include <linux/spinlock.h>
49#include <linux/rcupdate.h>
50#include <linux/rculist.h>
51#include <linux/net.h>
52#include <linux/netdevice.h>
53#include <linux/if_arp.h>
54#include <linux/skbuff.h>
55#include <linux/can.h>
56#include <linux/can/core.h>
57#include <linux/can/gw.h>
58#include <net/rtnetlink.h>
59#include <net/net_namespace.h>
60#include <net/sock.h>
61
62#define CAN_GW_VERSION "20101209"
63static __initdata const char banner[] =
64 KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n";
65
66MODULE_DESCRIPTION("PF_CAN netlink gateway");
67MODULE_LICENSE("Dual BSD/GPL");
68MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
69MODULE_ALIAS("can-gw");
70
71HLIST_HEAD(cgw_list);
72static struct notifier_block notifier;
73
74static struct kmem_cache *cgw_cache __read_mostly;
75
76/* structure that contains the (on-the-fly) CAN frame modifications */
77struct cf_mod {
78 struct {
79 struct can_frame and;
80 struct can_frame or;
81 struct can_frame xor;
82 struct can_frame set;
83 } modframe;
84 struct {
85 u8 and;
86 u8 or;
87 u8 xor;
88 u8 set;
89 } modtype;
90 void (*modfunc[MAX_MODFUNCTIONS])(struct can_frame *cf,
91 struct cf_mod *mod);
92
93 /* CAN frame checksum calculation after CAN frame modifications */
94 struct {
95 struct cgw_csum_xor xor;
96 struct cgw_csum_crc8 crc8;
97 } csum;
98 struct {
99 void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor);
100 void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8);
101 } csumfunc;
102};
103
104
105/*
106 * So far we just support CAN -> CAN routing and frame modifications.
107 *
108 * The internal can_can_gw structure contains data and attributes for
109 * a CAN -> CAN gateway job.
110 */
111struct can_can_gw {
112 struct can_filter filter;
113 int src_idx;
114 int dst_idx;
115};
116
117/* list entry for CAN gateways jobs */
118struct cgw_job {
119 struct hlist_node list;
120 struct rcu_head rcu;
121 u32 handled_frames;
122 u32 dropped_frames;
123 struct cf_mod mod;
124 union {
125 /* CAN frame data source */
126 struct net_device *dev;
127 } src;
128 union {
129 /* CAN frame data destination */
130 struct net_device *dev;
131 } dst;
132 union {
133 struct can_can_gw ccgw;
134 /* tbc */
135 };
136 u8 gwtype;
137 u16 flags;
138};
139
140/* modification functions that are invoked in the hot path in can_can_gw_rcv */
141
142#define MODFUNC(func, op) static void func(struct can_frame *cf, \
143 struct cf_mod *mod) { op ; }
144
145MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id)
146MODFUNC(mod_and_dlc, cf->can_dlc &= mod->modframe.and.can_dlc)
147MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data)
148MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id)
149MODFUNC(mod_or_dlc, cf->can_dlc |= mod->modframe.or.can_dlc)
150MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data)
151MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id)
152MODFUNC(mod_xor_dlc, cf->can_dlc ^= mod->modframe.xor.can_dlc)
153MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data)
154MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id)
155MODFUNC(mod_set_dlc, cf->can_dlc = mod->modframe.set.can_dlc)
156MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data)
157
158static inline void canframecpy(struct can_frame *dst, struct can_frame *src)
159{
160 /*
161 * Copy the struct members separately to ensure that no uninitialized
162 * data are copied in the 3 bytes hole of the struct. This is needed
163 * to make easy compares of the data in the struct cf_mod.
164 */
165
166 dst->can_id = src->can_id;
167 dst->can_dlc = src->can_dlc;
168 *(u64 *)dst->data = *(u64 *)src->data;
169}
170
171static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re)
172{
173 /*
174 * absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0]
175 * relative to received dlc -1 .. -8 :
176 * e.g. for received dlc = 8
177 * -1 => index = 7 (data[7])
178 * -3 => index = 5 (data[5])
179 * -8 => index = 0 (data[0])
180 */
181
182 if (fr > -9 && fr < 8 &&
183 to > -9 && to < 8 &&
184 re > -9 && re < 8)
185 return 0;
186 else
187 return -EINVAL;
188}
189
190static inline int calc_idx(int idx, int rx_dlc)
191{
192 if (idx < 0)
193 return rx_dlc + idx;
194 else
195 return idx;
196}
197
198static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor)
199{
200 int from = calc_idx(xor->from_idx, cf->can_dlc);
201 int to = calc_idx(xor->to_idx, cf->can_dlc);
202 int res = calc_idx(xor->result_idx, cf->can_dlc);
203 u8 val = xor->init_xor_val;
204 int i;
205
206 if (from < 0 || to < 0 || res < 0)
207 return;
208
209 if (from <= to) {
210 for (i = from; i <= to; i++)
211 val ^= cf->data[i];
212 } else {
213 for (i = from; i >= to; i--)
214 val ^= cf->data[i];
215 }
216
217 cf->data[res] = val;
218}
219
220static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor)
221{
222 u8 val = xor->init_xor_val;
223 int i;
224
225 for (i = xor->from_idx; i <= xor->to_idx; i++)
226 val ^= cf->data[i];
227
228 cf->data[xor->result_idx] = val;
229}
230
231static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor)
232{
233 u8 val = xor->init_xor_val;
234 int i;
235
236 for (i = xor->from_idx; i >= xor->to_idx; i--)
237 val ^= cf->data[i];
238
239 cf->data[xor->result_idx] = val;
240}
241
242static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
243{
244 int from = calc_idx(crc8->from_idx, cf->can_dlc);
245 int to = calc_idx(crc8->to_idx, cf->can_dlc);
246 int res = calc_idx(crc8->result_idx, cf->can_dlc);
247 u8 crc = crc8->init_crc_val;
248 int i;
249
250 if (from < 0 || to < 0 || res < 0)
251 return;
252
253 if (from <= to) {
254 for (i = crc8->from_idx; i <= crc8->to_idx; i++)
255 crc = crc8->crctab[crc^cf->data[i]];
256 } else {
257 for (i = crc8->from_idx; i >= crc8->to_idx; i--)
258 crc = crc8->crctab[crc^cf->data[i]];
259 }
260
261 switch (crc8->profile) {
262
263 case CGW_CRC8PRF_1U8:
264 crc = crc8->crctab[crc^crc8->profile_data[0]];
265 break;
266
267 case CGW_CRC8PRF_16U8:
268 crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
269 break;
270
271 case CGW_CRC8PRF_SFFID_XOR:
272 crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
273 (cf->can_id >> 8 & 0xFF)];
274 break;
275
276 }
277
278 cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
279}
280
281static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
282{
283 u8 crc = crc8->init_crc_val;
284 int i;
285
286 for (i = crc8->from_idx; i <= crc8->to_idx; i++)
287 crc = crc8->crctab[crc^cf->data[i]];
288
289 switch (crc8->profile) {
290
291 case CGW_CRC8PRF_1U8:
292 crc = crc8->crctab[crc^crc8->profile_data[0]];
293 break;
294
295 case CGW_CRC8PRF_16U8:
296 crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
297 break;
298
299 case CGW_CRC8PRF_SFFID_XOR:
300 crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
301 (cf->can_id >> 8 & 0xFF)];
302 break;
303 }
304
305 cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
306}
307
308static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
309{
310 u8 crc = crc8->init_crc_val;
311 int i;
312
313 for (i = crc8->from_idx; i >= crc8->to_idx; i--)
314 crc = crc8->crctab[crc^cf->data[i]];
315
316 switch (crc8->profile) {
317
318 case CGW_CRC8PRF_1U8:
319 crc = crc8->crctab[crc^crc8->profile_data[0]];
320 break;
321
322 case CGW_CRC8PRF_16U8:
323 crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]];
324 break;
325
326 case CGW_CRC8PRF_SFFID_XOR:
327 crc = crc8->crctab[crc^(cf->can_id & 0xFF)^
328 (cf->can_id >> 8 & 0xFF)];
329 break;
330 }
331
332 cf->data[crc8->result_idx] = crc^crc8->final_xor_val;
333}
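	/*
	 * Editor's summary of the CRC8 profiles: after the selected data
	 * bytes have been fed through the table-driven CRC, the optional
	 * profile mixes in extra input before the final XOR:
	 *   CGW_CRC8PRF_1U8       - one fixed user-supplied byte
	 *                           (profile_data[0])
	 *   CGW_CRC8PRF_16U8      - one of 16 user-supplied bytes, selected
	 *                           by the low nibble of data[1]
	 *   CGW_CRC8PRF_SFFID_XOR - the XOR of the two low bytes of the
	 *                           (SFF) CAN identifier
	 */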
334
335/* the receive & process & send function */
336static void can_can_gw_rcv(struct sk_buff *skb, void *data)
337{
338 struct cgw_job *gwj = (struct cgw_job *)data;
339 struct can_frame *cf;
340 struct sk_buff *nskb;
341 int modidx = 0;
342
343 /* do not handle already routed frames - see comment below */
344 if (skb_mac_header_was_set(skb))
345 return;
346
347 if (!(gwj->dst.dev->flags & IFF_UP)) {
348 gwj->dropped_frames++;
349 return;
350 }
351
352	/*
353	 * Clone the given skb, which has not been done in can_rcv().
354	 *
355	 * When at least one modification function is activated, the skb
356	 * has to be copied instead, as skb->data is going to be modified.
357	 */
358 if (gwj->mod.modfunc[0])
359 nskb = skb_copy(skb, GFP_ATOMIC);
360 else
361 nskb = skb_clone(skb, GFP_ATOMIC);
362
363 if (!nskb) {
364 gwj->dropped_frames++;
365 return;
366 }
367
368	/*
369	 * Mark routed frames by setting a mac header offset, which is
370	 * irrelevant for the CAN frames located in the skb->data section.
371	 *
372	 * As dev->header_ops is not set for CAN netdevices, nothing ever
373	 * accesses the various header offsets in CAN skbuffs anyway -
374	 * e.g. reading CAN frames via a packet socket still works.
375	 */
376 skb_set_mac_header(nskb, 8);
377 nskb->dev = gwj->dst.dev;
378
379 /* pointer to modifiable CAN frame */
380 cf = (struct can_frame *)nskb->data;
381
382 /* perform preprocessed modification functions if there are any */
383 while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx])
384 (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod);
385
386 /* check for checksum updates when the CAN frame has been modified */
387 if (modidx) {
388 if (gwj->mod.csumfunc.crc8)
389 (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);
390
391 if (gwj->mod.csumfunc.xor)
392 (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor);
393 }
394
395	/* clear the skb timestamp unless configured otherwise */
396 if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP))
397 nskb->tstamp.tv64 = 0;
398
399 /* send to netdevice */
400 if (can_send(nskb, gwj->flags & CGW_FLAGS_CAN_ECHO))
401 gwj->dropped_frames++;
402 else
403 gwj->handled_frames++;
404}
405
406static inline int cgw_register_filter(struct cgw_job *gwj)
407{
408 return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id,
409 gwj->ccgw.filter.can_mask, can_can_gw_rcv,
410 gwj, "gw");
411}
412
413static inline void cgw_unregister_filter(struct cgw_job *gwj)
414{
415 can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id,
416 gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj);
417}
418
419static int cgw_notifier(struct notifier_block *nb,
420 unsigned long msg, void *data)
421{
422 struct net_device *dev = (struct net_device *)data;
423
424 if (!net_eq(dev_net(dev), &init_net))
425 return NOTIFY_DONE;
426 if (dev->type != ARPHRD_CAN)
427 return NOTIFY_DONE;
428
429 if (msg == NETDEV_UNREGISTER) {
430
431 struct cgw_job *gwj = NULL;
432 struct hlist_node *n, *nx;
433
434 ASSERT_RTNL();
435
436 hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) {
437
438 if (gwj->src.dev == dev || gwj->dst.dev == dev) {
439 hlist_del(&gwj->list);
440 cgw_unregister_filter(gwj);
441				kmem_cache_free(cgw_cache, gwj);
442 }
443 }
444 }
445
446 return NOTIFY_DONE;
447}
448
449static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj)
450{
451 struct cgw_frame_mod mb;
452 struct rtcanmsg *rtcan;
453 struct nlmsghdr *nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*rtcan), 0);
454 if (!nlh)
455 return -EMSGSIZE;
456
457 rtcan = nlmsg_data(nlh);
458 rtcan->can_family = AF_CAN;
459 rtcan->gwtype = gwj->gwtype;
460 rtcan->flags = gwj->flags;
461
462 /* add statistics if available */
463
464 if (gwj->handled_frames) {
465 if (nla_put_u32(skb, CGW_HANDLED, gwj->handled_frames) < 0)
466 goto cancel;
467 else
468 nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32));
469 }
470
471 if (gwj->dropped_frames) {
472 if (nla_put_u32(skb, CGW_DROPPED, gwj->dropped_frames) < 0)
473 goto cancel;
474 else
475 nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32));
476 }
477
478 /* check non default settings of attributes */
479
480 if (gwj->mod.modtype.and) {
481 memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
482 mb.modtype = gwj->mod.modtype.and;
483 if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0)
484 goto cancel;
485 else
486 nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb));
487 }
488
489 if (gwj->mod.modtype.or) {
490 memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
491 mb.modtype = gwj->mod.modtype.or;
492 if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0)
493 goto cancel;
494 else
495 nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb));
496 }
497
498 if (gwj->mod.modtype.xor) {
499 memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
500 mb.modtype = gwj->mod.modtype.xor;
501 if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0)
502 goto cancel;
503 else
504 nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb));
505 }
506
507 if (gwj->mod.modtype.set) {
508 memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
509 mb.modtype = gwj->mod.modtype.set;
510 if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0)
511 goto cancel;
512 else
513 nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(mb));
514 }
515
516 if (gwj->mod.csumfunc.crc8) {
517 if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN,
518 &gwj->mod.csum.crc8) < 0)
519 goto cancel;
520 else
521 nlh->nlmsg_len += NLA_HDRLEN + \
522 NLA_ALIGN(CGW_CS_CRC8_LEN);
523 }
524
525 if (gwj->mod.csumfunc.xor) {
526 if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN,
527 &gwj->mod.csum.xor) < 0)
528 goto cancel;
529 else
530 nlh->nlmsg_len += NLA_HDRLEN + \
531 NLA_ALIGN(CGW_CS_XOR_LEN);
532 }
533
534 if (gwj->gwtype == CGW_TYPE_CAN_CAN) {
535
536 if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) {
537 if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter),
538 &gwj->ccgw.filter) < 0)
539 goto cancel;
540 else
541 nlh->nlmsg_len += NLA_HDRLEN +
542 NLA_ALIGN(sizeof(struct can_filter));
543 }
544
545 if (nla_put_u32(skb, CGW_SRC_IF, gwj->ccgw.src_idx) < 0)
546 goto cancel;
547 else
548 nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32));
549
550 if (nla_put_u32(skb, CGW_DST_IF, gwj->ccgw.dst_idx) < 0)
551 goto cancel;
552 else
553 nlh->nlmsg_len += NLA_HDRLEN + NLA_ALIGN(sizeof(u32));
554 }
555
556 return skb->len;
557
558cancel:
559 nlmsg_cancel(skb, nlh);
560 return -EMSGSIZE;
561}
562
563/* Dump information about all CAN gateway jobs, in response to RTM_GETROUTE */
564static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb)
565{
566 struct cgw_job *gwj = NULL;
567 struct hlist_node *n;
568 int idx = 0;
569 int s_idx = cb->args[0];
570
571 rcu_read_lock();
572 hlist_for_each_entry_rcu(gwj, n, &cgw_list, list) {
573 if (idx < s_idx)
574 goto cont;
575
576 if (cgw_put_job(skb, gwj) < 0)
577 break;
578cont:
579 idx++;
580 }
581 rcu_read_unlock();
582
583 cb->args[0] = idx;
584
585 return skb->len;
586}
587
588/* check for common and gwtype specific attributes */
589static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
590 u8 gwtype, void *gwtypeattr)
591{
592 struct nlattr *tb[CGW_MAX+1];
593 struct cgw_frame_mod mb;
594 int modidx = 0;
595 int err = 0;
596
597 /* initialize modification & checksum data space */
598 memset(mod, 0, sizeof(*mod));
599
600 err = nlmsg_parse(nlh, sizeof(struct rtcanmsg), tb, CGW_MAX, NULL);
601 if (err < 0)
602 return err;
603
604 /* check for AND/OR/XOR/SET modifications */
605
606 if (tb[CGW_MOD_AND] &&
607 nla_len(tb[CGW_MOD_AND]) == CGW_MODATTR_LEN) {
608 nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN);
609
610 canframecpy(&mod->modframe.and, &mb.cf);
611 mod->modtype.and = mb.modtype;
612
613 if (mb.modtype & CGW_MOD_ID)
614 mod->modfunc[modidx++] = mod_and_id;
615
616 if (mb.modtype & CGW_MOD_DLC)
617 mod->modfunc[modidx++] = mod_and_dlc;
618
619 if (mb.modtype & CGW_MOD_DATA)
620 mod->modfunc[modidx++] = mod_and_data;
621 }
622
623 if (tb[CGW_MOD_OR] &&
624 nla_len(tb[CGW_MOD_OR]) == CGW_MODATTR_LEN) {
625 nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN);
626
627 canframecpy(&mod->modframe.or, &mb.cf);
628 mod->modtype.or = mb.modtype;
629
630 if (mb.modtype & CGW_MOD_ID)
631 mod->modfunc[modidx++] = mod_or_id;
632
633 if (mb.modtype & CGW_MOD_DLC)
634 mod->modfunc[modidx++] = mod_or_dlc;
635
636 if (mb.modtype & CGW_MOD_DATA)
637 mod->modfunc[modidx++] = mod_or_data;
638 }
639
640 if (tb[CGW_MOD_XOR] &&
641 nla_len(tb[CGW_MOD_XOR]) == CGW_MODATTR_LEN) {
642 nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN);
643
644 canframecpy(&mod->modframe.xor, &mb.cf);
645 mod->modtype.xor = mb.modtype;
646
647 if (mb.modtype & CGW_MOD_ID)
648 mod->modfunc[modidx++] = mod_xor_id;
649
650 if (mb.modtype & CGW_MOD_DLC)
651 mod->modfunc[modidx++] = mod_xor_dlc;
652
653 if (mb.modtype & CGW_MOD_DATA)
654 mod->modfunc[modidx++] = mod_xor_data;
655 }
656
657 if (tb[CGW_MOD_SET] &&
658 nla_len(tb[CGW_MOD_SET]) == CGW_MODATTR_LEN) {
659 nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN);
660
661 canframecpy(&mod->modframe.set, &mb.cf);
662 mod->modtype.set = mb.modtype;
663
664 if (mb.modtype & CGW_MOD_ID)
665 mod->modfunc[modidx++] = mod_set_id;
666
667 if (mb.modtype & CGW_MOD_DLC)
668 mod->modfunc[modidx++] = mod_set_dlc;
669
670 if (mb.modtype & CGW_MOD_DATA)
671 mod->modfunc[modidx++] = mod_set_data;
672 }
673
674 /* check for checksum operations after CAN frame modifications */
675 if (modidx) {
676
677 if (tb[CGW_CS_CRC8] &&
678 nla_len(tb[CGW_CS_CRC8]) == CGW_CS_CRC8_LEN) {
679
680 struct cgw_csum_crc8 *c = (struct cgw_csum_crc8 *)\
681 nla_data(tb[CGW_CS_CRC8]);
682
683 err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
684 c->result_idx);
685 if (err)
686 return err;
687
688 nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8],
689 CGW_CS_CRC8_LEN);
690
691 /*
692 * select dedicated processing function to reduce
693 * runtime operations in receive hot path.
694 */
695 if (c->from_idx < 0 || c->to_idx < 0 ||
696 c->result_idx < 0)
697 mod->csumfunc.crc8 = cgw_csum_crc8_rel;
698 else if (c->from_idx <= c->to_idx)
699 mod->csumfunc.crc8 = cgw_csum_crc8_pos;
700 else
701 mod->csumfunc.crc8 = cgw_csum_crc8_neg;
702 }
703
704 if (tb[CGW_CS_XOR] &&
705 nla_len(tb[CGW_CS_XOR]) == CGW_CS_XOR_LEN) {
706
707 struct cgw_csum_xor *c = (struct cgw_csum_xor *)\
708 nla_data(tb[CGW_CS_XOR]);
709
710 err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
711 c->result_idx);
712 if (err)
713 return err;
714
715 nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR],
716 CGW_CS_XOR_LEN);
717
718 /*
719 * select dedicated processing function to reduce
720 * runtime operations in receive hot path.
721 */
722 if (c->from_idx < 0 || c->to_idx < 0 ||
723 c->result_idx < 0)
724 mod->csumfunc.xor = cgw_csum_xor_rel;
725 else if (c->from_idx <= c->to_idx)
726 mod->csumfunc.xor = cgw_csum_xor_pos;
727 else
728 mod->csumfunc.xor = cgw_csum_xor_neg;
729 }
730 }
731
732 if (gwtype == CGW_TYPE_CAN_CAN) {
733
734 /* check CGW_TYPE_CAN_CAN specific attributes */
735
736 struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr;
737 memset(ccgw, 0, sizeof(*ccgw));
738
739 /* check for can_filter in attributes */
740 if (tb[CGW_FILTER] &&
741 nla_len(tb[CGW_FILTER]) == sizeof(struct can_filter))
742 nla_memcpy(&ccgw->filter, tb[CGW_FILTER],
743 sizeof(struct can_filter));
744
745 err = -ENODEV;
746
747 /* specifying two interfaces is mandatory */
748 if (!tb[CGW_SRC_IF] || !tb[CGW_DST_IF])
749 return err;
750
751 if (nla_len(tb[CGW_SRC_IF]) == sizeof(u32))
752 nla_memcpy(&ccgw->src_idx, tb[CGW_SRC_IF],
753 sizeof(u32));
754
755 if (nla_len(tb[CGW_DST_IF]) == sizeof(u32))
756 nla_memcpy(&ccgw->dst_idx, tb[CGW_DST_IF],
757 sizeof(u32));
758
759 /* both indices set to 0 for flushing all routing entries */
760 if (!ccgw->src_idx && !ccgw->dst_idx)
761 return 0;
762
763 /* only one index set to 0 is an error */
764 if (!ccgw->src_idx || !ccgw->dst_idx)
765 return err;
766 }
767
768 /* add the checks for other gwtypes here */
769
770 return 0;
771}
772
773static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
774 void *arg)
775{
776 struct rtcanmsg *r;
777 struct cgw_job *gwj;
778 int err = 0;
779
780 if (nlmsg_len(nlh) < sizeof(*r))
781 return -EINVAL;
782
783 r = nlmsg_data(nlh);
784 if (r->can_family != AF_CAN)
785 return -EPFNOSUPPORT;
786
787 /* so far we only support CAN -> CAN routings */
788 if (r->gwtype != CGW_TYPE_CAN_CAN)
789 return -EINVAL;
790
791 gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL);
792 if (!gwj)
793 return -ENOMEM;
794
795 gwj->handled_frames = 0;
796 gwj->dropped_frames = 0;
797 gwj->flags = r->flags;
798 gwj->gwtype = r->gwtype;
799
800 err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw);
801 if (err < 0)
802 goto out;
803
804 err = -ENODEV;
805
806 /* ifindex == 0 is not allowed for job creation */
807 if (!gwj->ccgw.src_idx || !gwj->ccgw.dst_idx)
808 goto out;
809
810 gwj->src.dev = dev_get_by_index(&init_net, gwj->ccgw.src_idx);
811
812 if (!gwj->src.dev)
813 goto out;
814
815	/* check for CAN netdev not using header_ops - see can_can_gw_rcv() */
816 if (gwj->src.dev->type != ARPHRD_CAN || gwj->src.dev->header_ops)
817 goto put_src_out;
818
819 gwj->dst.dev = dev_get_by_index(&init_net, gwj->ccgw.dst_idx);
820
821 if (!gwj->dst.dev)
822 goto put_src_out;
823
824	/* check for CAN netdev not using header_ops - see can_can_gw_rcv() */
825 if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops)
826 goto put_src_dst_out;
827
828 ASSERT_RTNL();
829
830 err = cgw_register_filter(gwj);
831 if (!err)
832 hlist_add_head_rcu(&gwj->list, &cgw_list);
833
834put_src_dst_out:
835 dev_put(gwj->dst.dev);
836put_src_out:
837 dev_put(gwj->src.dev);
838out:
839 if (err)
840 kmem_cache_free(cgw_cache, gwj);
841
842 return err;
843}
844
845static void cgw_remove_all_jobs(void)
846{
847 struct cgw_job *gwj = NULL;
848 struct hlist_node *n, *nx;
849
850 ASSERT_RTNL();
851
852 hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) {
853 hlist_del(&gwj->list);
854 cgw_unregister_filter(gwj);
855		kmem_cache_free(cgw_cache, gwj);
856 }
857}
858
859static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
860{
861 struct cgw_job *gwj = NULL;
862 struct hlist_node *n, *nx;
863 struct rtcanmsg *r;
864 struct cf_mod mod;
865 struct can_can_gw ccgw;
866 int err = 0;
867
868 if (nlmsg_len(nlh) < sizeof(*r))
869 return -EINVAL;
870
871 r = nlmsg_data(nlh);
872 if (r->can_family != AF_CAN)
873 return -EPFNOSUPPORT;
874
875 /* so far we only support CAN -> CAN routings */
876 if (r->gwtype != CGW_TYPE_CAN_CAN)
877 return -EINVAL;
878
879 err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw);
880 if (err < 0)
881 return err;
882
883 /* two interface indices both set to 0 => remove all entries */
884 if (!ccgw.src_idx && !ccgw.dst_idx) {
885 cgw_remove_all_jobs();
886 return 0;
887 }
888
889 err = -EINVAL;
890
891 ASSERT_RTNL();
892
893 /* remove only the first matching entry */
894 hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) {
895
896 if (gwj->flags != r->flags)
897 continue;
898
899 if (memcmp(&gwj->mod, &mod, sizeof(mod)))
900 continue;
901
902		/* gwtype == CGW_TYPE_CAN_CAN has already been checked above */
903 if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
904 continue;
905
906 hlist_del(&gwj->list);
907 cgw_unregister_filter(gwj);
908		kmem_cache_free(cgw_cache, gwj);
909 err = 0;
910 break;
911 }
912
913 return err;
914}
915
916static __init int cgw_module_init(void)
917{
918 printk(banner);
919
920 cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job),
921 0, 0, NULL);
922
923 if (!cgw_cache)
924 return -ENOMEM;
925
926 /* set notifier */
927 notifier.notifier_call = cgw_notifier;
928 register_netdevice_notifier(&notifier);
929
930 if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) {
931 unregister_netdevice_notifier(&notifier);
932 kmem_cache_destroy(cgw_cache);
933 return -ENOBUFS;
934 }
935
936 /* Only the first call to __rtnl_register can fail */
937 __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL);
938 __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL);
939
940 return 0;
941}
942
943static __exit void cgw_module_exit(void)
944{
945 rtnl_unregister_all(PF_CAN);
946
947 unregister_netdevice_notifier(&notifier);
948
949 rtnl_lock();
950 cgw_remove_all_jobs();
951 rtnl_unlock();
952
953 rcu_barrier(); /* Wait for completion of call_rcu()'s */
954
955 kmem_cache_destroy(cgw_cache);
956}
957
958module_init(cgw_module_init);
959module_exit(cgw_module_exit);
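The gateway is configured entirely via rtnetlink: RTM_NEWROUTE with can_family AF_CAN creates a job (cgw_create_job), RTM_DELROUTE removes one (cgw_remove_job) and RTM_GETROUTE dumps all jobs (cgw_dump_jobs). Below is a minimal userspace sketch of job creation, not part of the patch: it assumes the uapi definitions from <linux/can/gw.h> (struct rtcanmsg, CGW_TYPE_CAN_CAN, CGW_SRC_IF, CGW_DST_IF) and that AF_CAN is provided by the socket headers, uses the hypothetical interface names can0/can1, and omits error handling and the acknowledgement read.

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <net/if.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/rtnetlink.h>
	#include <linux/can/gw.h>

	int main(void)
	{
		struct {
			struct nlmsghdr nh;
			struct rtcanmsg rtcan;
			char buf[64];			/* room for the attributes */
		} req;
		struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
		struct rtattr *rta;
		__u32 src = if_nametoindex("can0");	/* hypothetical interfaces */
		__u32 dst = if_nametoindex("can1");
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

		memset(&req, 0, sizeof(req));
		req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtcanmsg));
		req.nh.nlmsg_type = RTM_NEWROUTE;
		req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
		req.rtcan.can_family = AF_CAN;
		req.rtcan.gwtype = CGW_TYPE_CAN_CAN;
		req.rtcan.flags = 0;

		/* CGW_SRC_IF: ifindex the frames are received on */
		rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nh.nlmsg_len));
		rta->rta_type = CGW_SRC_IF;
		rta->rta_len = RTA_LENGTH(sizeof(__u32));
		memcpy(RTA_DATA(rta), &src, sizeof(__u32));
		req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

		/* CGW_DST_IF: ifindex the routed frames are sent to */
		rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nh.nlmsg_len));
		rta->rta_type = CGW_DST_IF;
		rta->rta_len = RTA_LENGTH(sizeof(__u32));
		memcpy(RTA_DATA(rta), &dst, sizeof(__u32));
		req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

		if (sendto(fd, &req, req.nh.nlmsg_len, 0,
			   (struct sockaddr *)&sa, sizeof(sa)) < 0)
			perror("sendto");
		close(fd);
		return 0;
	}

The two ifindex attributes are the only mandatory ones per cgw_parse_attr(); CGW_FILTER and the CGW_MOD_*/CGW_CS_* attributes can be appended the same way to restrict or modify the routed frames.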
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 132963abc266..2883ea01e680 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -232,6 +232,7 @@ void ceph_destroy_options(struct ceph_options *opt)
232 ceph_crypto_key_destroy(opt->key); 232 ceph_crypto_key_destroy(opt->key);
233 kfree(opt->key); 233 kfree(opt->key);
234 } 234 }
235 kfree(opt->mon_addr);
235 kfree(opt); 236 kfree(opt);
236} 237}
237EXPORT_SYMBOL(ceph_destroy_options); 238EXPORT_SYMBOL(ceph_destroy_options);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index c340e2e0765b..9918e9eb276e 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2307,6 +2307,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
2307 m->front_max = front_len; 2307 m->front_max = front_len;
2308 m->front_is_vmalloc = false; 2308 m->front_is_vmalloc = false;
2309 m->more_to_follow = false; 2309 m->more_to_follow = false;
2310 m->ack_stamp = 0;
2310 m->pool = NULL; 2311 m->pool = NULL;
2311 2312
2312 /* middle */ 2313 /* middle */
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index d5f2d97ac05c..1f4cb30a42c5 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -7,27 +7,37 @@
7 7
8#include <linux/ceph/msgpool.h> 8#include <linux/ceph/msgpool.h>
9 9
10static void *alloc_fn(gfp_t gfp_mask, void *arg) 10static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
11{ 11{
12 struct ceph_msgpool *pool = arg; 12 struct ceph_msgpool *pool = arg;
13 void *p; 13 struct ceph_msg *msg;
14 14
15 p = ceph_msg_new(0, pool->front_len, gfp_mask); 15 msg = ceph_msg_new(0, pool->front_len, gfp_mask);
16 if (!p) 16 if (!msg) {
17 pr_err("msgpool %s alloc failed\n", pool->name); 17 dout("msgpool_alloc %s failed\n", pool->name);
18 return p; 18 } else {
19 dout("msgpool_alloc %s %p\n", pool->name, msg);
20 msg->pool = pool;
21 }
22 return msg;
19} 23}
20 24
21static void free_fn(void *element, void *arg) 25static void msgpool_free(void *element, void *arg)
22{ 26{
23 ceph_msg_put(element); 27 struct ceph_msgpool *pool = arg;
28 struct ceph_msg *msg = element;
29
30 dout("msgpool_release %s %p\n", pool->name, msg);
31 msg->pool = NULL;
32 ceph_msg_put(msg);
24} 33}
25 34
26int ceph_msgpool_init(struct ceph_msgpool *pool, 35int ceph_msgpool_init(struct ceph_msgpool *pool,
27 int front_len, int size, bool blocking, const char *name) 36 int front_len, int size, bool blocking, const char *name)
28{ 37{
38 dout("msgpool %s init\n", name);
29 pool->front_len = front_len; 39 pool->front_len = front_len;
30 pool->pool = mempool_create(size, alloc_fn, free_fn, pool); 40 pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
31 if (!pool->pool) 41 if (!pool->pool)
32 return -ENOMEM; 42 return -ENOMEM;
33 pool->name = name; 43 pool->name = name;
@@ -36,14 +46,17 @@ int ceph_msgpool_init(struct ceph_msgpool *pool,
36 46
37void ceph_msgpool_destroy(struct ceph_msgpool *pool) 47void ceph_msgpool_destroy(struct ceph_msgpool *pool)
38{ 48{
49 dout("msgpool %s destroy\n", pool->name);
39 mempool_destroy(pool->pool); 50 mempool_destroy(pool->pool);
40} 51}
41 52
42struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, 53struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
43 int front_len) 54 int front_len)
44{ 55{
56 struct ceph_msg *msg;
57
45 if (front_len > pool->front_len) { 58 if (front_len > pool->front_len) {
46 pr_err("msgpool_get pool %s need front %d, pool size is %d\n", 59 dout("msgpool_get %s need front %d, pool size is %d\n",
47 pool->name, front_len, pool->front_len); 60 pool->name, front_len, pool->front_len);
48 WARN_ON(1); 61 WARN_ON(1);
49 62
@@ -51,14 +64,19 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
51 return ceph_msg_new(0, front_len, GFP_NOFS); 64 return ceph_msg_new(0, front_len, GFP_NOFS);
52 } 65 }
53 66
54 return mempool_alloc(pool->pool, GFP_NOFS); 67 msg = mempool_alloc(pool->pool, GFP_NOFS);
68 dout("msgpool_get %s %p\n", pool->name, msg);
69 return msg;
55} 70}
56 71
57void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) 72void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
58{ 73{
74 dout("msgpool_put %s %p\n", pool->name, msg);
75
59 /* reset msg front_len; user may have changed it */ 76 /* reset msg front_len; user may have changed it */
60 msg->front.iov_len = pool->front_len; 77 msg->front.iov_len = pool->front_len;
61 msg->hdr.front_len = cpu_to_le32(pool->front_len); 78 msg->hdr.front_len = cpu_to_le32(pool->front_len);
62 79
63 kref_init(&msg->kref); /* retake single ref */ 80 kref_init(&msg->kref); /* retake single ref */
81 mempool_free(msg, pool->pool);
64} 82}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ce310eee708d..88ad8a2501b5 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -217,6 +217,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
217 INIT_LIST_HEAD(&req->r_unsafe_item); 217 INIT_LIST_HEAD(&req->r_unsafe_item);
218 INIT_LIST_HEAD(&req->r_linger_item); 218 INIT_LIST_HEAD(&req->r_linger_item);
219 INIT_LIST_HEAD(&req->r_linger_osd); 219 INIT_LIST_HEAD(&req->r_linger_osd);
220 INIT_LIST_HEAD(&req->r_req_lru_item);
220 req->r_flags = flags; 221 req->r_flags = flags;
221 222
222 WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); 223 WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
@@ -685,6 +686,18 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
685 put_osd(osd); 686 put_osd(osd);
686} 687}
687 688
689static void remove_all_osds(struct ceph_osd_client *osdc)
690{
691 dout("__remove_old_osds %p\n", osdc);
692 mutex_lock(&osdc->request_mutex);
693 while (!RB_EMPTY_ROOT(&osdc->osds)) {
694 struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
695 struct ceph_osd, o_node);
696 __remove_osd(osdc, osd);
697 }
698 mutex_unlock(&osdc->request_mutex);
699}
700
688static void __move_osd_to_lru(struct ceph_osd_client *osdc, 701static void __move_osd_to_lru(struct ceph_osd_client *osdc,
689 struct ceph_osd *osd) 702 struct ceph_osd *osd)
690{ 703{
@@ -701,14 +714,14 @@ static void __remove_osd_from_lru(struct ceph_osd *osd)
701 list_del_init(&osd->o_osd_lru); 714 list_del_init(&osd->o_osd_lru);
702} 715}
703 716
704static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all) 717static void remove_old_osds(struct ceph_osd_client *osdc)
705{ 718{
706 struct ceph_osd *osd, *nosd; 719 struct ceph_osd *osd, *nosd;
707 720
708 dout("__remove_old_osds %p\n", osdc); 721 dout("__remove_old_osds %p\n", osdc);
709 mutex_lock(&osdc->request_mutex); 722 mutex_lock(&osdc->request_mutex);
710 list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { 723 list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) {
711 if (!remove_all && time_before(jiffies, osd->lru_ttl)) 724 if (time_before(jiffies, osd->lru_ttl))
712 break; 725 break;
713 __remove_osd(osdc, osd); 726 __remove_osd(osdc, osd);
714 } 727 }
@@ -751,6 +764,7 @@ static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new)
751 struct rb_node *parent = NULL; 764 struct rb_node *parent = NULL;
752 struct ceph_osd *osd = NULL; 765 struct ceph_osd *osd = NULL;
753 766
767 dout("__insert_osd %p osd%d\n", new, new->o_osd);
754 while (*p) { 768 while (*p) {
755 parent = *p; 769 parent = *p;
756 osd = rb_entry(parent, struct ceph_osd, o_node); 770 osd = rb_entry(parent, struct ceph_osd, o_node);
@@ -803,13 +817,10 @@ static void __register_request(struct ceph_osd_client *osdc,
803{ 817{
804 req->r_tid = ++osdc->last_tid; 818 req->r_tid = ++osdc->last_tid;
805 req->r_request->hdr.tid = cpu_to_le64(req->r_tid); 819 req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
806 INIT_LIST_HEAD(&req->r_req_lru_item);
807
808 dout("__register_request %p tid %lld\n", req, req->r_tid); 820 dout("__register_request %p tid %lld\n", req, req->r_tid);
809 __insert_request(osdc, req); 821 __insert_request(osdc, req);
810 ceph_osdc_get_request(req); 822 ceph_osdc_get_request(req);
811 osdc->num_requests++; 823 osdc->num_requests++;
812
813 if (osdc->num_requests == 1) { 824 if (osdc->num_requests == 1) {
814 dout(" first request, scheduling timeout\n"); 825 dout(" first request, scheduling timeout\n");
815 __schedule_osd_timeout(osdc); 826 __schedule_osd_timeout(osdc);
@@ -1144,7 +1155,7 @@ static void handle_osds_timeout(struct work_struct *work)
1144 1155
1145 dout("osds timeout\n"); 1156 dout("osds timeout\n");
1146 down_read(&osdc->map_sem); 1157 down_read(&osdc->map_sem);
1147 remove_old_osds(osdc, 0); 1158 remove_old_osds(osdc);
1148 up_read(&osdc->map_sem); 1159 up_read(&osdc->map_sem);
1149 1160
1150 schedule_delayed_work(&osdc->osds_timeout_work, 1161 schedule_delayed_work(&osdc->osds_timeout_work,
@@ -1862,8 +1873,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
1862 ceph_osdmap_destroy(osdc->osdmap); 1873 ceph_osdmap_destroy(osdc->osdmap);
1863 osdc->osdmap = NULL; 1874 osdc->osdmap = NULL;
1864 } 1875 }
1865 remove_old_osds(osdc, 1); 1876 remove_all_osds(osdc);
1866 WARN_ON(!RB_EMPTY_ROOT(&osdc->osds));
1867 mempool_destroy(osdc->req_mempool); 1877 mempool_destroy(osdc->req_mempool);
1868 ceph_msgpool_destroy(&osdc->msgpool_op); 1878 ceph_msgpool_destroy(&osdc->msgpool_op);
1869 ceph_msgpool_destroy(&osdc->msgpool_op_reply); 1879 ceph_msgpool_destroy(&osdc->msgpool_op_reply);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index e97c3588c3ec..fd863fe76934 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -339,6 +339,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new,
339 struct ceph_pg_mapping *pg = NULL; 339 struct ceph_pg_mapping *pg = NULL;
340 int c; 340 int c;
341 341
342 dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new);
342 while (*p) { 343 while (*p) {
343 parent = *p; 344 parent = *p;
344 pg = rb_entry(parent, struct ceph_pg_mapping, node); 345 pg = rb_entry(parent, struct ceph_pg_mapping, node);
@@ -366,16 +367,33 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
366 while (n) { 367 while (n) {
367 pg = rb_entry(n, struct ceph_pg_mapping, node); 368 pg = rb_entry(n, struct ceph_pg_mapping, node);
368 c = pgid_cmp(pgid, pg->pgid); 369 c = pgid_cmp(pgid, pg->pgid);
369 if (c < 0) 370 if (c < 0) {
370 n = n->rb_left; 371 n = n->rb_left;
371 else if (c > 0) 372 } else if (c > 0) {
372 n = n->rb_right; 373 n = n->rb_right;
373 else 374 } else {
375 dout("__lookup_pg_mapping %llx got %p\n",
376 *(u64 *)&pgid, pg);
374 return pg; 377 return pg;
378 }
375 } 379 }
376 return NULL; 380 return NULL;
377} 381}
378 382
383static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
384{
385 struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
386
387 if (pg) {
388 dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg);
389 rb_erase(&pg->node, root);
390 kfree(pg);
391 return 0;
392 }
393 dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid);
394 return -ENOENT;
395}
396
379/* 397/*
380 * rbtree of pg pool info 398 * rbtree of pg pool info
381 */ 399 */
@@ -711,7 +729,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
711 void *start = *p; 729 void *start = *p;
712 int err = -EINVAL; 730 int err = -EINVAL;
713 u16 version; 731 u16 version;
714 struct rb_node *rbp;
715 732
716 ceph_decode_16_safe(p, end, version, bad); 733 ceph_decode_16_safe(p, end, version, bad);
717 if (version > CEPH_OSDMAP_INC_VERSION) { 734 if (version > CEPH_OSDMAP_INC_VERSION) {
@@ -861,7 +878,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
861 } 878 }
862 879
863 /* new_pg_temp */ 880 /* new_pg_temp */
864 rbp = rb_first(&map->pg_temp);
865 ceph_decode_32_safe(p, end, len, bad); 881 ceph_decode_32_safe(p, end, len, bad);
866 while (len--) { 882 while (len--) {
867 struct ceph_pg_mapping *pg; 883 struct ceph_pg_mapping *pg;
@@ -872,18 +888,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
872 ceph_decode_copy(p, &pgid, sizeof(pgid)); 888 ceph_decode_copy(p, &pgid, sizeof(pgid));
873 pglen = ceph_decode_32(p); 889 pglen = ceph_decode_32(p);
874 890
875 /* remove any? */
876 while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
877 node)->pgid, pgid) <= 0) {
878 struct ceph_pg_mapping *cur =
879 rb_entry(rbp, struct ceph_pg_mapping, node);
880
881 rbp = rb_next(rbp);
882 dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
883 rb_erase(&cur->node, &map->pg_temp);
884 kfree(cur);
885 }
886
887 if (pglen) { 891 if (pglen) {
888 /* insert */ 892 /* insert */
889 ceph_decode_need(p, end, pglen*sizeof(u32), bad); 893 ceph_decode_need(p, end, pglen*sizeof(u32), bad);
@@ -903,17 +907,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
903 } 907 }
904 dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, 908 dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
905 pglen); 909 pglen);
910 } else {
911 /* remove */
912 __remove_pg_mapping(&map->pg_temp, pgid);
906 } 913 }
907 } 914 }
908 while (rbp) {
909 struct ceph_pg_mapping *cur =
910 rb_entry(rbp, struct ceph_pg_mapping, node);
911
912 rbp = rb_next(rbp);
913 dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
914 rb_erase(&cur->node, &map->pg_temp);
915 kfree(cur);
916 }
917 915
918 /* ignore the rest */ 916 /* ignore the rest */
919 *p = end; 917 *p = end;
@@ -1046,10 +1044,25 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1046 struct ceph_pg_mapping *pg; 1044 struct ceph_pg_mapping *pg;
1047 struct ceph_pg_pool_info *pool; 1045 struct ceph_pg_pool_info *pool;
1048 int ruleno; 1046 int ruleno;
1049 unsigned poolid, ps, pps; 1047 unsigned poolid, ps, pps, t;
1050 int preferred; 1048 int preferred;
1051 1049
1050 poolid = le32_to_cpu(pgid.pool);
1051 ps = le16_to_cpu(pgid.ps);
1052 preferred = (s16)le16_to_cpu(pgid.preferred);
1053
1054 pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
1055 if (!pool)
1056 return NULL;
1057
1052 /* pg_temp? */ 1058 /* pg_temp? */
1059 if (preferred >= 0)
1060 t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num),
1061 pool->lpgp_num_mask);
1062 else
1063 t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
1064 pool->pgp_num_mask);
1065 pgid.ps = cpu_to_le16(t);
1053 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); 1066 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
1054 if (pg) { 1067 if (pg) {
1055 *num = pg->len; 1068 *num = pg->len;
@@ -1057,18 +1070,6 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1057 } 1070 }
1058 1071
1059 /* crush */ 1072 /* crush */
1060 poolid = le32_to_cpu(pgid.pool);
1061 ps = le16_to_cpu(pgid.ps);
1062 preferred = (s16)le16_to_cpu(pgid.preferred);
1063
1064 /* don't forcefeed bad device ids to crush */
1065 if (preferred >= osdmap->max_osd ||
1066 preferred >= osdmap->crush->max_devices)
1067 preferred = -1;
1068
1069 pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
1070 if (!pool)
1071 return NULL;
1072 ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, 1073 ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
1073 pool->v.type, pool->v.size); 1074 pool->v.type, pool->v.size);
1074 if (ruleno < 0) { 1075 if (ruleno < 0) {
@@ -1078,6 +1079,11 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1078 return NULL; 1079 return NULL;
1079 } 1080 }
1080 1081
1082 /* don't forcefeed bad device ids to crush */
1083 if (preferred >= osdmap->max_osd ||
1084 preferred >= osdmap->crush->max_devices)
1085 preferred = -1;
1086
1081 if (preferred >= 0) 1087 if (preferred >= 0)
1082 pps = ceph_stable_mod(ps, 1088 pps = ceph_stable_mod(ps,
1083 le32_to_cpu(pool->v.lpgp_num), 1089 le32_to_cpu(pool->v.lpgp_num),
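A side note on the calc_pg_raw() hunk above: the pg_temp lookup now happens only after the placement seed has been folded with ceph_stable_mod(), so explicit pg_temp entries are found under the same masked pgid they were stored with. For orientation, a sketch of that helper, assuming the usual definition from the ceph osdmap header (not part of this diff):

	/* map x into [0, b) in a way that stays stable while b grows:
	 * fall back to the smaller mask when the masked value is out of range */
	static inline int ceph_stable_mod(int x, int b, int bmask)
	{
		if ((x & bmask) < b)
			return x & bmask;
		else
			return x & (bmask >> 1);
	}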
diff --git a/net/core/Makefile b/net/core/Makefile
index 8a04dd22cf77..0d357b1c4e57 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -3,7 +3,7 @@
3# 3#
4 4
5obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ 5obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
6 gen_stats.o gen_estimator.o net_namespace.o 6 gen_stats.o gen_estimator.o net_namespace.o secure_seq.o
7 7
8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o 8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
9 9
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 18ac112ea7ae..6449bed457d4 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -332,7 +332,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
332 int err; 332 int err;
333 u8 *vaddr; 333 u8 *vaddr;
334 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 334 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
335 struct page *page = frag->page; 335 struct page *page = skb_frag_page(frag);
336 336
337 if (copy > len) 337 if (copy > len)
338 copy = len; 338 copy = len;
@@ -418,7 +418,7 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
418 int err; 418 int err;
419 u8 *vaddr; 419 u8 *vaddr;
420 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 420 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
421 struct page *page = frag->page; 421 struct page *page = skb_frag_page(frag);
422 422
423 if (copy > len) 423 if (copy > len)
424 copy = len; 424 copy = len;
@@ -508,7 +508,7 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
508 int err; 508 int err;
509 u8 *vaddr; 509 u8 *vaddr;
510 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 510 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
511 struct page *page = frag->page; 511 struct page *page = skb_frag_page(frag);
512 512
513 if (copy > len) 513 if (copy > len)
514 copy = len; 514 copy = len;
@@ -594,7 +594,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
594 int err = 0; 594 int err = 0;
595 u8 *vaddr; 595 u8 *vaddr;
596 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 596 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
597 struct page *page = frag->page; 597 struct page *page = skb_frag_page(frag);
598 598
599 if (copy > len) 599 if (copy > len)
600 copy = len; 600 copy = len;
diff --git a/net/core/dev.c b/net/core/dev.c
index 17d67b579beb..70ecb86439ca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -133,6 +133,9 @@
133#include <linux/pci.h> 133#include <linux/pci.h>
134#include <linux/inetdevice.h> 134#include <linux/inetdevice.h>
135#include <linux/cpu_rmap.h> 135#include <linux/cpu_rmap.h>
136#include <linux/if_tunnel.h>
137#include <linux/if_pppox.h>
138#include <linux/ppp_defs.h>
136 139
137#include "net-sysfs.h" 140#include "net-sysfs.h"
138 141
@@ -1515,6 +1518,14 @@ static inline bool is_skb_forwardable(struct net_device *dev,
1515 */ 1518 */
1516int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) 1519int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1517{ 1520{
1521 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1522 if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1523 atomic_long_inc(&dev->rx_dropped);
1524 kfree_skb(skb);
1525 return NET_RX_DROP;
1526 }
1527 }
1528
1518 skb_orphan(skb); 1529 skb_orphan(skb);
1519 nf_reset(skb); 1530 nf_reset(skb);
1520 1531
@@ -1947,9 +1958,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1947#ifdef CONFIG_HIGHMEM 1958#ifdef CONFIG_HIGHMEM
1948 int i; 1959 int i;
1949 if (!(dev->features & NETIF_F_HIGHDMA)) { 1960 if (!(dev->features & NETIF_F_HIGHDMA)) {
1950 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 1961 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1951 if (PageHighMem(skb_shinfo(skb)->frags[i].page)) 1962 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1963 if (PageHighMem(skb_frag_page(frag)))
1952 return 1; 1964 return 1;
1965 }
1953 } 1966 }
1954 1967
1955 if (PCI_DMA_BUS_IS_PHYS) { 1968 if (PCI_DMA_BUS_IS_PHYS) {
@@ -1958,7 +1971,8 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1958 if (!pdev) 1971 if (!pdev)
1959 return 0; 1972 return 0;
1960 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1973 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1961 dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page); 1974 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1975 dma_addr_t addr = page_to_phys(skb_frag_page(frag));
1962 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) 1976 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
1963 return 1; 1977 return 1;
1964 } 1978 }
@@ -2519,25 +2533,31 @@ static inline void ____napi_schedule(struct softnet_data *sd,
2519 2533
2520/* 2534/*
2521 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses 2535 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
2522 * and src/dst port numbers. Returns a non-zero hash number on success 2536 * and src/dst port numbers. Sets rxhash in skb to non-zero hash value
2523 * and 0 on failure. 2537 * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb
2538 * if hash is a canonical 4-tuple hash over transport ports.
2524 */ 2539 */
2525__u32 __skb_get_rxhash(struct sk_buff *skb) 2540void __skb_get_rxhash(struct sk_buff *skb)
2526{ 2541{
2527 int nhoff, hash = 0, poff; 2542 int nhoff, hash = 0, poff;
2528 const struct ipv6hdr *ip6; 2543 const struct ipv6hdr *ip6;
2529 const struct iphdr *ip; 2544 const struct iphdr *ip;
2545 const struct vlan_hdr *vlan;
2530 u8 ip_proto; 2546 u8 ip_proto;
2531 u32 addr1, addr2, ihl; 2547 u32 addr1, addr2;
2548 u16 proto;
2532 union { 2549 union {
2533 u32 v32; 2550 u32 v32;
2534 u16 v16[2]; 2551 u16 v16[2];
2535 } ports; 2552 } ports;
2536 2553
2537 nhoff = skb_network_offset(skb); 2554 nhoff = skb_network_offset(skb);
2555 proto = skb->protocol;
2538 2556
2539 switch (skb->protocol) { 2557again:
2558 switch (proto) {
2540 case __constant_htons(ETH_P_IP): 2559 case __constant_htons(ETH_P_IP):
2560ip:
2541 if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) 2561 if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
2542 goto done; 2562 goto done;
2543 2563
@@ -2548,9 +2568,10 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
2548 ip_proto = ip->protocol; 2568 ip_proto = ip->protocol;
2549 addr1 = (__force u32) ip->saddr; 2569 addr1 = (__force u32) ip->saddr;
2550 addr2 = (__force u32) ip->daddr; 2570 addr2 = (__force u32) ip->daddr;
2551 ihl = ip->ihl; 2571 nhoff += ip->ihl * 4;
2552 break; 2572 break;
2553 case __constant_htons(ETH_P_IPV6): 2573 case __constant_htons(ETH_P_IPV6):
2574ipv6:
2554 if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) 2575 if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
2555 goto done; 2576 goto done;
2556 2577
@@ -2558,20 +2579,71 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
2558 ip_proto = ip6->nexthdr; 2579 ip_proto = ip6->nexthdr;
2559 addr1 = (__force u32) ip6->saddr.s6_addr32[3]; 2580 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2560 addr2 = (__force u32) ip6->daddr.s6_addr32[3]; 2581 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
2561 ihl = (40 >> 2); 2582 nhoff += 40;
2562 break; 2583 break;
2584 case __constant_htons(ETH_P_8021Q):
2585 if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
2586 goto done;
2587 vlan = (const struct vlan_hdr *) (skb->data + nhoff);
2588 proto = vlan->h_vlan_encapsulated_proto;
2589 nhoff += sizeof(*vlan);
2590 goto again;
2591 case __constant_htons(ETH_P_PPP_SES):
2592 if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
2593 goto done;
2594 proto = *((__be16 *) (skb->data + nhoff +
2595 sizeof(struct pppoe_hdr)));
2596 nhoff += PPPOE_SES_HLEN;
2597 switch (proto) {
2598 case __constant_htons(PPP_IP):
2599 goto ip;
2600 case __constant_htons(PPP_IPV6):
2601 goto ipv6;
2602 default:
2603 goto done;
2604 }
2563 default: 2605 default:
2564 goto done; 2606 goto done;
2565 } 2607 }
2566 2608
2609 switch (ip_proto) {
2610 case IPPROTO_GRE:
2611 if (pskb_may_pull(skb, nhoff + 16)) {
2612 u8 *h = skb->data + nhoff;
2613 __be16 flags = *(__be16 *)h;
2614
2615 /*
2616 * Only look inside GRE if version zero and no
2617 * routing
2618 */
2619 if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
2620 proto = *(__be16 *)(h + 2);
2621 nhoff += 4;
2622 if (flags & GRE_CSUM)
2623 nhoff += 4;
2624 if (flags & GRE_KEY)
2625 nhoff += 4;
2626 if (flags & GRE_SEQ)
2627 nhoff += 4;
2628 goto again;
2629 }
2630 }
2631 break;
2632 case IPPROTO_IPIP:
2633 goto again;
2634 default:
2635 break;
2636 }
2637
2567 ports.v32 = 0; 2638 ports.v32 = 0;
2568 poff = proto_ports_offset(ip_proto); 2639 poff = proto_ports_offset(ip_proto);
2569 if (poff >= 0) { 2640 if (poff >= 0) {
2570 nhoff += ihl * 4 + poff; 2641 nhoff += poff;
2571 if (pskb_may_pull(skb, nhoff + 4)) { 2642 if (pskb_may_pull(skb, nhoff + 4)) {
2572 ports.v32 = * (__force u32 *) (skb->data + nhoff); 2643 ports.v32 = * (__force u32 *) (skb->data + nhoff);
2573 if (ports.v16[1] < ports.v16[0]) 2644 if (ports.v16[1] < ports.v16[0])
2574 swap(ports.v16[0], ports.v16[1]); 2645 swap(ports.v16[0], ports.v16[1]);
2646 skb->l4_rxhash = 1;
2575 } 2647 }
2576 } 2648 }
2577 2649
@@ -2584,7 +2656,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
2584 hash = 1; 2656 hash = 1;
2585 2657
2586done: 2658done:
2587 return hash; 2659 skb->rxhash = hash;
2588} 2660}
2589EXPORT_SYMBOL(__skb_get_rxhash); 2661EXPORT_SYMBOL(__skb_get_rxhash);
2590 2662
@@ -2598,10 +2670,7 @@ static struct rps_dev_flow *
2598set_rps_cpu(struct net_device *dev, struct sk_buff *skb, 2670set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2599 struct rps_dev_flow *rflow, u16 next_cpu) 2671 struct rps_dev_flow *rflow, u16 next_cpu)
2600{ 2672{
2601 u16 tcpu; 2673 if (next_cpu != RPS_NO_CPU) {
2602
2603 tcpu = rflow->cpu = next_cpu;
2604 if (tcpu != RPS_NO_CPU) {
2605#ifdef CONFIG_RFS_ACCEL 2674#ifdef CONFIG_RFS_ACCEL
2606 struct netdev_rx_queue *rxqueue; 2675 struct netdev_rx_queue *rxqueue;
2607 struct rps_dev_flow_table *flow_table; 2676 struct rps_dev_flow_table *flow_table;
@@ -2629,16 +2698,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2629 goto out; 2698 goto out;
2630 old_rflow = rflow; 2699 old_rflow = rflow;
2631 rflow = &flow_table->flows[flow_id]; 2700 rflow = &flow_table->flows[flow_id];
2632 rflow->cpu = next_cpu;
2633 rflow->filter = rc; 2701 rflow->filter = rc;
2634 if (old_rflow->filter == rflow->filter) 2702 if (old_rflow->filter == rflow->filter)
2635 old_rflow->filter = RPS_NO_FILTER; 2703 old_rflow->filter = RPS_NO_FILTER;
2636 out: 2704 out:
2637#endif 2705#endif
2638 rflow->last_qtail = 2706 rflow->last_qtail =
2639 per_cpu(softnet_data, tcpu).input_queue_head; 2707 per_cpu(softnet_data, next_cpu).input_queue_head;
2640 } 2708 }
2641 2709
2710 rflow->cpu = next_cpu;
2642 return rflow; 2711 return rflow;
2643} 2712}
2644 2713
@@ -2673,13 +2742,13 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2673 map = rcu_dereference(rxqueue->rps_map); 2742 map = rcu_dereference(rxqueue->rps_map);
2674 if (map) { 2743 if (map) {
2675 if (map->len == 1 && 2744 if (map->len == 1 &&
2676 !rcu_dereference_raw(rxqueue->rps_flow_table)) { 2745 !rcu_access_pointer(rxqueue->rps_flow_table)) {
2677 tcpu = map->cpus[0]; 2746 tcpu = map->cpus[0];
2678 if (cpu_online(tcpu)) 2747 if (cpu_online(tcpu))
2679 cpu = tcpu; 2748 cpu = tcpu;
2680 goto done; 2749 goto done;
2681 } 2750 }
2682 } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) { 2751 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
2683 goto done; 2752 goto done;
2684 } 2753 }
2685 2754
@@ -3094,8 +3163,8 @@ void netdev_rx_handler_unregister(struct net_device *dev)
3094{ 3163{
3095 3164
3096 ASSERT_RTNL(); 3165 ASSERT_RTNL();
3097 rcu_assign_pointer(dev->rx_handler, NULL); 3166 RCU_INIT_POINTER(dev->rx_handler, NULL);
3098 rcu_assign_pointer(dev->rx_handler_data, NULL); 3167 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3099} 3168}
3100EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); 3169EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3101 3170
@@ -3187,10 +3256,9 @@ ncls:
3187 ret = deliver_skb(skb, pt_prev, orig_dev); 3256 ret = deliver_skb(skb, pt_prev, orig_dev);
3188 pt_prev = NULL; 3257 pt_prev = NULL;
3189 } 3258 }
3190 if (vlan_do_receive(&skb)) { 3259 if (vlan_do_receive(&skb))
3191 ret = __netif_receive_skb(skb); 3260 goto another_round;
3192 goto out; 3261 else if (unlikely(!skb))
3193 } else if (unlikely(!skb))
3194 goto out; 3262 goto out;
3195 } 3263 }
3196 3264
@@ -3424,7 +3492,7 @@ pull:
3424 skb_shinfo(skb)->frags[0].size -= grow; 3492 skb_shinfo(skb)->frags[0].size -= grow;
3425 3493
3426 if (unlikely(!skb_shinfo(skb)->frags[0].size)) { 3494 if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
3427 put_page(skb_shinfo(skb)->frags[0].page); 3495 skb_frag_unref(skb, 0);
3428 memmove(skb_shinfo(skb)->frags, 3496 memmove(skb_shinfo(skb)->frags,
3429 skb_shinfo(skb)->frags + 1, 3497 skb_shinfo(skb)->frags + 1,
3430 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); 3498 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -3488,10 +3556,9 @@ void skb_gro_reset_offset(struct sk_buff *skb)
3488 NAPI_GRO_CB(skb)->frag0_len = 0; 3556 NAPI_GRO_CB(skb)->frag0_len = 0;
3489 3557
3490 if (skb->mac_header == skb->tail && 3558 if (skb->mac_header == skb->tail &&
3491 !PageHighMem(skb_shinfo(skb)->frags[0].page)) { 3559 !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
3492 NAPI_GRO_CB(skb)->frag0 = 3560 NAPI_GRO_CB(skb)->frag0 =
3493 page_address(skb_shinfo(skb)->frags[0].page) + 3561 skb_frag_address(&skb_shinfo(skb)->frags[0]);
3494 skb_shinfo(skb)->frags[0].page_offset;
3495 NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size; 3562 NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
3496 } 3563 }
3497} 3564}
@@ -4489,9 +4556,7 @@ void __dev_set_rx_mode(struct net_device *dev)
4489 if (!netif_device_present(dev)) 4556 if (!netif_device_present(dev))
4490 return; 4557 return;
4491 4558
4492 if (ops->ndo_set_rx_mode) 4559 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
4493 ops->ndo_set_rx_mode(dev);
4494 else {
4495 /* Unicast addresses changes may only happen under the rtnl, 4560 /* Unicast addresses changes may only happen under the rtnl,
4496 * therefore calling __dev_set_promiscuity here is safe. 4561 * therefore calling __dev_set_promiscuity here is safe.
4497 */ 4562 */
@@ -4502,10 +4567,10 @@ void __dev_set_rx_mode(struct net_device *dev)
4502 __dev_set_promiscuity(dev, -1); 4567 __dev_set_promiscuity(dev, -1);
4503 dev->uc_promisc = false; 4568 dev->uc_promisc = false;
4504 } 4569 }
4505
4506 if (ops->ndo_set_multicast_list)
4507 ops->ndo_set_multicast_list(dev);
4508 } 4570 }
4571
4572 if (ops->ndo_set_rx_mode)
4573 ops->ndo_set_rx_mode(dev);
4509} 4574}
4510 4575
4511void dev_set_rx_mode(struct net_device *dev) 4576void dev_set_rx_mode(struct net_device *dev)
@@ -4516,30 +4581,6 @@ void dev_set_rx_mode(struct net_device *dev)
4516} 4581}
4517 4582
4518/** 4583/**
4519 * dev_ethtool_get_settings - call device's ethtool_ops::get_settings()
4520 * @dev: device
4521 * @cmd: memory area for ethtool_ops::get_settings() result
4522 *
4523 * The cmd arg is initialized properly (cleared and
4524 * ethtool_cmd::cmd field set to ETHTOOL_GSET).
4525 *
4526 * Return device's ethtool_ops::get_settings() result value or
4527 * -EOPNOTSUPP when device doesn't expose
4528 * ethtool_ops::get_settings() operation.
4529 */
4530int dev_ethtool_get_settings(struct net_device *dev,
4531 struct ethtool_cmd *cmd)
4532{
4533 if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
4534 return -EOPNOTSUPP;
4535
4536 memset(cmd, 0, sizeof(struct ethtool_cmd));
4537 cmd->cmd = ETHTOOL_GSET;
4538 return dev->ethtool_ops->get_settings(dev, cmd);
4539}
4540EXPORT_SYMBOL(dev_ethtool_get_settings);
4541
4542/**
4543 * dev_get_flags - get flags reported to userspace 4584 * dev_get_flags - get flags reported to userspace
4544 * @dev: device 4585 * @dev: device
4545 * 4586 *
@@ -4855,7 +4896,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4855 return -EOPNOTSUPP; 4896 return -EOPNOTSUPP;
4856 4897
4857 case SIOCADDMULTI: 4898 case SIOCADDMULTI:
4858 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || 4899 if (!ops->ndo_set_rx_mode ||
4859 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) 4900 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4860 return -EINVAL; 4901 return -EINVAL;
4861 if (!netif_device_present(dev)) 4902 if (!netif_device_present(dev))
@@ -4863,7 +4904,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4863 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); 4904 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4864 4905
4865 case SIOCDELMULTI: 4906 case SIOCDELMULTI:
4866 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || 4907 if (!ops->ndo_set_rx_mode ||
4867 ifr->ifr_hwaddr.sa_family != AF_UNSPEC) 4908 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4868 return -EINVAL; 4909 return -EINVAL;
4869 if (!netif_device_present(dev)) 4910 if (!netif_device_present(dev))
@@ -5727,8 +5768,8 @@ void netdev_run_todo(void)
5727 5768
5728 /* paranoia */ 5769 /* paranoia */
5729 BUG_ON(netdev_refcnt_read(dev)); 5770 BUG_ON(netdev_refcnt_read(dev));
5730 WARN_ON(rcu_dereference_raw(dev->ip_ptr)); 5771 WARN_ON(rcu_access_pointer(dev->ip_ptr));
5731 WARN_ON(rcu_dereference_raw(dev->ip6_ptr)); 5772 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
5732 WARN_ON(dev->dn_ptr); 5773 WARN_ON(dev->dn_ptr);
5733 5774
5734 if (dev->destructor) 5775 if (dev->destructor)
@@ -5932,7 +5973,7 @@ void free_netdev(struct net_device *dev)
5932 kfree(dev->_rx); 5973 kfree(dev->_rx);
5933#endif 5974#endif
5934 5975
5935 kfree(rcu_dereference_raw(dev->ingress_queue)); 5976 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
5936 5977
5937 /* Flush device addresses */ 5978 /* Flush device addresses */
5938 dev_addr_flush(dev); 5979 dev_addr_flush(dev);
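A note on the GRE branch added to __skb_get_rxhash() above (editor's note, not part of the patch): the offsets it skips follow the standard GRE header layout.

	/*
	 * GRE header as consumed by the dissector (version 0, no routing):
	 * 2 bytes of flags/version, 2 bytes of encapsulated protocol, then
	 * one optional 4-byte field for each of GRE_CSUM, GRE_KEY and
	 * GRE_SEQ. The inner protocol is then hashed as if it were the
	 * outer one, which is what the "goto again" implements.
	 */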
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index e2e66939ed00..283d1b863876 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -591,8 +591,8 @@ EXPORT_SYMBOL(dev_mc_del_global);
591 * addresses that have no users left. The source device must be 591 * addresses that have no users left. The source device must be
592 * locked by netif_tx_lock_bh. 592 * locked by netif_tx_lock_bh.
593 * 593 *
594 * This function is intended to be called from the dev->set_multicast_list 594 * This function is intended to be called from the ndo_set_rx_mode
595 * or dev->set_rx_mode function of layered software devices. 595 * function of layered software devices.
596 */ 596 */
597int dev_mc_sync(struct net_device *to, struct net_device *from) 597int dev_mc_sync(struct net_device *to, struct net_device *from)
598{ 598{
diff --git a/net/core/dst.c b/net/core/dst.c
index 14b33baf0733..d5e2c4c09107 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
171 dst_init_metrics(dst, dst_default_metrics, true); 171 dst_init_metrics(dst, dst_default_metrics, true);
172 dst->expires = 0UL; 172 dst->expires = 0UL;
173 dst->path = dst; 173 dst->path = dst;
174 dst->_neighbour = NULL; 174 RCU_INIT_POINTER(dst->_neighbour, NULL);
175#ifdef CONFIG_XFRM 175#ifdef CONFIG_XFRM
176 dst->xfrm = NULL; 176 dst->xfrm = NULL;
177#endif 177#endif
@@ -229,11 +229,11 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
229 smp_rmb(); 229 smp_rmb();
230 230
231again: 231again:
232 neigh = dst->_neighbour; 232 neigh = rcu_dereference_protected(dst->_neighbour, 1);
233 child = dst->child; 233 child = dst->child;
234 234
235 if (neigh) { 235 if (neigh) {
236 dst->_neighbour = NULL; 236 RCU_INIT_POINTER(dst->_neighbour, NULL);
237 neigh_release(neigh); 237 neigh_release(neigh);
238 } 238 }
239 239
@@ -360,14 +360,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
360 if (!unregister) { 360 if (!unregister) {
361 dst->input = dst->output = dst_discard; 361 dst->input = dst->output = dst_discard;
362 } else { 362 } else {
363 struct neighbour *neigh;
364
363 dst->dev = dev_net(dst->dev)->loopback_dev; 365 dst->dev = dev_net(dst->dev)->loopback_dev;
364 dev_hold(dst->dev); 366 dev_hold(dst->dev);
365 dev_put(dev); 367 dev_put(dev);
366 if (dst->_neighbour && dst->_neighbour->dev == dev) { 368 rcu_read_lock();
367 dst->_neighbour->dev = dst->dev; 369 neigh = dst_get_neighbour(dst);
370 if (neigh && neigh->dev == dev) {
371 neigh->dev = dst->dev;
368 dev_hold(dst->dev); 372 dev_hold(dst->dev);
369 dev_put(dev); 373 dev_put(dev);
370 } 374 }
375 rcu_read_unlock();
371 } 376 }
372} 377}
373 378
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6cdba5fc2bed..f44481707124 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -569,15 +569,25 @@ int __ethtool_set_flags(struct net_device *dev, u32 data)
569 return 0; 569 return 0;
570} 570}
571 571
572static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) 572int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
573{ 573{
574 struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; 574 ASSERT_RTNL();
575 int err;
576 575
577 if (!dev->ethtool_ops->get_settings) 576 if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
578 return -EOPNOTSUPP; 577 return -EOPNOTSUPP;
579 578
580 err = dev->ethtool_ops->get_settings(dev, &cmd); 579 memset(cmd, 0, sizeof(struct ethtool_cmd));
580 cmd->cmd = ETHTOOL_GSET;
581 return dev->ethtool_ops->get_settings(dev, cmd);
582}
583EXPORT_SYMBOL(__ethtool_get_settings);
584
585static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
586{
587 int err;
588 struct ethtool_cmd cmd;
589
590 err = __ethtool_get_settings(dev, &cmd);
581 if (err < 0) 591 if (err < 0)
582 return err; 592 return err;
583 593
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index e7ab0c0285b5..38be4744133f 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -384,8 +384,8 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
384 */ 384 */
385 list_for_each_entry(r, &ops->rules_list, list) { 385 list_for_each_entry(r, &ops->rules_list, list) {
386 if (r->action == FR_ACT_GOTO && 386 if (r->action == FR_ACT_GOTO &&
387 r->target == rule->pref) { 387 r->target == rule->pref &&
388 BUG_ON(rtnl_dereference(r->ctarget) != NULL); 388 rtnl_dereference(r->ctarget) == NULL) {
389 rcu_assign_pointer(r->ctarget, rule); 389 rcu_assign_pointer(r->ctarget, rule);
390 if (--ops->unresolved_rules == 0) 390 if (--ops->unresolved_rules == 0)
391 break; 391 break;
@@ -487,7 +487,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
487 if (ops->nr_goto_rules > 0) { 487 if (ops->nr_goto_rules > 0) {
488 list_for_each_entry(tmp, &ops->rules_list, list) { 488 list_for_each_entry(tmp, &ops->rules_list, list) {
489 if (rtnl_dereference(tmp->ctarget) == rule) { 489 if (rtnl_dereference(tmp->ctarget) == rule) {
490 rcu_assign_pointer(tmp->ctarget, NULL); 490 RCU_INIT_POINTER(tmp->ctarget, NULL);
491 ops->unresolved_rules++; 491 ops->unresolved_rules++;
492 } 492 }
493 } 493 }
@@ -545,7 +545,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
545 frh->flags = rule->flags; 545 frh->flags = rule->flags;
546 546
547 if (rule->action == FR_ACT_GOTO && 547 if (rule->action == FR_ACT_GOTO &&
548 rcu_dereference_raw(rule->ctarget) == NULL) 548 rcu_access_pointer(rule->ctarget) == NULL)
549 frh->flags |= FIB_RULE_UNRESOLVED; 549 frh->flags |= FIB_RULE_UNRESOLVED;
550 550
551 if (rule->iifname[0]) { 551 if (rule->iifname[0]) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 36f975fa87cb..8fcc2d776e09 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -645,7 +645,7 @@ int sk_detach_filter(struct sock *sk)
645 filter = rcu_dereference_protected(sk->sk_filter, 645 filter = rcu_dereference_protected(sk->sk_filter,
646 sock_owned_by_user(sk)); 646 sock_owned_by_user(sk));
647 if (filter) { 647 if (filter) {
648 rcu_assign_pointer(sk->sk_filter, NULL); 648 RCU_INIT_POINTER(sk->sk_filter, NULL);
649 sk_filter_uncharge(sk, filter); 649 sk_filter_uncharge(sk, filter);
650 ret = 0; 650 ret = 0;
651 } 651 }
diff --git a/net/core/flow.c b/net/core/flow.c
index bf32c33cad3b..555a456efb07 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -30,6 +30,7 @@ struct flow_cache_entry {
30 struct hlist_node hlist; 30 struct hlist_node hlist;
31 struct list_head gc_list; 31 struct list_head gc_list;
32 } u; 32 } u;
33 struct net *net;
33 u16 family; 34 u16 family;
34 u8 dir; 35 u8 dir;
35 u32 genid; 36 u32 genid;
@@ -172,29 +173,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
172 173
173static u32 flow_hash_code(struct flow_cache *fc, 174static u32 flow_hash_code(struct flow_cache *fc,
174 struct flow_cache_percpu *fcp, 175 struct flow_cache_percpu *fcp,
175 const struct flowi *key) 176 const struct flowi *key,
177 size_t keysize)
176{ 178{
177 const u32 *k = (const u32 *) key; 179 const u32 *k = (const u32 *) key;
180 const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
178 181
179 return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) 182 return jhash2(k, length, fcp->hash_rnd)
180 & (flow_cache_hash_size(fc) - 1); 183 & (flow_cache_hash_size(fc) - 1);
181} 184}
182 185
183typedef unsigned long flow_compare_t;
184
185/* I hear what you're saying, use memcmp. But memcmp cannot make 186/* I hear what you're saying, use memcmp. But memcmp cannot make
186 * important assumptions that we can here, such as alignment and 187 * important assumptions that we can here, such as alignment.
187 * constant size.
188 */ 188 */
189static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) 189static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
190 size_t keysize)
190{ 191{
191 const flow_compare_t *k1, *k1_lim, *k2; 192 const flow_compare_t *k1, *k1_lim, *k2;
192 const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
193
194 BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t));
195 193
196 k1 = (const flow_compare_t *) key1; 194 k1 = (const flow_compare_t *) key1;
197 k1_lim = k1 + n_elem; 195 k1_lim = k1 + keysize;
198 196
199 k2 = (const flow_compare_t *) key2; 197 k2 = (const flow_compare_t *) key2;
200 198
@@ -215,6 +213,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
215 struct flow_cache_entry *fle, *tfle; 213 struct flow_cache_entry *fle, *tfle;
216 struct hlist_node *entry; 214 struct hlist_node *entry;
217 struct flow_cache_object *flo; 215 struct flow_cache_object *flo;
216 size_t keysize;
218 unsigned int hash; 217 unsigned int hash;
219 218
220 local_bh_disable(); 219 local_bh_disable();
@@ -222,6 +221,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
222 221
223 fle = NULL; 222 fle = NULL;
224 flo = NULL; 223 flo = NULL;
224
225 keysize = flow_key_size(family);
226 if (!keysize)
227 goto nocache;
228
225 /* Packet really early in init? Making flow_cache_init a 229 /* Packet really early in init? Making flow_cache_init a
226 * pre-smp initcall would solve this. --RR */ 230 * pre-smp initcall would solve this. --RR */
227 if (!fcp->hash_table) 231 if (!fcp->hash_table)
@@ -230,11 +234,12 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
230 if (fcp->hash_rnd_recalc) 234 if (fcp->hash_rnd_recalc)
231 flow_new_hash_rnd(fc, fcp); 235 flow_new_hash_rnd(fc, fcp);
232 236
233 hash = flow_hash_code(fc, fcp, key); 237 hash = flow_hash_code(fc, fcp, key, keysize);
234 hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { 238 hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
235 if (tfle->family == family && 239 if (tfle->net == net &&
240 tfle->family == family &&
236 tfle->dir == dir && 241 tfle->dir == dir &&
237 flow_key_compare(key, &tfle->key) == 0) { 242 flow_key_compare(key, &tfle->key, keysize) == 0) {
238 fle = tfle; 243 fle = tfle;
239 break; 244 break;
240 } 245 }
@@ -246,9 +251,10 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
246 251
247 fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); 252 fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
248 if (fle) { 253 if (fle) {
254 fle->net = net;
249 fle->family = family; 255 fle->family = family;
250 fle->dir = dir; 256 fle->dir = dir;
251 memcpy(&fle->key, key, sizeof(*key)); 257 memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
252 fle->object = NULL; 258 fle->object = NULL;
253 hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); 259 hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
254 fcp->hash_count++; 260 fcp->hash_count++;
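With the per-family key size, both hashing and comparison now cover only the bytes flow_key_size(family) reports, counted in flow_compare_t (unsigned long) words rather than the full sizeof(struct flowi); families without a defined key size fall through to the nocache path. How the comparison reads after the patch, with a loop body that is not shown in the hunk and assumed unchanged (the typedef is simply moved above flow_hash_code() so the latter can use sizeof(flow_compare_t)):

	typedef unsigned long flow_compare_t;

	static int flow_key_compare(const struct flowi *key1,
				    const struct flowi *key2, size_t keysize)
	{
		const flow_compare_t *k1 = (const flow_compare_t *) key1;
		const flow_compare_t *k1_lim = k1 + keysize;	/* keysize in words */
		const flow_compare_t *k2 = (const flow_compare_t *) key2;

		do {
			if (*k1++ != *k2++)
				return 1;
		} while (k1 < k1_lim);

		return 0;
	}

flow_key_size() itself is assumed to be the per-family helper added alongside this patch in the flowi headers.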
diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h
index 283c2b993fb8..81e1ed7c8383 100644
--- a/net/core/kmap_skb.h
+++ b/net/core/kmap_skb.h
@@ -7,7 +7,7 @@ static inline void *kmap_skb_frag(const skb_frag_t *frag)
7 7
8 local_bh_disable(); 8 local_bh_disable();
9#endif 9#endif
10 return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); 10 return kmap_atomic(skb_frag_page(frag), KM_SKB_DATA_SOFTIRQ);
11} 11}
12 12
13static inline void kunmap_skb_frag(void *vaddr) 13static inline void kunmap_skb_frag(void *vaddr)
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 357bd4ee4baa..c3519c6d1b16 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -78,8 +78,13 @@ static void rfc2863_policy(struct net_device *dev)
78 78
79static bool linkwatch_urgent_event(struct net_device *dev) 79static bool linkwatch_urgent_event(struct net_device *dev)
80{ 80{
81 return netif_running(dev) && netif_carrier_ok(dev) && 81 if (!netif_running(dev))
82 qdisc_tx_changing(dev); 82 return false;
83
84 if (dev->ifindex != dev->iflink)
85 return true;
86
87 return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
83} 88}
84 89
85 90
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8fab9b0bb203..43449649cf73 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -844,6 +844,19 @@ static void neigh_invalidate(struct neighbour *neigh)
844 skb_queue_purge(&neigh->arp_queue); 844 skb_queue_purge(&neigh->arp_queue);
845} 845}
846 846
847static void neigh_probe(struct neighbour *neigh)
848 __releases(neigh->lock)
849{
850 struct sk_buff *skb = skb_peek(&neigh->arp_queue);
851 /* keep skb alive even if arp_queue overflows */
852 if (skb)
853 skb = skb_copy(skb, GFP_ATOMIC);
854 write_unlock(&neigh->lock);
855 neigh->ops->solicit(neigh, skb);
856 atomic_inc(&neigh->probes);
857 kfree_skb(skb);
858}
859
847/* Called when a timer expires for a neighbour entry. */ 860/* Called when a timer expires for a neighbour entry. */
848 861
849static void neigh_timer_handler(unsigned long arg) 862static void neigh_timer_handler(unsigned long arg)
@@ -920,14 +933,7 @@ static void neigh_timer_handler(unsigned long arg)
920 neigh_hold(neigh); 933 neigh_hold(neigh);
921 } 934 }
922 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { 935 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
923 struct sk_buff *skb = skb_peek(&neigh->arp_queue); 936 neigh_probe(neigh);
924 /* keep skb alive even if arp_queue overflows */
925 if (skb)
926 skb = skb_copy(skb, GFP_ATOMIC);
927 write_unlock(&neigh->lock);
928 neigh->ops->solicit(neigh, skb);
929 atomic_inc(&neigh->probes);
930 kfree_skb(skb);
931 } else { 937 } else {
932out: 938out:
933 write_unlock(&neigh->lock); 939 write_unlock(&neigh->lock);
@@ -942,7 +948,7 @@ out:
942int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) 948int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
943{ 949{
944 int rc; 950 int rc;
945 unsigned long now; 951 bool immediate_probe = false;
946 952
947 write_lock_bh(&neigh->lock); 953 write_lock_bh(&neigh->lock);
948 954
@@ -950,14 +956,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
950 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) 956 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
951 goto out_unlock_bh; 957 goto out_unlock_bh;
952 958
953 now = jiffies;
954
955 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { 959 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
956 if (neigh->parms->mcast_probes + neigh->parms->app_probes) { 960 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
961 unsigned long next, now = jiffies;
962
957 atomic_set(&neigh->probes, neigh->parms->ucast_probes); 963 atomic_set(&neigh->probes, neigh->parms->ucast_probes);
958 neigh->nud_state = NUD_INCOMPLETE; 964 neigh->nud_state = NUD_INCOMPLETE;
959 neigh->updated = jiffies; 965 neigh->updated = now;
960 neigh_add_timer(neigh, now + 1); 966 next = now + max(neigh->parms->retrans_time, HZ/2);
967 neigh_add_timer(neigh, next);
968 immediate_probe = true;
961 } else { 969 } else {
962 neigh->nud_state = NUD_FAILED; 970 neigh->nud_state = NUD_FAILED;
963 neigh->updated = jiffies; 971 neigh->updated = jiffies;
@@ -989,7 +997,11 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
989 rc = 1; 997 rc = 1;
990 } 998 }
991out_unlock_bh: 999out_unlock_bh:
992 write_unlock_bh(&neigh->lock); 1000 if (immediate_probe)
1001 neigh_probe(neigh);
1002 else
1003 write_unlock(&neigh->lock);
1004 local_bh_enable();
993 return rc; 1005 return rc;
994} 1006}
995EXPORT_SYMBOL(__neigh_event_send); 1007EXPORT_SYMBOL(__neigh_event_send);
@@ -1319,11 +1331,15 @@ static void neigh_proxy_process(unsigned long arg)
1319 1331
1320 if (tdif <= 0) { 1332 if (tdif <= 0) {
1321 struct net_device *dev = skb->dev; 1333 struct net_device *dev = skb->dev;
1334
1322 __skb_unlink(skb, &tbl->proxy_queue); 1335 __skb_unlink(skb, &tbl->proxy_queue);
1323 if (tbl->proxy_redo && netif_running(dev)) 1336 if (tbl->proxy_redo && netif_running(dev)) {
1337 rcu_read_lock();
1324 tbl->proxy_redo(skb); 1338 tbl->proxy_redo(skb);
1325 else 1339 rcu_read_unlock();
1340 } else {
1326 kfree_skb(skb); 1341 kfree_skb(skb);
1342 }
1327 1343
1328 dev_put(dev); 1344 dev_put(dev);
1329 } else if (!sched_next || tdif < sched_next) 1345 } else if (!sched_next || tdif < sched_next)
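Two things worth noting in this file. First, neigh_probe() is annotated __releases(neigh->lock): it drops the write lock itself before calling ->solicit(), so the new exit path of __neigh_event_send() only has to balance the bottom-half part of the original write_lock_bh(). Second, the immediate probe means the first solicitation now goes out synchronously rather than one jiffy later from the timer, while the timer itself is pushed out to at least retrans_time. The exit path with the lock pairing spelled out:

	write_lock_bh(&neigh->lock);	/* write_lock() + local_bh_disable() */
	/* ... state handling ... */
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);	/* releases neigh->lock internally */
	else
		write_unlock(&neigh->lock);
	local_bh_enable();		/* completes the _bh part either way */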
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1683e5db2f27..7604a635376b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -147,7 +147,7 @@ static ssize_t show_speed(struct device *dev,
147 147
148 if (netif_running(netdev)) { 148 if (netif_running(netdev)) {
149 struct ethtool_cmd cmd; 149 struct ethtool_cmd cmd;
150 if (!dev_ethtool_get_settings(netdev, &cmd)) 150 if (!__ethtool_get_settings(netdev, &cmd))
151 ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); 151 ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd));
152 } 152 }
153 rtnl_unlock(); 153 rtnl_unlock();
@@ -165,7 +165,7 @@ static ssize_t show_duplex(struct device *dev,
165 165
166 if (netif_running(netdev)) { 166 if (netif_running(netdev)) {
167 struct ethtool_cmd cmd; 167 struct ethtool_cmd cmd;
168 if (!dev_ethtool_get_settings(netdev, &cmd)) 168 if (!__ethtool_get_settings(netdev, &cmd))
169 ret = sprintf(buf, "%s\n", 169 ret = sprintf(buf, "%s\n",
170 cmd.duplex ? "full" : "half"); 170 cmd.duplex ? "full" : "half");
171 } 171 }
@@ -712,13 +712,13 @@ static void rx_queue_release(struct kobject *kobj)
712 struct rps_dev_flow_table *flow_table; 712 struct rps_dev_flow_table *flow_table;
713 713
714 714
715 map = rcu_dereference_raw(queue->rps_map); 715 map = rcu_dereference_protected(queue->rps_map, 1);
716 if (map) { 716 if (map) {
717 RCU_INIT_POINTER(queue->rps_map, NULL); 717 RCU_INIT_POINTER(queue->rps_map, NULL);
718 kfree_rcu(map, rcu); 718 kfree_rcu(map, rcu);
719 } 719 }
720 720
721 flow_table = rcu_dereference_raw(queue->rps_flow_table); 721 flow_table = rcu_dereference_protected(queue->rps_flow_table, 1);
722 if (flow_table) { 722 if (flow_table) {
723 RCU_INIT_POINTER(queue->rps_flow_table, NULL); 723 RCU_INIT_POINTER(queue->rps_flow_table, NULL);
724 call_rcu(&flow_table->rcu, rps_dev_flow_table_release); 724 call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
@@ -987,10 +987,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
987 } 987 }
988 988
989 if (nonempty) 989 if (nonempty)
990 rcu_assign_pointer(dev->xps_maps, new_dev_maps); 990 RCU_INIT_POINTER(dev->xps_maps, new_dev_maps);
991 else { 991 else {
992 kfree(new_dev_maps); 992 kfree(new_dev_maps);
993 rcu_assign_pointer(dev->xps_maps, NULL); 993 RCU_INIT_POINTER(dev->xps_maps, NULL);
994 } 994 }
995 995
996 if (dev_maps) 996 if (dev_maps)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index adf84dd8c7b5..f57d94627a2a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -558,13 +558,14 @@ int __netpoll_rx(struct sk_buff *skb)
558 if (skb_shared(skb)) 558 if (skb_shared(skb))
559 goto out; 559 goto out;
560 560
561 iph = (struct iphdr *)skb->data;
562 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 561 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
563 goto out; 562 goto out;
563 iph = (struct iphdr *)skb->data;
564 if (iph->ihl < 5 || iph->version != 4) 564 if (iph->ihl < 5 || iph->version != 4)
565 goto out; 565 goto out;
566 if (!pskb_may_pull(skb, iph->ihl*4)) 566 if (!pskb_may_pull(skb, iph->ihl*4))
567 goto out; 567 goto out;
568 iph = (struct iphdr *)skb->data;
568 if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) 569 if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
569 goto out; 570 goto out;
570 571
@@ -579,6 +580,7 @@ int __netpoll_rx(struct sk_buff *skb)
579 if (pskb_trim_rcsum(skb, len)) 580 if (pskb_trim_rcsum(skb, len))
580 goto out; 581 goto out;
581 582
583 iph = (struct iphdr *)skb->data;
582 if (iph->protocol != IPPROTO_UDP) 584 if (iph->protocol != IPPROTO_UDP)
583 goto out; 585 goto out;
584 586
@@ -760,7 +762,7 @@ int __netpoll_setup(struct netpoll *np)
760 } 762 }
761 763
762 /* last thing to do is link it to the net device structure */ 764 /* last thing to do is link it to the net device structure */
763 rcu_assign_pointer(ndev->npinfo, npinfo); 765 RCU_INIT_POINTER(ndev->npinfo, npinfo);
764 766
765 return 0; 767 return 0;
766 768
@@ -901,7 +903,7 @@ void __netpoll_cleanup(struct netpoll *np)
901 if (ops->ndo_netpoll_cleanup) 903 if (ops->ndo_netpoll_cleanup)
902 ops->ndo_netpoll_cleanup(np->dev); 904 ops->ndo_netpoll_cleanup(np->dev);
903 905
904 rcu_assign_pointer(np->dev->npinfo, NULL); 906 RCU_INIT_POINTER(np->dev->npinfo, NULL);
905 907
906 /* avoid racing with NAPI reading npinfo */ 908 /* avoid racing with NAPI reading npinfo */
907 synchronize_rcu_bh(); 909 synchronize_rcu_bh();
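The __netpoll_rx() hunk reorders things so the IP header pointer is (re)loaded after every call that can reallocate the head: pskb_may_pull() and pskb_trim_rcsum() may move skb->data, so a pointer taken before them can go stale. The safe pattern the patch converges on:

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto out;
	iph = (struct iphdr *)skb->data;	/* valid only after the pull */

	if (!pskb_may_pull(skb, iph->ihl * 4))
		goto out;
	iph = (struct iphdr *)skb->data;	/* reload: data may have moved */

	if (pskb_trim_rcsum(skb, len))
		goto out;
	iph = (struct iphdr *)skb->data;	/* and again after the trim */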
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index e35a6fbb8110..796044ac0bf3 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2602,8 +2602,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
2602 if (!pkt_dev->page) 2602 if (!pkt_dev->page)
2603 break; 2603 break;
2604 } 2604 }
2605 skb_shinfo(skb)->frags[i].page = pkt_dev->page; 2605 skb_frag_set_page(skb, i, pkt_dev->page);
2606 get_page(pkt_dev->page);
2607 skb_shinfo(skb)->frags[i].page_offset = 0; 2606 skb_shinfo(skb)->frags[i].page_offset = 0;
2608 /*last fragment, fill rest of data*/ 2607 /*last fragment, fill rest of data*/
2609 if (i == (frags - 1)) 2608 if (i == (frags - 1))
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 99d9e953fe39..9083e82bdae5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -731,7 +731,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev)
731 size += num_vfs * 731 size += num_vfs *
732 (nla_total_size(sizeof(struct ifla_vf_mac)) + 732 (nla_total_size(sizeof(struct ifla_vf_mac)) +
733 nla_total_size(sizeof(struct ifla_vf_vlan)) + 733 nla_total_size(sizeof(struct ifla_vf_vlan)) +
734 nla_total_size(sizeof(struct ifla_vf_tx_rate))); 734 nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
735 nla_total_size(sizeof(struct ifla_vf_spoofchk)));
735 return size; 736 return size;
736 } else 737 } else
737 return 0; 738 return 0;
@@ -954,13 +955,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
954 struct ifla_vf_mac vf_mac; 955 struct ifla_vf_mac vf_mac;
955 struct ifla_vf_vlan vf_vlan; 956 struct ifla_vf_vlan vf_vlan;
956 struct ifla_vf_tx_rate vf_tx_rate; 957 struct ifla_vf_tx_rate vf_tx_rate;
958 struct ifla_vf_spoofchk vf_spoofchk;
959
960 /*
961 * Not all SR-IOV capable drivers support the
962 * spoofcheck query. Preset to -1 so the user
963 * space tool can detect that the driver didn't
964 * report anything.
965 */
966 ivi.spoofchk = -1;
957 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) 967 if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
958 break; 968 break;
959 vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf; 969 vf_mac.vf =
970 vf_vlan.vf =
971 vf_tx_rate.vf =
972 vf_spoofchk.vf = ivi.vf;
973
960 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); 974 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
961 vf_vlan.vlan = ivi.vlan; 975 vf_vlan.vlan = ivi.vlan;
962 vf_vlan.qos = ivi.qos; 976 vf_vlan.qos = ivi.qos;
963 vf_tx_rate.rate = ivi.tx_rate; 977 vf_tx_rate.rate = ivi.tx_rate;
978 vf_spoofchk.setting = ivi.spoofchk;
964 vf = nla_nest_start(skb, IFLA_VF_INFO); 979 vf = nla_nest_start(skb, IFLA_VF_INFO);
965 if (!vf) { 980 if (!vf) {
966 nla_nest_cancel(skb, vfinfo); 981 nla_nest_cancel(skb, vfinfo);
@@ -968,7 +983,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
968 } 983 }
969 NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac); 984 NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
970 NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan); 985 NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
971 NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate); 986 NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
987 &vf_tx_rate);
988 NLA_PUT(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
989 &vf_spoofchk);
972 nla_nest_end(skb, vf); 990 nla_nest_end(skb, vf);
973 } 991 }
974 nla_nest_end(skb, vfinfo); 992 nla_nest_end(skb, vfinfo);
@@ -1202,6 +1220,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
1202 ivt->rate); 1220 ivt->rate);
1203 break; 1221 break;
1204 } 1222 }
1223 case IFLA_VF_SPOOFCHK: {
1224 struct ifla_vf_spoofchk *ivs;
1225 ivs = nla_data(vf);
1226 err = -EOPNOTSUPP;
1227 if (ops->ndo_set_vf_spoofchk)
1228 err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
1229 ivs->setting);
1230 break;
1231 }
1205 default: 1232 default:
1206 err = -EINVAL; 1233 err = -EINVAL;
1207 break; 1234 break;
@@ -1604,7 +1631,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
1604 dev_net_set(dev, net); 1631 dev_net_set(dev, net);
1605 dev->rtnl_link_ops = ops; 1632 dev->rtnl_link_ops = ops;
1606 dev->rtnl_link_state = RTNL_LINK_INITIALIZING; 1633 dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
1607 dev->real_num_tx_queues = real_num_queues;
1608 1634
1609 if (tb[IFLA_MTU]) 1635 if (tb[IFLA_MTU])
1610 dev->mtu = nla_get_u32(tb[IFLA_MTU]); 1636 dev->mtu = nla_get_u32(tb[IFLA_MTU]);
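IFLA_VF_SPOOFCHK is handled in both directions: on dumps the setting is preset to -1 so user space can tell when a driver never filled it in, and on set requests it is routed to a new ndo_set_vf_spoofchk() operation (-EOPNOTSUPP when the driver lacks one). A minimal sketch of the driver side, assuming the (dev, vf, setting) signature the call in do_setvfinfo() implies; example_set_vf_spoofchk() and the max_vfs bound are hypothetical:

	#include <linux/netdevice.h>

	static int example_set_vf_spoofchk(struct net_device *dev, int vf,
					   bool setting)
	{
		/* validate the VF index, then program the per-VF
		 * anti-spoof filter in hardware */
		if (vf < 0 /* || vf >= max_vfs */)
			return -EINVAL;
		return 0;
	}

	static const struct net_device_ops example_netdev_ops = {
		/* ... existing ops ... */
		.ndo_set_vf_spoofchk	= example_set_vf_spoofchk,
	};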
diff --git a/net/core/scm.c b/net/core/scm.c
index 4c1ef026d695..ff52ad0a5150 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
173 if (err) 173 if (err)
174 goto error; 174 goto error;
175 175
176 if (pid_vnr(p->pid) != p->creds.pid) { 176 if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
177 struct pid *pid; 177 struct pid *pid;
178 err = -ESRCH; 178 err = -ESRCH;
179 pid = find_get_pid(p->creds.pid); 179 pid = find_get_pid(p->creds.pid);
@@ -183,8 +183,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
183 p->pid = pid; 183 p->pid = pid;
184 } 184 }
185 185
186 if ((p->cred->euid != p->creds.uid) || 186 if (!p->cred ||
187 (p->cred->egid != p->creds.gid)) { 187 (p->cred->euid != p->creds.uid) ||
188 (p->cred->egid != p->creds.gid)) {
188 struct cred *cred; 189 struct cred *cred;
189 err = -ENOMEM; 190 err = -ENOMEM;
190 cred = prepare_creds(); 191 cred = prepare_creds();
@@ -192,8 +193,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
192 goto error; 193 goto error;
193 194
194 cred->uid = cred->euid = p->creds.uid; 195 cred->uid = cred->euid = p->creds.uid;
195 cred->gid = cred->egid = p->creds.uid; 196 cred->gid = cred->egid = p->creds.gid;
196 put_cred(p->cred); 197 if (p->cred)
198 put_cred(p->cred);
197 p->cred = cred; 199 p->cred = cred;
198 } 200 }
199 break; 201 break;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
new file mode 100644
index 000000000000..45329d7c9dd9
--- /dev/null
+++ b/net/core/secure_seq.c
@@ -0,0 +1,184 @@
1#include <linux/kernel.h>
2#include <linux/init.h>
3#include <linux/cryptohash.h>
4#include <linux/module.h>
5#include <linux/cache.h>
6#include <linux/random.h>
7#include <linux/hrtimer.h>
8#include <linux/ktime.h>
9#include <linux/string.h>
10
11#include <net/secure_seq.h>
12
13static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
14
15static int __init net_secret_init(void)
16{
17 get_random_bytes(net_secret, sizeof(net_secret));
18 return 0;
19}
20late_initcall(net_secret_init);
21
22static u32 seq_scale(u32 seq)
23{
24 /*
25 * As close as possible to RFC 793, which
26 * suggests using a 250 kHz clock.
27 * Further reading shows this assumes 2 Mb/s networks.
28 * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate.
29 * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but
30 * we also need to limit the resolution so that the u32 seq
31 * overlaps less than one time per MSL (2 minutes).
32 * Choosing a clock of 64 ns period is OK. (period of 274 s)
33 */
34 return seq + (ktime_to_ns(ktime_get_real()) >> 6);
35}
36
37#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
38__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
39 __be16 sport, __be16 dport)
40{
41 u32 secret[MD5_MESSAGE_BYTES / 4];
42 u32 hash[MD5_DIGEST_WORDS];
43 u32 i;
44
45 memcpy(hash, saddr, 16);
46 for (i = 0; i < 4; i++)
47 secret[i] = net_secret[i] + daddr[i];
48 secret[4] = net_secret[4] +
49 (((__force u16)sport << 16) + (__force u16)dport);
50 for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
51 secret[i] = net_secret[i];
52
53 md5_transform(hash, secret);
54
55 return seq_scale(hash[0]);
56}
57EXPORT_SYMBOL(secure_tcpv6_sequence_number);
58
59u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
60 __be16 dport)
61{
62 u32 secret[MD5_MESSAGE_BYTES / 4];
63 u32 hash[MD5_DIGEST_WORDS];
64 u32 i;
65
66 memcpy(hash, saddr, 16);
67 for (i = 0; i < 4; i++)
68 secret[i] = net_secret[i] + (__force u32) daddr[i];
69 secret[4] = net_secret[4] + (__force u32)dport;
70 for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
71 secret[i] = net_secret[i];
72
73 md5_transform(hash, secret);
74
75 return hash[0];
76}
77#endif
78
79#ifdef CONFIG_INET
80__u32 secure_ip_id(__be32 daddr)
81{
82 u32 hash[MD5_DIGEST_WORDS];
83
84 hash[0] = (__force __u32) daddr;
85 hash[1] = net_secret[13];
86 hash[2] = net_secret[14];
87 hash[3] = net_secret[15];
88
89 md5_transform(hash, net_secret);
90
91 return hash[0];
92}
93
94__u32 secure_ipv6_id(const __be32 daddr[4])
95{
96 __u32 hash[4];
97
98 memcpy(hash, daddr, 16);
99 md5_transform(hash, net_secret);
100
101 return hash[0];
102}
103
104__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
105 __be16 sport, __be16 dport)
106{
107 u32 hash[MD5_DIGEST_WORDS];
108
109 hash[0] = (__force u32)saddr;
110 hash[1] = (__force u32)daddr;
111 hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
112 hash[3] = net_secret[15];
113
114 md5_transform(hash, net_secret);
115
116 return seq_scale(hash[0]);
117}
118
119u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
120{
121 u32 hash[MD5_DIGEST_WORDS];
122
123 hash[0] = (__force u32)saddr;
124 hash[1] = (__force u32)daddr;
125 hash[2] = (__force u32)dport ^ net_secret[14];
126 hash[3] = net_secret[15];
127
128 md5_transform(hash, net_secret);
129
130 return hash[0];
131}
132EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
133#endif
134
135#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
136u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
137 __be16 sport, __be16 dport)
138{
139 u32 hash[MD5_DIGEST_WORDS];
140 u64 seq;
141
142 hash[0] = (__force u32)saddr;
143 hash[1] = (__force u32)daddr;
144 hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
145 hash[3] = net_secret[15];
146
147 md5_transform(hash, net_secret);
148
149 seq = hash[0] | (((u64)hash[1]) << 32);
150 seq += ktime_to_ns(ktime_get_real());
151 seq &= (1ull << 48) - 1;
152
153 return seq;
154}
155EXPORT_SYMBOL(secure_dccp_sequence_number);
156
157#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
158u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
159 __be16 sport, __be16 dport)
160{
161 u32 secret[MD5_MESSAGE_BYTES / 4];
162 u32 hash[MD5_DIGEST_WORDS];
163 u64 seq;
164 u32 i;
165
166 memcpy(hash, saddr, 16);
167 for (i = 0; i < 4; i++)
168 secret[i] = net_secret[i] + daddr[i];
169 secret[4] = net_secret[4] +
170 (((__force u16)sport << 16) + (__force u16)dport);
171 for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++)
172 secret[i] = net_secret[i];
173
174 md5_transform(hash, secret);
175
176 seq = hash[0] | (((u64)hash[1]) << 32);
177 seq += ktime_to_ns(ktime_get_real());
178 seq &= (1ull << 48) - 1;
179
180 return seq;
181}
182EXPORT_SYMBOL(secure_dccpv6_sequence_number);
183#endif
184#endif
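A quick check of the seq_scale() comment's numbers: ktime_to_ns() >> 6 gives one tick per 2^6 = 64 ns, and the u32 sequence space therefore wraps after 2^32 × 64 ns ≈ 274.9 s (roughly four and a half minutes), comfortably longer than the 2-minute MSL the comment is protecting against, while still much finer-grained than the 250 kHz clock RFC 793 originally assumed.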
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2beda824636e..a7f855dca922 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
184 goto out; 184 goto out;
185 prefetchw(skb); 185 prefetchw(skb);
186 186
187 size = SKB_DATA_ALIGN(size); 187 /* We do our best to align skb_shared_info on a separate cache
188 data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), 188 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
189 gfp_mask, node); 189 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
190 * Both skb->head and skb_shared_info are cache line aligned.
191 */
192 size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
193 data = kmalloc_node_track_caller(size, gfp_mask, node);
190 if (!data) 194 if (!data)
191 goto nodata; 195 goto nodata;
196 /* kmalloc(size) might give us more room than requested.
197 * Put skb_shared_info exactly at the end of allocated zone,
198 * to allow max possible filling before reallocation.
199 */
200 size = SKB_WITH_OVERHEAD(ksize(data));
192 prefetchw(data + size); 201 prefetchw(data + size);
193 202
194 /* 203 /*
@@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
197 * the tail pointer in struct sk_buff! 206 * the tail pointer in struct sk_buff!
198 */ 207 */
199 memset(skb, 0, offsetof(struct sk_buff, tail)); 208 memset(skb, 0, offsetof(struct sk_buff, tail));
200 skb->truesize = size + sizeof(struct sk_buff); 209 /* Account for allocated memory : skb + skb->head */
210 skb->truesize = SKB_TRUESIZE(size);
201 atomic_set(&skb->users, 1); 211 atomic_set(&skb->users, 1);
202 skb->head = data; 212 skb->head = data;
203 skb->data = data; 213 skb->data = data;
@@ -326,7 +336,7 @@ static void skb_release_data(struct sk_buff *skb)
326 if (skb_shinfo(skb)->nr_frags) { 336 if (skb_shinfo(skb)->nr_frags) {
327 int i; 337 int i;
328 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 338 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
329 put_page(skb_shinfo(skb)->frags[i].page); 339 skb_frag_unref(skb, i);
330 } 340 }
331 341
332 /* 342 /*
@@ -529,6 +539,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
529 new->mac_header = old->mac_header; 539 new->mac_header = old->mac_header;
530 skb_dst_copy(new, old); 540 skb_dst_copy(new, old);
531 new->rxhash = old->rxhash; 541 new->rxhash = old->rxhash;
542 new->ooo_okay = old->ooo_okay;
543 new->l4_rxhash = old->l4_rxhash;
532#ifdef CONFIG_XFRM 544#ifdef CONFIG_XFRM
533 new->sp = secpath_get(old->sp); 545 new->sp = secpath_get(old->sp);
534#endif 546#endif
@@ -611,8 +623,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
611} 623}
612EXPORT_SYMBOL_GPL(skb_morph); 624EXPORT_SYMBOL_GPL(skb_morph);
613 625
614/* skb frags copy userspace buffers to kernel */ 626/* skb_copy_ubufs - copy userspace skb frags buffers to kernel
615static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) 627 * @skb: the skb to modify
628 * @gfp_mask: allocation priority
629 *
630 * This must be called on SKBTX_DEV_ZEROCOPY skb.
631 * It will copy all frags into kernel and drop the reference
632 * to userspace pages.
633 *
634 * If this function is called from an interrupt gfp_mask() must be
635 * %GFP_ATOMIC.
636 *
637 * Returns 0 on success or a negative error code on failure
638 * to allocate kernel memory to copy to.
639 */
640int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
616{ 641{
617 int i; 642 int i;
618 int num_frags = skb_shinfo(skb)->nr_frags; 643 int num_frags = skb_shinfo(skb)->nr_frags;
@@ -652,6 +677,8 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
652 skb_shinfo(skb)->frags[i - 1].page = head; 677 skb_shinfo(skb)->frags[i - 1].page = head;
653 head = (struct page *)head->private; 678 head = (struct page *)head->private;
654 } 679 }
680
681 skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
655 return 0; 682 return 0;
656} 683}
657 684
@@ -677,7 +704,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
677 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { 704 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
678 if (skb_copy_ubufs(skb, gfp_mask)) 705 if (skb_copy_ubufs(skb, gfp_mask))
679 return NULL; 706 return NULL;
680 skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
681 } 707 }
682 708
683 n = skb + 1; 709 n = skb + 1;
@@ -803,11 +829,10 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
803 n = NULL; 829 n = NULL;
804 goto out; 830 goto out;
805 } 831 }
806 skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
807 } 832 }
808 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 833 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
809 skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; 834 skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
810 get_page(skb_shinfo(n)->frags[i].page); 835 skb_frag_ref(skb, i);
811 } 836 }
812 skb_shinfo(n)->nr_frags = i; 837 skb_shinfo(n)->nr_frags = i;
813 } 838 }
@@ -896,10 +921,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
896 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { 921 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
897 if (skb_copy_ubufs(skb, gfp_mask)) 922 if (skb_copy_ubufs(skb, gfp_mask))
898 goto nofrags; 923 goto nofrags;
899 skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
900 } 924 }
901 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 925 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
902 get_page(skb_shinfo(skb)->frags[i].page); 926 skb_frag_ref(skb, i);
903 927
904 if (skb_has_frag_list(skb)) 928 if (skb_has_frag_list(skb))
905 skb_clone_fraglist(skb); 929 skb_clone_fraglist(skb);
@@ -1179,7 +1203,7 @@ drop_pages:
1179 skb_shinfo(skb)->nr_frags = i; 1203 skb_shinfo(skb)->nr_frags = i;
1180 1204
1181 for (; i < nfrags; i++) 1205 for (; i < nfrags; i++)
1182 put_page(skb_shinfo(skb)->frags[i].page); 1206 skb_frag_unref(skb, i);
1183 1207
1184 if (skb_has_frag_list(skb)) 1208 if (skb_has_frag_list(skb))
1185 skb_drop_fraglist(skb); 1209 skb_drop_fraglist(skb);
@@ -1348,7 +1372,7 @@ pull_pages:
1348 k = 0; 1372 k = 0;
1349 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1373 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1350 if (skb_shinfo(skb)->frags[i].size <= eat) { 1374 if (skb_shinfo(skb)->frags[i].size <= eat) {
1351 put_page(skb_shinfo(skb)->frags[i].page); 1375 skb_frag_unref(skb, i);
1352 eat -= skb_shinfo(skb)->frags[i].size; 1376 eat -= skb_shinfo(skb)->frags[i].size;
1353 } else { 1377 } else {
1354 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; 1378 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1369,8 +1393,21 @@ pull_pages:
1369} 1393}
1370EXPORT_SYMBOL(__pskb_pull_tail); 1394EXPORT_SYMBOL(__pskb_pull_tail);
1371 1395
1372/* Copy some data bits from skb to kernel buffer. */ 1396/**
1373 1397 * skb_copy_bits - copy bits from skb to kernel buffer
1398 * @skb: source skb
1399 * @offset: offset in source
1400 * @to: destination buffer
1401 * @len: number of bytes to copy
1402 *
1403 * Copy the specified number of bytes from the source skb to the
1404 * destination buffer.
1405 *
1406 * CAUTION ! :
1407 * If its prototype is ever changed,
1408 * check arch/{*}/net/{*}.S files,
1409 * since it is called from BPF assembly code.
1410 */
1374int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) 1411int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1375{ 1412{
1376 int start = skb_headlen(skb); 1413 int start = skb_headlen(skb);
@@ -1594,7 +1631,8 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1594 for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { 1631 for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
1595 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; 1632 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1596 1633
1597 if (__splice_segment(f->page, f->page_offset, f->size, 1634 if (__splice_segment(skb_frag_page(f),
1635 f->page_offset, f->size,
1598 offset, len, skb, spd, 0, sk, pipe)) 1636 offset, len, skb, spd, 0, sk, pipe))
1599 return 1; 1637 return 1;
1600 } 1638 }
@@ -2139,7 +2177,7 @@ static inline void skb_split_no_header(struct sk_buff *skb,
2139 * where splitting is expensive. 2177 * where splitting is expensive.
2140 * 2. Split is accurately. We make this. 2178 * 2. Split is accurately. We make this.
2141 */ 2179 */
2142 get_page(skb_shinfo(skb)->frags[i].page); 2180 skb_frag_ref(skb, i);
2143 skb_shinfo(skb1)->frags[0].page_offset += len - pos; 2181 skb_shinfo(skb1)->frags[0].page_offset += len - pos;
2144 skb_shinfo(skb1)->frags[0].size -= len - pos; 2182 skb_shinfo(skb1)->frags[0].size -= len - pos;
2145 skb_shinfo(skb)->frags[i].size = len - pos; 2183 skb_shinfo(skb)->frags[i].size = len - pos;
@@ -2214,7 +2252,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
2214 * commit all, so that we don't have to undo partial changes 2252 * commit all, so that we don't have to undo partial changes
2215 */ 2253 */
2216 if (!to || 2254 if (!to ||
2217 !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { 2255 !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
2256 fragfrom->page_offset)) {
2218 merge = -1; 2257 merge = -1;
2219 } else { 2258 } else {
2220 merge = to - 1; 2259 merge = to - 1;
@@ -2261,7 +2300,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
2261 to++; 2300 to++;
2262 2301
2263 } else { 2302 } else {
2264 get_page(fragfrom->page); 2303 __skb_frag_ref(fragfrom);
2265 fragto->page = fragfrom->page; 2304 fragto->page = fragfrom->page;
2266 fragto->page_offset = fragfrom->page_offset; 2305 fragto->page_offset = fragfrom->page_offset;
2267 fragto->size = todo; 2306 fragto->size = todo;
@@ -2283,7 +2322,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
2283 fragto = &skb_shinfo(tgt)->frags[merge]; 2322 fragto = &skb_shinfo(tgt)->frags[merge];
2284 2323
2285 fragto->size += fragfrom->size; 2324 fragto->size += fragfrom->size;
2286 put_page(fragfrom->page); 2325 __skb_frag_unref(fragfrom);
2287 } 2326 }
2288 2327
2289 /* Reposition in the original skb */ 2328 /* Reposition in the original skb */
@@ -2528,8 +2567,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
2528 left = PAGE_SIZE - frag->page_offset; 2567 left = PAGE_SIZE - frag->page_offset;
2529 copy = (length > left)? left : length; 2568 copy = (length > left)? left : length;
2530 2569
2531 ret = getfrag(from, (page_address(frag->page) + 2570 ret = getfrag(from, skb_frag_address(frag) + frag->size,
2532 frag->page_offset + frag->size),
2533 offset, copy, 0, skb); 2571 offset, copy, 0, skb);
2534 if (ret < 0) 2572 if (ret < 0)
2535 return -EFAULT; 2573 return -EFAULT;
@@ -2681,7 +2719,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
2681 2719
2682 while (pos < offset + len && i < nfrags) { 2720 while (pos < offset + len && i < nfrags) {
2683 *frag = skb_shinfo(skb)->frags[i]; 2721 *frag = skb_shinfo(skb)->frags[i];
2684 get_page(frag->page); 2722 __skb_frag_ref(frag);
2685 size = frag->size; 2723 size = frag->size;
2686 2724
2687 if (pos < offset) { 2725 if (pos < offset) {
@@ -2904,7 +2942,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2904 2942
2905 if (copy > len) 2943 if (copy > len)
2906 copy = len; 2944 copy = len;
2907 sg_set_page(&sg[elt], frag->page, copy, 2945 sg_set_page(&sg[elt], skb_frag_page(frag), copy,
2908 frag->page_offset+offset-start); 2946 frag->page_offset+offset-start);
2909 elt++; 2947 elt++;
2910 if (!(len -= copy)) 2948 if (!(len -= copy))
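Two themes run through the skbuff.c hunks: __alloc_skb() now sizes the data area to whatever kmalloc() actually handed back (with truesize accounted via SKB_TRUESIZE), and every direct frags[i].page manipulation is funnelled through accessors. The accessor pattern, assuming the helpers referenced above (skb_frag_page(), skb_frag_ref()/skb_frag_unref() and their __skb_frag_* variants taking a frag pointer):

	/* take an extra reference on fragment i */
	skb_frag_ref(skb, i);		/* was get_page(skb_shinfo(skb)->frags[i].page) */

	/* read the backing page of a fragment */
	struct page *page = skb_frag_page(&skb_shinfo(skb)->frags[i]);

	/* and drop the reference again */
	skb_frag_unref(skb, i);		/* was put_page(...) */

SKB_TRUESIZE(X) is presumably X plus the aligned sizes of struct sk_buff and struct skb_shared_info, which is also why the sock.c hunk below can replace its hand-rolled "sizeof(struct sk_buff) + 256" overhead constant with SKB_TRUESIZE(256).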
diff --git a/net/core/sock.c b/net/core/sock.c
index bc745d00ea4d..5a087626bb3a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -207,7 +207,7 @@ static struct lock_class_key af_callback_keys[AF_MAX];
207 * not depend upon such differences. 207 * not depend upon such differences.
208 */ 208 */
209#define _SK_MEM_PACKETS 256 209#define _SK_MEM_PACKETS 256
210#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256) 210#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
211#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 211#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
212#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 212#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
213 213
@@ -387,7 +387,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
387 387
388 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 388 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
389 sk_tx_queue_clear(sk); 389 sk_tx_queue_clear(sk);
390 rcu_assign_pointer(sk->sk_dst_cache, NULL); 390 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
391 dst_release(dst); 391 dst_release(dst);
392 return NULL; 392 return NULL;
393 } 393 }
@@ -738,10 +738,7 @@ set_rcvbuf:
738 /* We implement the SO_SNDLOWAT etc to 738 /* We implement the SO_SNDLOWAT etc to
739 not be settable (1003.1g 5.3) */ 739 not be settable (1003.1g 5.3) */
740 case SO_RXQ_OVFL: 740 case SO_RXQ_OVFL:
741 if (valbool) 741 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
742 sock_set_flag(sk, SOCK_RXQ_OVFL);
743 else
744 sock_reset_flag(sk, SOCK_RXQ_OVFL);
745 break; 742 break;
746 default: 743 default:
747 ret = -ENOPROTOOPT; 744 ret = -ENOPROTOOPT;
@@ -1158,7 +1155,7 @@ static void __sk_free(struct sock *sk)
1158 atomic_read(&sk->sk_wmem_alloc) == 0); 1155 atomic_read(&sk->sk_wmem_alloc) == 0);
1159 if (filter) { 1156 if (filter) {
1160 sk_filter_uncharge(sk, filter); 1157 sk_filter_uncharge(sk, filter);
1161 rcu_assign_pointer(sk->sk_filter, NULL); 1158 RCU_INIT_POINTER(sk->sk_filter, NULL);
1162 } 1159 }
1163 1160
1164 sock_disable_timestamp(sk, SOCK_TIMESTAMP); 1161 sock_disable_timestamp(sk, SOCK_TIMESTAMP);
@@ -1533,7 +1530,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1533 skb_shinfo(skb)->nr_frags = npages; 1530 skb_shinfo(skb)->nr_frags = npages;
1534 for (i = 0; i < npages; i++) { 1531 for (i = 0; i < npages; i++) {
1535 struct page *page; 1532 struct page *page;
1536 skb_frag_t *frag;
1537 1533
1538 page = alloc_pages(sk->sk_allocation, 0); 1534 page = alloc_pages(sk->sk_allocation, 0);
1539 if (!page) { 1535 if (!page) {
@@ -1543,12 +1539,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1543 goto failure; 1539 goto failure;
1544 } 1540 }
1545 1541
1546 frag = &skb_shinfo(skb)->frags[i]; 1542 __skb_fill_page_desc(skb, i,
1547 frag->page = page; 1543 page, 0,
1548 frag->page_offset = 0; 1544 (data_len >= PAGE_SIZE ?
1549 frag->size = (data_len >= PAGE_SIZE ? 1545 PAGE_SIZE :
1550 PAGE_SIZE : 1546 data_len));
1551 data_len);
1552 data_len -= PAGE_SIZE; 1547 data_len -= PAGE_SIZE;
1553 } 1548 }
1554 1549
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 25d717ebc92e..34e9664cae3b 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -78,7 +78,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
78 copy = end - offset; 78 copy = end - offset;
79 if (copy > 0) { 79 if (copy > 0) {
80 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 80 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
81 struct page *page = frag->page; 81 struct page *page = skb_frag_page(frag);
82 82
83 if (copy > len) 83 if (copy > len)
84 copy = len; 84 copy = len;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 3cb56af4e13c..9bfbc1d1b50c 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1255,7 +1255,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
1255 1255
1256 spin_lock(&dcb_lock); 1256 spin_lock(&dcb_lock);
1257 list_for_each_entry(itr, &dcb_app_list, list) { 1257 list_for_each_entry(itr, &dcb_app_list, list) {
1258 if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) { 1258 if (itr->ifindex == netdev->ifindex) {
1259 err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app), 1259 err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app),
1260 &itr->app); 1260 &itr->app);
1261 if (err) { 1261 if (err) {
@@ -1412,7 +1412,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev)
1412 goto dcb_unlock; 1412 goto dcb_unlock;
1413 1413
1414 list_for_each_entry(itr, &dcb_app_list, list) { 1414 list_for_each_entry(itr, &dcb_app_list, list) {
1415 if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) { 1415 if (itr->ifindex == netdev->ifindex) {
1416 struct nlattr *app_nest = nla_nest_start(skb, 1416 struct nlattr *app_nest = nla_nest_start(skb,
1417 DCB_ATTR_APP); 1417 DCB_ATTR_APP);
1418 if (!app_nest) 1418 if (!app_nest)
@@ -2050,7 +2050,7 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app)
2050 list_for_each_entry(itr, &dcb_app_list, list) { 2050 list_for_each_entry(itr, &dcb_app_list, list) {
2051 if (itr->app.selector == app->selector && 2051 if (itr->app.selector == app->selector &&
2052 itr->app.protocol == app->protocol && 2052 itr->app.protocol == app->protocol &&
2053 (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { 2053 itr->ifindex == dev->ifindex) {
2054 prio = itr->app.priority; 2054 prio = itr->app.priority;
2055 break; 2055 break;
2056 } 2056 }
@@ -2073,15 +2073,17 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new)
2073 struct dcb_app_type *itr; 2073 struct dcb_app_type *itr;
2074 struct dcb_app_type event; 2074 struct dcb_app_type event;
2075 2075
2076 memcpy(&event.name, dev->name, sizeof(event.name)); 2076 event.ifindex = dev->ifindex;
2077 memcpy(&event.app, new, sizeof(event.app)); 2077 memcpy(&event.app, new, sizeof(event.app));
2078 if (dev->dcbnl_ops->getdcbx)
2079 event.dcbx = dev->dcbnl_ops->getdcbx(dev);
2078 2080
2079 spin_lock(&dcb_lock); 2081 spin_lock(&dcb_lock);
2080 /* Search for existing match and replace */ 2082 /* Search for existing match and replace */
2081 list_for_each_entry(itr, &dcb_app_list, list) { 2083 list_for_each_entry(itr, &dcb_app_list, list) {
2082 if (itr->app.selector == new->selector && 2084 if (itr->app.selector == new->selector &&
2083 itr->app.protocol == new->protocol && 2085 itr->app.protocol == new->protocol &&
2084 (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { 2086 itr->ifindex == dev->ifindex) {
2085 if (new->priority) 2087 if (new->priority)
2086 itr->app.priority = new->priority; 2088 itr->app.priority = new->priority;
2087 else { 2089 else {
@@ -2101,7 +2103,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new)
2101 } 2103 }
2102 2104
2103 memcpy(&entry->app, new, sizeof(*new)); 2105 memcpy(&entry->app, new, sizeof(*new));
2104 strncpy(entry->name, dev->name, IFNAMSIZ); 2106 entry->ifindex = dev->ifindex;
2105 list_add(&entry->list, &dcb_app_list); 2107 list_add(&entry->list, &dcb_app_list);
2106 } 2108 }
2107out: 2109out:
@@ -2127,7 +2129,7 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app)
2127 list_for_each_entry(itr, &dcb_app_list, list) { 2129 list_for_each_entry(itr, &dcb_app_list, list) {
2128 if (itr->app.selector == app->selector && 2130 if (itr->app.selector == app->selector &&
2129 itr->app.protocol == app->protocol && 2131 itr->app.protocol == app->protocol &&
2130 (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { 2132 itr->ifindex == dev->ifindex) {
2131 prio |= 1 << itr->app.priority; 2133 prio |= 1 << itr->app.priority;
2132 } 2134 }
2133 } 2135 }
@@ -2150,8 +2152,10 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new)
2150 struct dcb_app_type event; 2152 struct dcb_app_type event;
2151 int err = 0; 2153 int err = 0;
2152 2154
2153 memcpy(&event.name, dev->name, sizeof(event.name)); 2155 event.ifindex = dev->ifindex;
2154 memcpy(&event.app, new, sizeof(event.app)); 2156 memcpy(&event.app, new, sizeof(event.app));
2157 if (dev->dcbnl_ops->getdcbx)
2158 event.dcbx = dev->dcbnl_ops->getdcbx(dev);
2155 2159
2156 spin_lock(&dcb_lock); 2160 spin_lock(&dcb_lock);
2157 /* Search for existing match and abort if found */ 2161 /* Search for existing match and abort if found */
@@ -2159,7 +2163,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new)
2159 if (itr->app.selector == new->selector && 2163 if (itr->app.selector == new->selector &&
2160 itr->app.protocol == new->protocol && 2164 itr->app.protocol == new->protocol &&
2161 itr->app.priority == new->priority && 2165 itr->app.priority == new->priority &&
2162 (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { 2166 itr->ifindex == dev->ifindex) {
2163 err = -EEXIST; 2167 err = -EEXIST;
2164 goto out; 2168 goto out;
2165 } 2169 }
@@ -2173,7 +2177,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new)
2173 } 2177 }
2174 2178
2175 memcpy(&entry->app, new, sizeof(*new)); 2179 memcpy(&entry->app, new, sizeof(*new));
2176 strncpy(entry->name, dev->name, IFNAMSIZ); 2180 entry->ifindex = dev->ifindex;
2177 list_add(&entry->list, &dcb_app_list); 2181 list_add(&entry->list, &dcb_app_list);
2178out: 2182out:
2179 spin_unlock(&dcb_lock); 2183 spin_unlock(&dcb_lock);
@@ -2194,8 +2198,10 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del)
2194 struct dcb_app_type event; 2198 struct dcb_app_type event;
2195 int err = -ENOENT; 2199 int err = -ENOENT;
2196 2200
2197 memcpy(&event.name, dev->name, sizeof(event.name)); 2201 event.ifindex = dev->ifindex;
2198 memcpy(&event.app, del, sizeof(event.app)); 2202 memcpy(&event.app, del, sizeof(event.app));
2203 if (dev->dcbnl_ops->getdcbx)
2204 event.dcbx = dev->dcbnl_ops->getdcbx(dev);
2199 2205
2200 spin_lock(&dcb_lock); 2206 spin_lock(&dcb_lock);
2201 /* Search for existing match and remove it. */ 2207 /* Search for existing match and remove it. */
@@ -2203,7 +2209,7 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del)
2203 if (itr->app.selector == del->selector && 2209 if (itr->app.selector == del->selector &&
2204 itr->app.protocol == del->protocol && 2210 itr->app.protocol == del->protocol &&
2205 itr->app.priority == del->priority && 2211 itr->app.priority == del->priority &&
2206 (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { 2212 itr->ifindex == dev->ifindex) {
2207 list_del(&itr->list); 2213 list_del(&itr->list);
2208 kfree(itr); 2214 kfree(itr);
2209 err = 0; 2215 err = 0;
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 0462040fc818..67164bb6ae4d 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -85,7 +85,6 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
85 85
86static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) 86static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
87{ 87{
88 struct dccp_sock *dp = dccp_sk(sk);
89 u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2); 88 u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2);
90 89
91 /* 90 /*
@@ -98,14 +97,33 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
98 DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); 97 DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
99 val = max_ratio; 98 val = max_ratio;
100 } 99 }
101 if (val > DCCPF_ACK_RATIO_MAX) 100 dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO,
102 val = DCCPF_ACK_RATIO_MAX; 101 min_t(u32, val, DCCPF_ACK_RATIO_MAX));
102}
103 103
104 if (val == dp->dccps_l_ack_ratio) 104static void ccid2_check_l_ack_ratio(struct sock *sk)
105 return; 105{
106 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
106 107
107 ccid2_pr_debug("changing local ack ratio to %u\n", val); 108 /*
108 dp->dccps_l_ack_ratio = val; 109 * After a loss, idle period, application limited period, or RTO we
110 * need to check that the ack ratio is still less than the congestion
111 * window. Otherwise, we will send an entire congestion window of
112 * packets and get no response because we haven't sent ack ratio
113 * packets yet.
114 * If the ack ratio does need to be reduced, we reduce it to half of
115 * the congestion window (or 1 if that's zero) instead of to the
116 * congestion window. This prevents problems if one ack is lost.
117 */
118 if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd)
119 ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U);
120}
121
122static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
123{
124 dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW,
125 clamp_val(val, DCCPF_SEQ_WMIN,
126 DCCPF_SEQ_WMAX));
109} 127}
110 128
111static void ccid2_hc_tx_rto_expire(unsigned long data) 129static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -187,6 +205,8 @@ static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now)
187 } 205 }
188 hc->tx_cwnd_used = 0; 206 hc->tx_cwnd_used = 0;
189 hc->tx_cwnd_stamp = now; 207 hc->tx_cwnd_stamp = now;
208
209 ccid2_check_l_ack_ratio(sk);
190} 210}
191 211
192/* This borrows the code of tcp_cwnd_restart() */ 212/* This borrows the code of tcp_cwnd_restart() */
@@ -205,6 +225,8 @@ static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
205 225
206 hc->tx_cwnd_stamp = now; 226 hc->tx_cwnd_stamp = now;
207 hc->tx_cwnd_used = 0; 227 hc->tx_cwnd_used = 0;
228
229 ccid2_check_l_ack_ratio(sk);
208} 230}
209 231
210static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) 232static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
@@ -405,17 +427,37 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
405 unsigned int *maxincr) 427 unsigned int *maxincr)
406{ 428{
407 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 429 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
408 430 struct dccp_sock *dp = dccp_sk(sk);
409 if (hc->tx_cwnd < hc->tx_ssthresh) { 431 int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio;
410 if (*maxincr > 0 && ++hc->tx_packets_acked == 2) { 432
433 if (hc->tx_cwnd < dp->dccps_l_seq_win &&
434 r_seq_used < dp->dccps_r_seq_win) {
435 if (hc->tx_cwnd < hc->tx_ssthresh) {
436 if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) {
437 hc->tx_cwnd += 1;
438 *maxincr -= 1;
439 hc->tx_packets_acked = 0;
440 }
441 } else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
411 hc->tx_cwnd += 1; 442 hc->tx_cwnd += 1;
412 *maxincr -= 1;
413 hc->tx_packets_acked = 0; 443 hc->tx_packets_acked = 0;
414 } 444 }
415 } else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
416 hc->tx_cwnd += 1;
417 hc->tx_packets_acked = 0;
418 } 445 }
446
447 /*
448 * Adjust the local sequence window and the ack ratio to allow about
449 * 5 times the number of packets in the network (RFC 4340 7.5.2)
450 */
451 if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win)
452 ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2);
453 else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2)
454 ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U);
455
456 if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win)
457 ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2);
458 else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2)
459 ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2);
460
419 /* 461 /*
420 * FIXME: RTT is sampled several times per acknowledgment (for each 462 * FIXME: RTT is sampled several times per acknowledgment (for each
421 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). 463 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
@@ -441,9 +483,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
441 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; 483 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
442 hc->tx_ssthresh = max(hc->tx_cwnd, 2U); 484 hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
443 485
444 /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ 486 ccid2_check_l_ack_ratio(sk);
445 if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd)
446 ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
447} 487}
448 488
449static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, 489static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
@@ -494,8 +534,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
494 if (hc->tx_rpdupack >= NUMDUPACK) { 534 if (hc->tx_rpdupack >= NUMDUPACK) {
495 hc->tx_rpdupack = -1; /* XXX lame */ 535 hc->tx_rpdupack = -1; /* XXX lame */
496 hc->tx_rpseq = 0; 536 hc->tx_rpseq = 0;
497 537#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__
538 /*
539 * FIXME: Ack Congestion Control is broken; in
540 * the current state instabilities occurred with
541 * Ack Ratios greater than 1; causing hang-ups
542 * and long RTO timeouts. This needs to be fixed
543 * before opening up dynamic changes. -- gerrit
544 */
498 ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); 545 ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
546#endif
499 } 547 }
500 } 548 }
501 } 549 }
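The new window management keeps both sequence windows near CCID2_WIN_CHANGE_FACTOR (5) times the packets in flight, per RFC 4340 7.5.2. A worked example: with tx_cwnd = 20 and a local Ack Ratio of 2, the peer sends r_seq_used = 20 / 2 = 10 acks per window; if the remote sequence window dccps_r_seq_win is 40, then 10 × 5 = 50 >= 40, so the local Ack Ratio is doubled to 4 to thin out the ack traffic. Likewise, once 5 × tx_cwnd reaches the local sequence window dccps_l_seq_win, the local window is doubled via dccp_feat_signal_nn_change(), the out-of-band NN update added in feat.c below.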
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index f585d330e1e5..18c97543e522 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -43,6 +43,12 @@ struct ccid2_seq {
43#define CCID2_SEQBUF_LEN 1024 43#define CCID2_SEQBUF_LEN 1024
44#define CCID2_SEQBUF_MAX 128 44#define CCID2_SEQBUF_MAX 128
45 45
46/*
47 * Multiple of congestion window to keep the sequence window at
48 * (RFC 4340 7.5.2)
49 */
50#define CCID2_WIN_CHANGE_FACTOR 5
51
46/** 52/**
47 * struct ccid2_hc_tx_sock - CCID2 TX half connection 53 * struct ccid2_hc_tx_sock - CCID2 TX half connection
48 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 54 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 5fdb07229017..583490aaf56f 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -474,6 +474,7 @@ static inline int dccp_ack_pending(const struct sock *sk)
474 return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk); 474 return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
475} 475}
476 476
477extern int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val);
477extern int dccp_feat_finalise_settings(struct dccp_sock *dp); 478extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
478extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); 479extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
479extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, 480extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 568def952722..23cea0ee3101 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -12,6 +12,7 @@
12 * ----------- 12 * -----------
13 * o Feature negotiation is coordinated with connection setup (as in TCP), wild 13 * o Feature negotiation is coordinated with connection setup (as in TCP), wild
14 * changes of parameters of an established connection are not supported. 14 * changes of parameters of an established connection are not supported.
15 * o Changing non-negotiable (NN) values is supported in state OPEN/PARTOPEN.
15 * o All currently known SP features have 1-byte quantities. If in the future 16 * o All currently known SP features have 1-byte quantities. If in the future
16 * extensions of RFCs 4340..42 define features with item lengths larger than 17 * extensions of RFCs 4340..42 define features with item lengths larger than
17 * one byte, a feature-specific extension of the code will be required. 18 * one byte, a feature-specific extension of the code will be required.
@@ -343,6 +344,20 @@ static int __dccp_feat_activate(struct sock *sk, const int idx,
343 return dccp_feat_table[idx].activation_hdlr(sk, val, rx); 344 return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
344} 345}
345 346
347/**
348 * dccp_feat_activate - Activate feature value on socket
349 * @sk: fully connected DCCP socket (after handshake is complete)
350 * @feat_num: feature to activate, one of %dccp_feature_numbers
351 * @local: whether local (1) or remote (0) @feat_num is meant
352 * @fval: the value (SP or NN) to activate, or NULL to use the default value
353 * For general use this function is preferable over __dccp_feat_activate().
354 */
355static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local,
356 dccp_feat_val const *fval)
357{
358 return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval);
359}
360
346/* Test for "Req'd" feature (RFC 4340, 6.4) */ 361/* Test for "Req'd" feature (RFC 4340, 6.4) */
347static inline int dccp_feat_must_be_understood(u8 feat_num) 362static inline int dccp_feat_must_be_understood(u8 feat_num)
348{ 363{
@@ -650,11 +665,22 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
650 return -1; 665 return -1;
651 if (pos->needs_mandatory && dccp_insert_option_mandatory(skb)) 666 if (pos->needs_mandatory && dccp_insert_option_mandatory(skb))
652 return -1; 667 return -1;
653 /* 668
654 * Enter CHANGING after transmitting the Change option (6.6.2). 669 if (skb->sk->sk_state == DCCP_OPEN &&
655 */ 670 (opt == DCCPO_CONFIRM_R || opt == DCCPO_CONFIRM_L)) {
656 if (pos->state == FEAT_INITIALISING) 671 /*
657 pos->state = FEAT_CHANGING; 672 * Confirms don't get retransmitted (6.6.3) once the
673 * connection is in state OPEN
674 */
675 dccp_feat_list_pop(pos);
676 } else {
677 /*
678 * Enter CHANGING after transmitting the Change
679 * option (6.6.2).
680 */
681 if (pos->state == FEAT_INITIALISING)
682 pos->state = FEAT_CHANGING;
683 }
658 } 684 }
659 return 0; 685 return 0;
660} 686}
@@ -730,6 +756,70 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
730 0, list, len); 756 0, list, len);
731} 757}
732 758
759/**
760 * dccp_feat_nn_get - Query current/pending value of NN feature
761 * @sk: DCCP socket of an established connection
762 * @feat: NN feature number from %dccp_feature_numbers
763 * For a known NN feature, returns value currently being negotiated, or
764 * current (confirmed) value if no negotiation is going on.
765 */
766u64 dccp_feat_nn_get(struct sock *sk, u8 feat)
767{
768 if (dccp_feat_type(feat) == FEAT_NN) {
769 struct dccp_sock *dp = dccp_sk(sk);
770 struct dccp_feat_entry *entry;
771
772 entry = dccp_feat_list_lookup(&dp->dccps_featneg, feat, 1);
773 if (entry != NULL)
774 return entry->val.nn;
775
776 switch (feat) {
777 case DCCPF_ACK_RATIO:
778 return dp->dccps_l_ack_ratio;
779 case DCCPF_SEQUENCE_WINDOW:
780 return dp->dccps_l_seq_win;
781 }
782 }
783 DCCP_BUG("attempt to look up unsupported feature %u", feat);
784 return 0;
785}
786EXPORT_SYMBOL_GPL(dccp_feat_nn_get);
787
788/**
789 * dccp_feat_signal_nn_change - Update NN values for an established connection
790 * @sk: DCCP socket of an established connection
791 * @feat: NN feature number from %dccp_feature_numbers
792 * @nn_val: the new value to use
793 * This function is used to communicate NN updates out-of-band.
794 */
795int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val)
796{
797 struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
798 dccp_feat_val fval = { .nn = nn_val };
799 struct dccp_feat_entry *entry;
800
801 if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
802 return 0;
803
804 if (dccp_feat_type(feat) != FEAT_NN ||
805 !dccp_feat_is_valid_nn_val(feat, nn_val))
806 return -EINVAL;
807
808 if (nn_val == dccp_feat_nn_get(sk, feat))
809 return 0; /* already set or negotiation under way */
810
811 entry = dccp_feat_list_lookup(fn, feat, 1);
812 if (entry != NULL) {
813 dccp_pr_debug("Clobbering existing NN entry %llu -> %llu\n",
814 (unsigned long long)entry->val.nn,
815 (unsigned long long)nn_val);
816 dccp_feat_list_pop(entry);
817 }
818
819 inet_csk_schedule_ack(sk);
820 return dccp_feat_push_change(fn, feat, 1, 0, &fval);
821}
822EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change);
733 823
734/* 824/*
735 * Tracking features whose value depend on the choice of CCID 825 * Tracking features whose value depend on the choice of CCID
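Together, dccp_feat_nn_get() and dccp_feat_signal_nn_change() give in-kernel callers an out-of-band way to retune NN features after the handshake: look up the current or in-flight value, then queue a Change option and schedule an Ack. A hedged sketch of the expected call pattern from a CCID (the function name and the surrounding policy are invented for illustration):

        /* Sketch: a CCID nudging the peer's Ack Ratio; the policy is invented. */
        static void ccid_set_ack_ratio(struct sock *sk, u16 new_ratio)
        {
                /* called with the socket locked, like other feature operations */
                if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) == new_ratio)
                        return; /* already active or still being negotiated */

                if (dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO, new_ratio))
                        DCCP_WARN("could not queue Ack Ratio change to %u\n",
                                  new_ratio);
        }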
@@ -1187,6 +1277,100 @@ confirmation_failed:
1187} 1277}
1188 1278
1189/** 1279/**
1280 * dccp_feat_handle_nn_established - Fast-path reception of NN options
1281 * @sk: socket of an established DCCP connection
1282 * @mandatory: whether @opt was preceded by a Mandatory option
1283 * @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only)
1284 * @feat: NN number, one of %dccp_feature_numbers
1285 * @val: NN value
1286 * @len: length of @val in bytes
1287 * This function combines the functionality of change_recv/confirm_recv, with
1288 * the following differences (reset codes are the same):
1289 * - cleanup after receiving the Confirm;
1290 * - values are directly activated after successful parsing;
1291 * - deliberately restricted to NN features.
1292 * The restriction to NN features is essential since SP features can have non-
1293 * predictable outcomes (depending on the remote configuration), and are inter-
1294 * dependent (CCIDs for instance cause further dependencies).
1295 */
1296static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt,
1297 u8 feat, u8 *val, u8 len)
1298{
1299 struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
1300 const bool local = (opt == DCCPO_CONFIRM_R);
1301 struct dccp_feat_entry *entry;
1302 u8 type = dccp_feat_type(feat);
1303 dccp_feat_val fval;
1304
1305 dccp_feat_print_opt(opt, feat, val, len, mandatory);
1306
1307 /* Ignore non-mandatory unknown and non-NN features */
1308 if (type == FEAT_UNKNOWN) {
1309 if (local && !mandatory)
1310 return 0;
1311 goto fast_path_unknown;
1312 } else if (type != FEAT_NN) {
1313 return 0;
1314 }
1315
1316 /*
1317 * We don't accept empty Confirms, since in fast-path feature
1318 * negotiation the values are enabled immediately after sending
1319 * the Change option.
1320 * Empty Changes on the other hand are invalid (RFC 4340, 6.1).
1321 */
1322 if (len == 0 || len > sizeof(fval.nn))
1323 goto fast_path_unknown;
1324
1325 if (opt == DCCPO_CHANGE_L) {
1326 fval.nn = dccp_decode_value_var(val, len);
1327 if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
1328 goto fast_path_unknown;
1329
1330 if (dccp_feat_push_confirm(fn, feat, local, &fval) ||
1331 dccp_feat_activate(sk, feat, local, &fval))
1332 return DCCP_RESET_CODE_TOO_BUSY;
1333
1334 /* set the `Ack Pending' flag to piggyback a Confirm */
1335 inet_csk_schedule_ack(sk);
1336
1337 } else if (opt == DCCPO_CONFIRM_R) {
1338 entry = dccp_feat_list_lookup(fn, feat, local);
1339 if (entry == NULL || entry->state != FEAT_CHANGING)
1340 return 0;
1341
1342 fval.nn = dccp_decode_value_var(val, len);
1343 /*
1344 * Just ignore a value that doesn't match our current value.
1345 * If the option changes twice within two RTTs, then at least
1346 * one CONFIRM will be received for the old value after a
1347 * new CHANGE was sent.
1348 */
1349 if (fval.nn != entry->val.nn)
1350 return 0;
1351
1352 /* Only activate after receiving the Confirm option (6.6.1). */
1353 dccp_feat_activate(sk, feat, local, &fval);
1354
1355 /* It has been confirmed - so remove the entry */
1356 dccp_feat_list_pop(entry);
1357
1358 } else {
1359 DCCP_WARN("Received illegal option %u\n", opt);
1360 goto fast_path_failed;
1361 }
1362 return 0;
1363
1364fast_path_unknown:
1365 if (!mandatory)
1366 return dccp_push_empty_confirm(fn, feat, local);
1367
1368fast_path_failed:
1369 return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
1370 : DCCP_RESET_CODE_OPTION_ERROR;
1371}
1372
1373/**
1190 * dccp_feat_parse_options - Process Feature-Negotiation Options 1374 * dccp_feat_parse_options - Process Feature-Negotiation Options
1191 * @sk: for general use and used by the client during connection setup 1375 * @sk: for general use and used by the client during connection setup
1192 * @dreq: used by the server during connection setup 1376 * @dreq: used by the server during connection setup
@@ -1221,6 +1405,14 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
1221 return dccp_feat_confirm_recv(fn, mandatory, opt, feat, 1405 return dccp_feat_confirm_recv(fn, mandatory, opt, feat,
1222 val, len, server); 1406 val, len, server);
1223 } 1407 }
1408 break;
1409 /*
1410 * Support for exchanging NN options on an established connection.
1411 */
1412 case DCCP_OPEN:
1413 case DCCP_PARTOPEN:
1414 return dccp_feat_handle_nn_established(sk, mandatory, opt, feat,
1415 val, len);
1224 } 1416 }
1225 return 0; /* ignore FN options in all other states */ 1417 return 0; /* ignore FN options in all other states */
1226} 1418}
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index e56a4e5e634e..90b957d34d26 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -129,6 +129,7 @@ extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
129 129
130extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); 130extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
131extern u64 dccp_decode_value_var(const u8 *bf, const u8 len); 131extern u64 dccp_decode_value_var(const u8 *bf, const u8 len);
132extern u64 dccp_feat_nn_get(struct sock *sk, u8 feat);
132 133
133extern int dccp_insert_option_mandatory(struct sk_buff *skb); 134extern int dccp_insert_option_mandatory(struct sk_buff *skb);
134extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, 135extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
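The fast-path code above leans on dccp_encode_value_var()/dccp_decode_value_var(), which carry an NN value most-significant byte first in as few bytes as the option allows. A small standalone restatement of the decode step, just to make the byte order concrete (the kernel helpers are assumed to behave the same way):

        #include <stdint.h>
        #include <stdio.h>

        /* local restatement of the variable-length decode used by the fast path */
        static uint64_t decode_value_var(const uint8_t *bf, uint8_t len)
        {
                uint64_t value = 0;

                while (len--)
                        value = (value << 8) | *bf++;
                return value;
        }

        int main(void)
        {
                const uint8_t val[] = { 0x00, 0x01, 0x00 };     /* 3-byte NN value */

                printf("%llu\n", (unsigned long long)decode_value_var(val, 3));
                /* prints 256: bytes are most-significant first */
                return 0;
        }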
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8c36adfd1919..332639b56f4d 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -26,6 +26,7 @@
26#include <net/timewait_sock.h> 26#include <net/timewait_sock.h>
27#include <net/tcp_states.h> 27#include <net/tcp_states.h>
28#include <net/xfrm.h> 28#include <net/xfrm.h>
29#include <net/secure_seq.h>
29 30
30#include "ackvec.h" 31#include "ackvec.h"
31#include "ccid.h" 32#include "ccid.h"
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 8dc4348774a5..b74f76117dcf 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -29,6 +29,7 @@
29#include <net/transp_v6.h> 29#include <net/transp_v6.h>
30#include <net/ip6_checksum.h> 30#include <net/ip6_checksum.h>
31#include <net/xfrm.h> 31#include <net/xfrm.h>
32#include <net/secure_seq.h>
32 33
33#include "dccp.h" 34#include "dccp.h"
34#include "ipv6.h" 35#include "ipv6.h"
@@ -69,13 +70,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
69 dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); 70 dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr);
70} 71}
71 72
72static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, 73static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
73 __be16 sport, __be16 dport )
74{
75 return secure_tcpv6_sequence_number(saddr, daddr, sport, dport);
76}
77
78static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
79{ 74{
80 return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, 75 return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
81 ipv6_hdr(skb)->saddr.s6_addr32, 76 ipv6_hdr(skb)->saddr.s6_addr32,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 152975d942d9..e742f90a6858 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -184,7 +184,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
184 dp->dccps_rate_last = jiffies; 184 dp->dccps_rate_last = jiffies;
185 dp->dccps_role = DCCP_ROLE_UNDEFINED; 185 dp->dccps_role = DCCP_ROLE_UNDEFINED;
186 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; 186 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
187 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
188 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; 187 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
189 188
190 dccp_init_xmit_timers(sk); 189 dccp_init_xmit_timers(sk);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index ba4faceec405..2ab16e12520c 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -388,7 +388,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
388 } 388 }
389 389
390 ifa->ifa_next = dn_db->ifa_list; 390 ifa->ifa_next = dn_db->ifa_list;
391 rcu_assign_pointer(dn_db->ifa_list, ifa); 391 RCU_INIT_POINTER(dn_db->ifa_list, ifa);
392 392
393 dn_ifaddr_notify(RTM_NEWADDR, ifa); 393 dn_ifaddr_notify(RTM_NEWADDR, ifa);
394 blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa); 394 blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
@@ -1093,7 +1093,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
1093 1093
1094 memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); 1094 memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
1095 1095
1096 rcu_assign_pointer(dev->dn_ptr, dn_db); 1096 RCU_INIT_POINTER(dev->dn_ptr, dn_db);
1097 dn_db->dev = dev; 1097 dn_db->dev = dev;
1098 init_timer(&dn_db->timer); 1098 init_timer(&dn_db->timer);
1099 1099
@@ -1101,7 +1101,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
1101 1101
1102 dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); 1102 dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table);
1103 if (!dn_db->neigh_parms) { 1103 if (!dn_db->neigh_parms) {
1104 rcu_assign_pointer(dev->dn_ptr, NULL); 1104 RCU_INIT_POINTER(dev->dn_ptr, NULL);
1105 kfree(dn_db); 1105 kfree(dn_db);
1106 return NULL; 1106 return NULL;
1107 } 1107 }
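This hunk, like the later ones in devinet.c, fib_trie.c and gre.c, swaps rcu_assign_pointer() for RCU_INIT_POINTER() where no publication barrier is needed: the stored pointer is NULL, or the object is not yet reachable by readers. A generic sketch of the distinction, using made-up structures rather than anything from the patch:

        #include <linux/rcupdate.h>

        /* illustrative only; these structures are not from the patch */
        struct item {
                int data;
                struct item __rcu *next;
        };

        static struct item __rcu *head;

        static void item_publish(struct item *new)
        {
                new->data = 42;
                RCU_INIT_POINTER(new->next, NULL);  /* not yet visible: no barrier */
                rcu_assign_pointer(head, new);      /* publication: barrier needed */
        }

        static void item_unpublish(void)
        {
                RCU_INIT_POINTER(head, NULL);       /* NULL store: no barrier needed */
        }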
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 0a47b6c37038..56cf9b8e1c7c 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -301,7 +301,6 @@ static const struct net_device_ops dsa_netdev_ops = {
301 .ndo_start_xmit = dsa_xmit, 301 .ndo_start_xmit = dsa_xmit,
302 .ndo_change_rx_flags = dsa_slave_change_rx_flags, 302 .ndo_change_rx_flags = dsa_slave_change_rx_flags,
303 .ndo_set_rx_mode = dsa_slave_set_rx_mode, 303 .ndo_set_rx_mode = dsa_slave_set_rx_mode,
304 .ndo_set_multicast_list = dsa_slave_set_rx_mode,
305 .ndo_set_mac_address = dsa_slave_set_mac_address, 304 .ndo_set_mac_address = dsa_slave_set_mac_address,
306 .ndo_do_ioctl = dsa_slave_ioctl, 305 .ndo_do_ioctl = dsa_slave_ioctl,
307}; 306};
@@ -314,7 +313,6 @@ static const struct net_device_ops edsa_netdev_ops = {
314 .ndo_start_xmit = edsa_xmit, 313 .ndo_start_xmit = edsa_xmit,
315 .ndo_change_rx_flags = dsa_slave_change_rx_flags, 314 .ndo_change_rx_flags = dsa_slave_change_rx_flags,
316 .ndo_set_rx_mode = dsa_slave_set_rx_mode, 315 .ndo_set_rx_mode = dsa_slave_set_rx_mode,
317 .ndo_set_multicast_list = dsa_slave_set_rx_mode,
318 .ndo_set_mac_address = dsa_slave_set_mac_address, 316 .ndo_set_mac_address = dsa_slave_set_mac_address,
319 .ndo_do_ioctl = dsa_slave_ioctl, 317 .ndo_do_ioctl = dsa_slave_ioctl,
320}; 318};
@@ -327,7 +325,6 @@ static const struct net_device_ops trailer_netdev_ops = {
327 .ndo_start_xmit = trailer_xmit, 325 .ndo_start_xmit = trailer_xmit,
328 .ndo_change_rx_flags = dsa_slave_change_rx_flags, 326 .ndo_change_rx_flags = dsa_slave_change_rx_flags,
329 .ndo_set_rx_mode = dsa_slave_set_rx_mode, 327 .ndo_set_rx_mode = dsa_slave_set_rx_mode,
330 .ndo_set_multicast_list = dsa_slave_set_rx_mode,
331 .ndo_set_mac_address = dsa_slave_set_mac_address, 328 .ndo_set_mac_address = dsa_slave_set_mac_address,
332 .ndo_do_ioctl = dsa_slave_ioctl, 329 .ndo_do_ioctl = dsa_slave_ioctl,
333}; 330};
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 27997d35ebd3..a2468363978e 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -340,7 +340,7 @@ void ether_setup(struct net_device *dev)
340 dev->addr_len = ETH_ALEN; 340 dev->addr_len = ETH_ALEN;
341 dev->tx_queue_len = 1000; /* Ethernet wants good queues */ 341 dev->tx_queue_len = 1000; /* Ethernet wants good queues */
342 dev->flags = IFF_BROADCAST|IFF_MULTICAST; 342 dev->flags = IFF_BROADCAST|IFF_MULTICAST;
343 dev->priv_flags = IFF_TX_SKB_SHARING; 343 dev->priv_flags |= IFF_TX_SKB_SHARING;
344 344
345 memset(dev->broadcast, 0xFF, ETH_ALEN); 345 memset(dev->broadcast, 0xFF, ETH_ALEN);
346 346
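The single-character fix matters because ether_setup() can run after the caller has already put flags into priv_flags; plain assignment silently discards them, while |= preserves them. A trivial standalone illustration with made-up flag values:

        #include <stdio.h>

        #define FLAG_FROM_CALLER        0x1     /* made-up flag values */
        #define FLAG_FROM_SETUP         0x2

        int main(void)
        {
                unsigned int assigned = FLAG_FROM_CALLER;
                unsigned int ored     = FLAG_FROM_CALLER;

                assigned  = FLAG_FROM_SETUP;    /* old behaviour: caller's flag lost */
                ored     |= FLAG_FROM_SETUP;    /* fixed behaviour: both survive */

                printf("=: %#x   |=: %#x\n", assigned, ored);
                return 0;
        }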
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
new file mode 100644
index 000000000000..19d6aefe97d4
--- /dev/null
+++ b/net/ieee802154/6lowpan.c
@@ -0,0 +1,891 @@
1/*
2 * Copyright 2011, Siemens AG
3 * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
4 */
5
6/*
7 * Based on patches from Jon Smirl <jonsmirl@gmail.com>
8 * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 */
23
24/* Jon's code is based on 6lowpan implementation for Contiki which is:
25 * Copyright (c) 2008, Swedish Institute of Computer Science.
26 * All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. Neither the name of the Institute nor the names of its contributors
37 * may be used to endorse or promote products derived from this software
38 * without specific prior written permission.
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 */
52
53#define DEBUG
54
55#include <linux/bitops.h>
56#include <linux/if_arp.h>
57#include <linux/module.h>
58#include <linux/moduleparam.h>
59#include <linux/netdevice.h>
60#include <net/af_ieee802154.h>
61#include <net/ieee802154.h>
62#include <net/ieee802154_netdev.h>
63#include <net/ipv6.h>
64
65#include "6lowpan.h"
66
67/* TTL uncompression values */
68static const u8 lowpan_ttl_values[] = {0, 1, 64, 255};
69
70static LIST_HEAD(lowpan_devices);
71
72/*
73 * Uncompression of linklocal:
74 * 0 -> 16 bytes from packet
75 * 1 -> 2 bytes from prefix - bunch of zeroes and 8 from packet
76 * 2 -> 2 bytes from prefix - zeroes + 2 from packet
77 * 3 -> 2 bytes from prefix - infer 8 bytes from lladdr
78 *
79 * NOTE: => the uncompress function does change 0xf to 0x10
80 * NOTE: 0x00 => no-autoconfig => unspecified
81 */
82static const u8 lowpan_unc_llconf[] = {0x0f, 0x28, 0x22, 0x20};
83
84/*
85 * Uncompression of ctx-based:
86 * 0 -> 0 bits from packet [unspecified / reserved]
87 * 1 -> 8 bytes from prefix - bunch of zeroes and 8 from packet
88 * 2 -> 8 bytes from prefix - zeroes + 2 from packet
89 * 3 -> 8 bytes from prefix - infer 8 bytes from lladdr
90 */
91static const u8 lowpan_unc_ctxconf[] = {0x00, 0x88, 0x82, 0x80};
92
93/*
94 * Uncompression of ctx-base
95 * 0 -> 0 bits from packet
96 * 1 -> 2 bytes from prefix - bunch of zeroes 5 from packet
97 * 2 -> 2 bytes from prefix - zeroes + 3 from packet
98 * 3 -> 2 bytes from prefix - infer 1 bytes from lladdr
99 */
100static const u8 lowpan_unc_mxconf[] = {0x0f, 0x25, 0x23, 0x21};
101
102/* Link local prefix */
103static const u8 lowpan_llprefix[] = {0xfe, 0x80};
104
105/* private device info */
106struct lowpan_dev_info {
107 struct net_device *real_dev; /* real WPAN device ptr */
108 struct mutex dev_list_mtx; /* mutex for list ops */
109};
110
111struct lowpan_dev_record {
112 struct net_device *ldev;
113 struct list_head list;
114};
115
116static inline struct
117lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
118{
119 return netdev_priv(dev);
120}
121
122static inline void lowpan_address_flip(u8 *src, u8 *dest)
123{
124 int i;
125 for (i = 0; i < IEEE802154_ADDR_LEN; i++)
126 (dest)[IEEE802154_ADDR_LEN - i - 1] = (src)[i];
127}
128
129/* list of all 6lowpan devices, used for packet delivery */
130/* print data in line */
131static inline void lowpan_raw_dump_inline(const char *caller, char *msg,
132 unsigned char *buf, int len)
133{
134#ifdef DEBUG
135 if (msg)
136 pr_debug("(%s) %s: ", caller, msg);
137 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE,
138 16, 1, buf, len, false);
139#endif /* DEBUG */
140}
141
142/*
143 * print data in a table format:
144 *
145 * addr: xx xx xx xx xx xx
146 * addr: xx xx xx xx xx xx
147 * ...
148 */
149static inline void lowpan_raw_dump_table(const char *caller, char *msg,
150 unsigned char *buf, int len)
151{
152#ifdef DEBUG
153 if (msg)
154 pr_debug("(%s) %s:\n", caller, msg);
155 print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET,
156 16, 1, buf, len, false);
157#endif /* DEBUG */
158}
159
160static u8
161lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr,
162 const unsigned char *lladdr)
163{
164 u8 val = 0;
165
166 if (is_addr_mac_addr_based(ipaddr, lladdr))
167 val = 3; /* 0-bits */
168 else if (lowpan_is_iid_16_bit_compressable(ipaddr)) {
169 /* compress IID to 16 bits xxxx::XXXX */
170 memcpy(*hc06_ptr, &ipaddr->s6_addr16[7], 2);
171 *hc06_ptr += 2;
172 val = 2; /* 16-bits */
173 } else {
174 /* do not compress IID => xxxx::IID */
175 memcpy(*hc06_ptr, &ipaddr->s6_addr16[4], 8);
176 *hc06_ptr += 8;
177 val = 1; /* 64-bits */
178 }
179
180 return rol8(val, shift);
181}
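The 2-bit mode computed here is rotated into position with rol8(), using LOWPAN_IPHC_SAM_BIT (4) for the source address and LOWPAN_IPHC_DAM_BIT (0) for the destination, so one helper can fill either field of the second IPHC byte. A small standalone check of that placement (rol8() re-stated locally for illustration):

        #include <stdint.h>
        #include <stdio.h>

        /* local restatement of an 8-bit rotate-left for illustration */
        static uint8_t rol8(uint8_t v, unsigned int s)
        {
                return (uint8_t)((v << s) | (v >> ((8 - s) & 7)));
        }

        int main(void)
        {
                /* mode 2 means "IID compressed to 16 bits" */
                printf("SAM: 0x%02x  DAM: 0x%02x\n", rol8(2, 4), rol8(2, 0));
                /* 0x20 == LOWPAN_IPHC_SAM_10, 0x02 == LOWPAN_IPHC_DAM_10 */
                return 0;
        }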
182
183static void
184lowpan_uip_ds6_set_addr_iid(struct in6_addr *ipaddr, unsigned char *lladdr)
185{
186 memcpy(&ipaddr->s6_addr[8], lladdr, IEEE802154_ALEN);
187 /* second bit-flip (Universe/Local) is done according RFC2464 */
188 ipaddr->s6_addr[8] ^= 0x02;
189}
190
191/*
192 * Uncompress addresses based on a prefix and a postfix with zeroes in
193 * between. If the postfix is zero in length it will use the link address
194 * to configure the IP address (autoconf style).
195 * pref_post_count takes a byte where the first nibble specify prefix count
196 * and the second postfix count (NOTE: 15/0xf => 16 bytes copy).
197 */
198static int
199lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr,
200 u8 const *prefix, u8 pref_post_count, unsigned char *lladdr)
201{
202 u8 prefcount = pref_post_count >> 4;
203 u8 postcount = pref_post_count & 0x0f;
204
205 /* full nibble 15 => 16 */
206 prefcount = (prefcount == 15 ? 16 : prefcount);
207 postcount = (postcount == 15 ? 16 : postcount);
208
209 if (lladdr)
210 lowpan_raw_dump_inline(__func__, "linklocal address",
211 lladdr, IEEE802154_ALEN);
212 if (prefcount > 0)
213 memcpy(ipaddr, prefix, prefcount);
214
215 if (prefcount + postcount < 16)
216 memset(&ipaddr->s6_addr[prefcount], 0,
217 16 - (prefcount + postcount));
218
219 if (postcount > 0) {
220 memcpy(&ipaddr->s6_addr[16 - postcount], skb->data, postcount);
221 skb_pull(skb, postcount);
222 } else if (prefcount > 0) {
223 if (lladdr == NULL)
224 return -EINVAL;
225
226 /* no IID based configuration if no prefix and no data */
227 lowpan_uip_ds6_set_addr_iid(ipaddr, lladdr);
228 }
229
230 pr_debug("(%s): uncompressing %d + %d => ", __func__, prefcount,
231 postcount);
232 lowpan_raw_dump_inline(NULL, NULL, ipaddr->s6_addr, 16);
233
234 return 0;
235}
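Each entry of the uncompression tables near the top of the file packs the prefix byte count into the high nibble and the inline byte count into the low nibble, with 15 standing for a full 16 bytes; lowpan_uncompress_addr() zero-fills whatever is left in between. A standalone check of that decoding rule against the link-local table values:

        #include <stdint.h>
        #include <stdio.h>

        static void decode(uint8_t pref_post_count)
        {
                int pref = pref_post_count >> 4;
                int post = pref_post_count & 0x0f;

                pref = (pref == 15) ? 16 : pref;        /* full nibble 15 => 16 */
                post = (post == 15) ? 16 : post;

                printf("0x%02x -> %d prefix, %d inline, %d zero bytes\n",
                       pref_post_count, pref, post, 16 - pref - post);
        }

        int main(void)
        {
                decode(0x0f);   /* mode 0: whole address carried inline */
                decode(0x28);   /* mode 1: fe80:: prefix + 8 bytes inline */
                decode(0x22);   /* mode 2: fe80:: prefix + 2 bytes inline */
                decode(0x20);   /* mode 3: fe80:: prefix, IID taken from lladdr */
                return 0;
        }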
236
237static u8 lowpan_fetch_skb_u8(struct sk_buff *skb)
238{
239 u8 ret;
240
241 ret = skb->data[0];
242 skb_pull(skb, 1);
243
244 return ret;
245}
246
247static int lowpan_header_create(struct sk_buff *skb,
248 struct net_device *dev,
249 unsigned short type, const void *_daddr,
250 const void *_saddr, unsigned len)
251{
252 u8 tmp, iphc0, iphc1, *hc06_ptr;
253 struct ipv6hdr *hdr;
254 const u8 *saddr = _saddr;
255 const u8 *daddr = _daddr;
256 u8 *head;
257 struct ieee802154_addr sa, da;
258
259 if (type != ETH_P_IPV6)
260 return 0;
261 /* TODO:
262 * if this package isn't ipv6 one, where should it be routed?
263 */
264 head = kzalloc(100, GFP_KERNEL);
265 if (head == NULL)
266 return -ENOMEM;
267
268 hdr = ipv6_hdr(skb);
269 hc06_ptr = head + 2;
270
271 pr_debug("(%s): IPv6 header dump:\n\tversion = %d\n\tlength = %d\n"
272 "\tnexthdr = 0x%02x\n\thop_lim = %d\n", __func__,
273 hdr->version, ntohs(hdr->payload_len), hdr->nexthdr,
274 hdr->hop_limit);
275
276 lowpan_raw_dump_table(__func__, "raw skb network header dump",
277 skb_network_header(skb), sizeof(struct ipv6hdr));
278
279 if (!saddr)
280 saddr = dev->dev_addr;
281
282 lowpan_raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8);
283
284 /*
285 * As we copy some bit-length fields, in the IPHC encoding bytes,
286 * we sometimes use |=
287 * If the field is 0, and the current bit value in memory is 1,
288 * this does not work. We therefore reset the IPHC encoding here
289 */
290 iphc0 = LOWPAN_DISPATCH_IPHC;
291 iphc1 = 0;
292
293 /* TODO: context lookup */
294
295 lowpan_raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8);
296
297 /*
298 * Traffic class, flow label
299 * If flow label is 0, compress it. If traffic class is 0, compress it
300 * We have to process both in the same time as the offset of traffic
301 * class depends on the presence of version and flow label
302 */
303
304 /* hc06 format of TC is ECN | DSCP , original one is DSCP | ECN */
305 tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4);
306 tmp = ((tmp & 0x03) << 6) | (tmp >> 2);
307
308 if (((hdr->flow_lbl[0] & 0x0F) == 0) &&
309 (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) {
310 /* flow label can be compressed */
311 iphc0 |= LOWPAN_IPHC_FL_C;
312 if ((hdr->priority == 0) &&
313 ((hdr->flow_lbl[0] & 0xF0) == 0)) {
314 /* compress (elide) all */
315 iphc0 |= LOWPAN_IPHC_TC_C;
316 } else {
317 /* compress only the flow label */
318 *hc06_ptr = tmp;
319 hc06_ptr += 1;
320 }
321 } else {
322 /* Flow label cannot be compressed */
323 if ((hdr->priority == 0) &&
324 ((hdr->flow_lbl[0] & 0xF0) == 0)) {
325 /* compress only traffic class */
326 iphc0 |= LOWPAN_IPHC_TC_C;
327 *hc06_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F);
328 memcpy(hc06_ptr + 1, &hdr->flow_lbl[1], 2);
329 hc06_ptr += 3;
330 } else {
331 /* compress nothing */
332 memcpy(hc06_ptr, &hdr, 4);
333 /* replace the top byte with new ECN | DSCP format */
334 *hc06_ptr = tmp;
335 hc06_ptr += 4;
336 }
337 }
338
339 /* NOTE: payload length is always compressed */
340
341 /* Next Header is compress if UDP */
342 if (hdr->nexthdr == UIP_PROTO_UDP)
343 iphc0 |= LOWPAN_IPHC_NH_C;
344
345/* TODO: next header compression */
346
347 if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) {
348 *hc06_ptr = hdr->nexthdr;
349 hc06_ptr += 1;
350 }
351
352 /*
353 * Hop limit
354 * if 1: compress, encoding is 01
355 * if 64: compress, encoding is 10
356 * if 255: compress, encoding is 11
357 * else do not compress
358 */
359 switch (hdr->hop_limit) {
360 case 1:
361 iphc0 |= LOWPAN_IPHC_TTL_1;
362 break;
363 case 64:
364 iphc0 |= LOWPAN_IPHC_TTL_64;
365 break;
366 case 255:
367 iphc0 |= LOWPAN_IPHC_TTL_255;
368 break;
369 default:
370 *hc06_ptr = hdr->hop_limit;
371 break;
372 }
373
374 /* source address compression */
375 if (is_addr_unspecified(&hdr->saddr)) {
376 pr_debug("(%s): source address is unspecified, setting SAC\n",
377 __func__);
378 iphc1 |= LOWPAN_IPHC_SAC;
379 /* TODO: context lookup */
380 } else if (is_addr_link_local(&hdr->saddr)) {
381 pr_debug("(%s): source address is link-local\n", __func__);
382 iphc1 |= lowpan_compress_addr_64(&hc06_ptr,
383 LOWPAN_IPHC_SAM_BIT, &hdr->saddr, saddr);
384 } else {
385 pr_debug("(%s): send the full source address\n", __func__);
386 memcpy(hc06_ptr, &hdr->saddr.s6_addr16[0], 16);
387 hc06_ptr += 16;
388 }
389
390 /* destination address compression */
391 if (is_addr_mcast(&hdr->daddr)) {
392 pr_debug("(%s): destination address is multicast", __func__);
393 iphc1 |= LOWPAN_IPHC_M;
394 if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) {
395 pr_debug("compressed to 1 octet\n");
396 iphc1 |= LOWPAN_IPHC_DAM_11;
397 /* use last byte */
398 *hc06_ptr = hdr->daddr.s6_addr[15];
399 hc06_ptr += 1;
400 } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) {
401 pr_debug("compressed to 4 octets\n");
402 iphc1 |= LOWPAN_IPHC_DAM_10;
403 /* second byte + the last three */
404 *hc06_ptr = hdr->daddr.s6_addr[1];
405 memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[13], 3);
406 hc06_ptr += 4;
407 } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) {
408 pr_debug("compressed to 6 octets\n");
409 iphc1 |= LOWPAN_IPHC_DAM_01;
410 /* second byte + the last five */
411 *hc06_ptr = hdr->daddr.s6_addr[1];
412 memcpy(hc06_ptr + 1, &hdr->daddr.s6_addr[11], 5);
413 hc06_ptr += 6;
414 } else {
415 pr_debug("using full address\n");
416 iphc1 |= LOWPAN_IPHC_DAM_00;
417 memcpy(hc06_ptr, &hdr->daddr.s6_addr[0], 16);
418 hc06_ptr += 16;
419 }
420 } else {
421 pr_debug("(%s): destination address is unicast: ", __func__);
422 /* TODO: context lookup */
423 if (is_addr_link_local(&hdr->daddr)) {
424 pr_debug("destination address is link-local\n");
425 iphc1 |= lowpan_compress_addr_64(&hc06_ptr,
426 LOWPAN_IPHC_DAM_BIT, &hdr->daddr, daddr);
427 } else {
428 pr_debug("using full address\n");
429 memcpy(hc06_ptr, &hdr->daddr.s6_addr16[0], 16);
430 hc06_ptr += 16;
431 }
432 }
433
434 /* TODO: UDP header compression */
435 /* TODO: Next Header compression */
436
437 head[0] = iphc0;
438 head[1] = iphc1;
439
440 skb_pull(skb, sizeof(struct ipv6hdr));
441 memcpy(skb_push(skb, hc06_ptr - head), head, hc06_ptr - head);
442
443 kfree(head);
444
445 lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data,
446 skb->len);
447
448 /*
449 * NOTE1: I'm still unsure about the fact that compression and WPAN
450 * header are created here and not later in the xmit. So wait for
451 * an opinion of net maintainers.
452 */
453 /*
454 * NOTE2: to be absolutely correct, we must derive PANid information
455 * from MAC subif of the 'dev' and 'real_dev' network devices, but
456 * this isn't implemented in mainline yet, so currently we assign 0xff
457 */
458 {
459 /* prepare wpan address data */
460 sa.addr_type = IEEE802154_ADDR_LONG;
461 sa.pan_id = 0xff;
462
463 da.addr_type = IEEE802154_ADDR_LONG;
464 da.pan_id = 0xff;
465
466 memcpy(&(da.hwaddr), daddr, 8);
467 memcpy(&(sa.hwaddr), saddr, 8);
468
469 mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
470 return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
471 type, (void *)&da, (void *)&sa, skb->len);
472 }
473}
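The least obvious step in the header builder above is the traffic-class shuffle: IPv6 keeps DSCP in the upper six bits and ECN in the lower two, while HC06 wants ECN first, and the two assignments to tmp perform exactly that rotation across hdr->priority and flow_lbl[0]. A standalone check with DSCP 46 (EF) and ECN 10b:

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint8_t tc = (46 << 2) | 0x2;   /* IPv6 TC byte: DSCP 46 (EF), ECN 10b */
                uint8_t priority  = tc >> 4;            /* as split across ipv6hdr fields */
                uint8_t flow_lbl0 = (tc & 0x0f) << 4;

                uint8_t tmp = (priority << 4) | (flow_lbl0 >> 4);  /* DSCP | ECN again */
                tmp = ((tmp & 0x03) << 6) | (tmp >> 2);            /* rotate to ECN | DSCP */

                printf("IPv6 TC 0x%02x -> HC06 TC 0x%02x\n", tc, tmp);  /* 0xba -> 0xae */
                return 0;
        }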
474
475static int lowpan_skb_deliver(struct sk_buff *skb, struct ipv6hdr *hdr)
476{
477 struct sk_buff *new;
478 struct lowpan_dev_record *entry;
479 int stat = NET_RX_SUCCESS;
480
481 new = skb_copy_expand(skb, sizeof(struct ipv6hdr), skb_tailroom(skb),
482 GFP_ATOMIC);
483 kfree_skb(skb);
484
485 if (!new)
486 return -ENOMEM;
487
488 skb_push(new, sizeof(struct ipv6hdr));
489 skb_reset_network_header(new);
490 skb_copy_to_linear_data(new, hdr, sizeof(struct ipv6hdr));
491
492 new->protocol = htons(ETH_P_IPV6);
493 new->pkt_type = PACKET_HOST;
494
495 rcu_read_lock();
496 list_for_each_entry_rcu(entry, &lowpan_devices, list)
497 if (lowpan_dev_info(entry->ldev)->real_dev == new->dev) {
498 skb = skb_copy(new, GFP_ATOMIC);
499 if (!skb) {
500 stat = -ENOMEM;
501 break;
502 }
503
504 skb->dev = entry->ldev;
505 stat = netif_rx(skb);
506 }
507 rcu_read_unlock();
508
509 kfree_skb(new);
510
511 return stat;
512}
513
514static int
515lowpan_process_data(struct sk_buff *skb)
516{
517 struct ipv6hdr hdr;
518 u8 tmp, iphc0, iphc1, num_context = 0;
519 u8 *_saddr, *_daddr;
520 int err;
521
522 lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data,
523 skb->len);
524 /* at least two bytes will be used for the encoding */
525 if (skb->len < 2)
526 goto drop;
527 iphc0 = lowpan_fetch_skb_u8(skb);
528 iphc1 = lowpan_fetch_skb_u8(skb);
529
530 _saddr = mac_cb(skb)->sa.hwaddr;
531 _daddr = mac_cb(skb)->da.hwaddr;
532
533 pr_debug("(%s): iphc0 = %02x, iphc1 = %02x\n", __func__, iphc0, iphc1);
534
535 /* another if the CID flag is set */
536 if (iphc1 & LOWPAN_IPHC_CID) {
537 pr_debug("(%s): CID flag is set, increase header with one\n",
538 __func__);
539 if (!skb->len)
540 goto drop;
541 num_context = lowpan_fetch_skb_u8(skb);
542 }
543
544 hdr.version = 6;
545
546 /* Traffic Class and Flow Label */
547 switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) {
548 /*
549 * Traffic Class and FLow Label carried in-line
550 * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes)
551 */
552 case 0: /* 00b */
553 if (!skb->len)
554 goto drop;
555 tmp = lowpan_fetch_skb_u8(skb);
556 memcpy(&hdr.flow_lbl, &skb->data[0], 3);
557 skb_pull(skb, 3);
558 hdr.priority = ((tmp >> 2) & 0x0f);
559 hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) |
560 (hdr.flow_lbl[0] & 0x0f);
561 break;
562 /*
563 * Traffic class carried in-line
564 * ECN + DSCP (1 byte), Flow Label is elided
565 */
566 case 1: /* 10b */
567 if (!skb->len)
568 goto drop;
569 tmp = lowpan_fetch_skb_u8(skb);
570 hdr.priority = ((tmp >> 2) & 0x0f);
571 hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30);
572 hdr.flow_lbl[1] = 0;
573 hdr.flow_lbl[2] = 0;
574 break;
575 /*
576 * Flow Label carried in-line
577 * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided
578 */
579 case 2: /* 01b */
580 if (!skb->len)
581 goto drop;
582 tmp = lowpan_fetch_skb_u8(skb);
583 hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30);
584 memcpy(&hdr.flow_lbl[1], &skb->data[0], 2);
585 skb_pull(skb, 2);
586 break;
587 /* Traffic Class and Flow Label are elided */
588 case 3: /* 11b */
589 hdr.priority = 0;
590 hdr.flow_lbl[0] = 0;
591 hdr.flow_lbl[1] = 0;
592 hdr.flow_lbl[2] = 0;
593 break;
594 default:
595 break;
596 }
597
598 /* Next Header */
599 if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) {
600 /* Next header is carried inline */
601 if (!skb->len)
602 goto drop;
603 hdr.nexthdr = lowpan_fetch_skb_u8(skb);
604 pr_debug("(%s): NH flag is set, next header is carried "
605 "inline: %02x\n", __func__, hdr.nexthdr);
606 }
607
608 /* Hop Limit */
609 if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I)
610 hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03];
611 else {
612 if (!skb->len)
613 goto drop;
614 hdr.hop_limit = lowpan_fetch_skb_u8(skb);
615 }
616
617 /* Extract SAM to the tmp variable */
618 tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03;
619
620 /* Source address uncompression */
621 pr_debug("(%s): source address stateless compression\n", __func__);
622 err = lowpan_uncompress_addr(skb, &hdr.saddr, lowpan_llprefix,
623 lowpan_unc_llconf[tmp], skb->data);
624 if (err)
625 goto drop;
626
627 /* Extract DAM to the tmp variable */
628 tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03;
629
630 /* check for Multicast Compression */
631 if (iphc1 & LOWPAN_IPHC_M) {
632 if (iphc1 & LOWPAN_IPHC_DAC) {
633 pr_debug("(%s): destination address context-based "
634 "multicast compression\n", __func__);
635 /* TODO: implement this */
636 } else {
637 u8 prefix[] = {0xff, 0x02};
638
639 pr_debug("(%s): destination address non-context-based"
640 " multicast compression\n", __func__);
641 if (0 < tmp && tmp < 3) {
642 if (!skb->len)
643 goto drop;
644 else
645 prefix[1] = lowpan_fetch_skb_u8(skb);
646 }
647
648 err = lowpan_uncompress_addr(skb, &hdr.daddr, prefix,
649 lowpan_unc_mxconf[tmp], NULL);
650 if (err)
651 goto drop;
652 }
653 } else {
654 pr_debug("(%s): destination address stateless compression\n",
655 __func__);
656 err = lowpan_uncompress_addr(skb, &hdr.daddr, lowpan_llprefix,
657 lowpan_unc_llconf[tmp], skb->data);
658 if (err)
659 goto drop;
660 }
661
662 /* TODO: UDP header parse */
663
664 /* Not fragmented package */
665 hdr.payload_len = htons(skb->len);
666
667 pr_debug("(%s): skb headroom size = %d, data length = %d\n", __func__,
668 skb_headroom(skb), skb->len);
669
670 pr_debug("(%s): IPv6 header dump:\n\tversion = %d\n\tlength = %d\n\t"
671 "nexthdr = 0x%02x\n\thop_lim = %d\n", __func__, hdr.version,
672 ntohs(hdr.payload_len), hdr.nexthdr, hdr.hop_limit);
673
674 lowpan_raw_dump_table(__func__, "raw header dump", (u8 *)&hdr,
675 sizeof(hdr));
676 return lowpan_skb_deliver(skb, &hdr);
677drop:
678 kfree_skb(skb);
679 return -EINVAL;
680}
681
682static int lowpan_set_address(struct net_device *dev, void *p)
683{
684 struct sockaddr *sa = p;
685
686 if (netif_running(dev))
687 return -EBUSY;
688
689 /* TODO: validate addr */
690 memcpy(dev->dev_addr, sa->sa_data, dev->addr_len);
691
692 return 0;
693}
694
695static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
696{
697 int err = 0;
698
699 pr_debug("(%s): package xmit\n", __func__);
700
701 skb->dev = lowpan_dev_info(dev)->real_dev;
702 if (skb->dev == NULL) {
703 pr_debug("(%s) ERROR: no real wpan device found\n", __func__);
704 dev_kfree_skb(skb);
705 } else
706 err = dev_queue_xmit(skb);
707
708 return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK);
709}
710
711static void lowpan_dev_free(struct net_device *dev)
712{
713 dev_put(lowpan_dev_info(dev)->real_dev);
714 free_netdev(dev);
715}
716
717static struct header_ops lowpan_header_ops = {
718 .create = lowpan_header_create,
719};
720
721static const struct net_device_ops lowpan_netdev_ops = {
722 .ndo_start_xmit = lowpan_xmit,
723 .ndo_set_mac_address = lowpan_set_address,
724};
725
726static void lowpan_setup(struct net_device *dev)
727{
728 pr_debug("(%s)\n", __func__);
729
730 dev->addr_len = IEEE802154_ADDR_LEN;
731 memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN);
732 dev->type = ARPHRD_IEEE802154;
733 dev->features = NETIF_F_NO_CSUM;
734 /* Frame Control + Sequence Number + Address fields + Security Header */
735 dev->hard_header_len = 2 + 1 + 20 + 14;
736 dev->needed_tailroom = 2; /* FCS */
737 dev->mtu = 1281;
738 dev->tx_queue_len = 0;
739 dev->flags = IFF_NOARP | IFF_BROADCAST;
740 dev->watchdog_timeo = 0;
741
742 dev->netdev_ops = &lowpan_netdev_ops;
743 dev->header_ops = &lowpan_header_ops;
744 dev->destructor = lowpan_dev_free;
745}
746
747static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
748{
749 pr_debug("(%s)\n", __func__);
750
751 if (tb[IFLA_ADDRESS]) {
752 if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN)
753 return -EINVAL;
754 }
755 return 0;
756}
757
758static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
759 struct packet_type *pt, struct net_device *orig_dev)
760{
761 if (!netif_running(dev))
762 goto drop;
763
764 if (dev->type != ARPHRD_IEEE802154)
765 goto drop;
766
767 /* check that it's our buffer */
768 if ((skb->data[0] & 0xe0) == 0x60)
769 lowpan_process_data(skb);
770
771 return NET_RX_SUCCESS;
772
773drop:
774 kfree_skb(skb);
775 return NET_RX_DROP;
776}
777
778static int lowpan_newlink(struct net *src_net, struct net_device *dev,
779 struct nlattr *tb[], struct nlattr *data[])
780{
781 struct net_device *real_dev;
782 struct lowpan_dev_record *entry;
783
784 pr_debug("(%s)\n", __func__);
785
786 if (!tb[IFLA_LINK])
787 return -EINVAL;
788 /* find and hold real wpan device */
789 real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
790 if (!real_dev)
791 return -ENODEV;
792
793 lowpan_dev_info(dev)->real_dev = real_dev;
794 mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
795
796 entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL);
797 if (!entry) {
798 dev_put(real_dev);
799 lowpan_dev_info(dev)->real_dev = NULL;
800 return -ENOMEM;
801 }
802
803 entry->ldev = dev;
804
805 mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
806 INIT_LIST_HEAD(&entry->list);
807 list_add_tail(&entry->list, &lowpan_devices);
808 mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx);
809
810 register_netdevice(dev);
811
812 return 0;
813}
814
815static void lowpan_dellink(struct net_device *dev, struct list_head *head)
816{
817 struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev);
818 struct net_device *real_dev = lowpan_dev->real_dev;
819 struct lowpan_dev_record *entry;
820 struct lowpan_dev_record *tmp;
821
822 ASSERT_RTNL();
823
824 mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
825 list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) {
826 if (entry->ldev == dev) {
827 list_del(&entry->list);
828 kfree(entry);
829 }
830 }
831 mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx);
832
833 mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx);
834
835 unregister_netdevice_queue(dev, head);
836
837 dev_put(real_dev);
838}
839
840static struct rtnl_link_ops lowpan_link_ops __read_mostly = {
841 .kind = "lowpan",
842 .priv_size = sizeof(struct lowpan_dev_info),
843 .setup = lowpan_setup,
844 .newlink = lowpan_newlink,
845 .dellink = lowpan_dellink,
846 .validate = lowpan_validate,
847};
848
849static inline int __init lowpan_netlink_init(void)
850{
851 return rtnl_link_register(&lowpan_link_ops);
852}
853
854static inline void __init lowpan_netlink_fini(void)
855{
856 rtnl_link_unregister(&lowpan_link_ops);
857}
858
859static struct packet_type lowpan_packet_type = {
860 .type = __constant_htons(ETH_P_IEEE802154),
861 .func = lowpan_rcv,
862};
863
864static int __init lowpan_init_module(void)
865{
866 int err = 0;
867
868 pr_debug("(%s)\n", __func__);
869
870 err = lowpan_netlink_init();
871 if (err < 0)
872 goto out;
873
874 dev_add_pack(&lowpan_packet_type);
875out:
876 return err;
877}
878
879static void __exit lowpan_cleanup_module(void)
880{
881 pr_debug("(%s)\n", __func__);
882
883 lowpan_netlink_fini();
884
885 dev_remove_pack(&lowpan_packet_type);
886}
887
888module_init(lowpan_init_module);
889module_exit(lowpan_cleanup_module);
890MODULE_LICENSE("GPL");
891MODULE_ALIAS_RTNL_LINK("lowpan");
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
new file mode 100644
index 000000000000..5d8cf80b930d
--- /dev/null
+++ b/net/ieee802154/6lowpan.h
@@ -0,0 +1,212 @@
1/*
2 * Copyright 2011, Siemens AG
3 * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
4 */
5
6/*
7 * Based on patches from Jon Smirl <jonsmirl@gmail.com>
8 * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 */
23
24/* Jon's code is based on 6lowpan implementation for Contiki which is:
25 * Copyright (c) 2008, Swedish Institute of Computer Science.
26 * All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. Neither the name of the Institute nor the names of its contributors
37 * may be used to endorse or promote products derived from this software
38 * without specific prior written permission.
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 */
52
53#ifndef __6LOWPAN_H__
54#define __6LOWPAN_H__
55
56/* need to know address length to manipulate with it */
57#define IEEE802154_ALEN 8
58
59#define UIP_802154_SHORTADDR_LEN 2 /* compressed ipv6 address length */
60#define UIP_IPH_LEN 40 /* ipv6 fixed header size */
61#define UIP_PROTO_UDP 17 /* ipv6 next header value for UDP */
62#define UIP_FRAGH_LEN 8 /* ipv6 fragment header size */
63
64/*
65 * ipv6 address based on mac
66 * second bit-flip (Universe/Local) is done according RFC2464
67 */
68#define is_addr_mac_addr_based(a, m) \
69 ((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) && \
70 (((a)->s6_addr[9]) == (m)[1]) && \
71 (((a)->s6_addr[10]) == (m)[2]) && \
72 (((a)->s6_addr[11]) == (m)[3]) && \
73 (((a)->s6_addr[12]) == (m)[4]) && \
74 (((a)->s6_addr[13]) == (m)[5]) && \
75 (((a)->s6_addr[14]) == (m)[6]) && \
76 (((a)->s6_addr[15]) == (m)[7]))
77
78/* ipv6 address is unspecified */
79#define is_addr_unspecified(a) \
80 ((((a)->s6_addr32[0]) == 0) && \
81 (((a)->s6_addr32[1]) == 0) && \
82 (((a)->s6_addr32[2]) == 0) && \
83 (((a)->s6_addr32[3]) == 0))
84
85/* compare ipv6 addresses prefixes */
86#define ipaddr_prefixcmp(addr1, addr2, length) \
87 (memcmp(addr1, addr2, length >> 3) == 0)
88
89/* local link, i.e. FE80::/10 */
90#define is_addr_link_local(a) (((a)->s6_addr16[0]) == 0x80FE)
91
92/*
93 * check whether we can compress the IID to 16 bits,
 94 * this is possible only for unicast addresses whose first 49 bits are zero.
95 */
96#define lowpan_is_iid_16_bit_compressable(a) \
97 ((((a)->s6_addr16[4]) == 0) && \
98 (((a)->s6_addr16[5]) == 0) && \
99 (((a)->s6_addr16[6]) == 0) && \
100 ((((a)->s6_addr[14]) & 0x80) == 0))
101
102/* multicast address */
103#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF)
104
105/* check whether the 112-bit gid of the multicast address is mappable to: */
106
107/* 9 bits, for FF02::1 (all nodes) and FF02::2 (all routers) addresses only. */
108#define lowpan_is_mcast_addr_compressable(a) \
109 ((((a)->s6_addr16[1]) == 0) && \
110 (((a)->s6_addr16[2]) == 0) && \
111 (((a)->s6_addr16[3]) == 0) && \
112 (((a)->s6_addr16[4]) == 0) && \
113 (((a)->s6_addr16[5]) == 0) && \
114 (((a)->s6_addr16[6]) == 0) && \
115 (((a)->s6_addr[14]) == 0) && \
116 ((((a)->s6_addr[15]) == 1) || (((a)->s6_addr[15]) == 2)))
117
118/* 48 bits, FFXX::00XX:XXXX:XXXX */
119#define lowpan_is_mcast_addr_compressable48(a) \
120 ((((a)->s6_addr16[1]) == 0) && \
121 (((a)->s6_addr16[2]) == 0) && \
122 (((a)->s6_addr16[3]) == 0) && \
123 (((a)->s6_addr16[4]) == 0) && \
124 (((a)->s6_addr[10]) == 0))
125
126/* 32 bits, FFXX::00XX:XXXX */
127#define lowpan_is_mcast_addr_compressable32(a) \
128 ((((a)->s6_addr16[1]) == 0) && \
129 (((a)->s6_addr16[2]) == 0) && \
130 (((a)->s6_addr16[3]) == 0) && \
131 (((a)->s6_addr16[4]) == 0) && \
132 (((a)->s6_addr16[5]) == 0) && \
133 (((a)->s6_addr[12]) == 0))
134
135/* 8 bits, FF02::00XX */
136#define lowpan_is_mcast_addr_compressable8(a) \
137 ((((a)->s6_addr[1]) == 2) && \
138 (((a)->s6_addr16[1]) == 0) && \
139 (((a)->s6_addr16[2]) == 0) && \
140 (((a)->s6_addr16[3]) == 0) && \
141 (((a)->s6_addr16[4]) == 0) && \
142 (((a)->s6_addr16[5]) == 0) && \
143 (((a)->s6_addr16[6]) == 0) && \
144 (((a)->s6_addr[14]) == 0))
145
146#define lowpan_is_addr_broadcast(a) \
147 ((((a)[0]) == 0xFF) && \
148 (((a)[1]) == 0xFF) && \
149 (((a)[2]) == 0xFF) && \
150 (((a)[3]) == 0xFF) && \
151 (((a)[4]) == 0xFF) && \
152 (((a)[5]) == 0xFF) && \
153 (((a)[6]) == 0xFF) && \
154 (((a)[7]) == 0xFF))
155
156#define LOWPAN_DISPATCH_IPV6 0x41 /* 01000001 = 65 */
157#define LOWPAN_DISPATCH_HC1 0x42 /* 01000010 = 66 */
158#define LOWPAN_DISPATCH_IPHC 0x60 /* 011xxxxx = ... */
159#define LOWPAN_DISPATCH_FRAG1 0xc0 /* 11000xxx */
160#define LOWPAN_DISPATCH_FRAGN 0xe0 /* 11100xxx */
161
162/*
163 * Values of fields within the IPHC encoding first byte
164 * (C stands for compressed and I for inline)
165 */
166#define LOWPAN_IPHC_TF 0x18
167
168#define LOWPAN_IPHC_FL_C 0x10
169#define LOWPAN_IPHC_TC_C 0x08
170#define LOWPAN_IPHC_NH_C 0x04
171#define LOWPAN_IPHC_TTL_1 0x01
172#define LOWPAN_IPHC_TTL_64 0x02
173#define LOWPAN_IPHC_TTL_255 0x03
174#define LOWPAN_IPHC_TTL_I 0x00
175
176
177/* Values of fields within the IPHC encoding second byte */
178#define LOWPAN_IPHC_CID 0x80
179
180#define LOWPAN_IPHC_SAC 0x40
181#define LOWPAN_IPHC_SAM_00 0x00
182#define LOWPAN_IPHC_SAM_01 0x10
183#define LOWPAN_IPHC_SAM_10 0x20
184#define LOWPAN_IPHC_SAM 0x30
185
186#define LOWPAN_IPHC_SAM_BIT 4
187
188#define LOWPAN_IPHC_M 0x08
189#define LOWPAN_IPHC_DAC 0x04
190#define LOWPAN_IPHC_DAM_00 0x00
191#define LOWPAN_IPHC_DAM_01 0x01
192#define LOWPAN_IPHC_DAM_10 0x02
193#define LOWPAN_IPHC_DAM_11 0x03
194
195#define LOWPAN_IPHC_DAM_BIT 0
196/*
197 * LOWPAN_UDP encoding (works together with IPHC)
198 */
199#define LOWPAN_NHC_UDP_MASK 0xF8
200#define LOWPAN_NHC_UDP_ID 0xF0
201#define LOWPAN_NHC_UDP_CHECKSUMC 0x04
202#define LOWPAN_NHC_UDP_CHECKSUMI 0x00
203
204/* values for port compression, _with checksum_ ie bit 5 set to 0 */
205#define LOWPAN_NHC_UDP_CS_P_00 0xF0 /* all inline */
206#define LOWPAN_NHC_UDP_CS_P_01 0xF1 /* source 16bit inline,
207 dest = 0xF0 + 8 bit inline */
208#define LOWPAN_NHC_UDP_CS_P_10 0xF2 /* source = 0xF0 + 8bit inline,
209 dest = 16 bit inline */
210#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */
211
212#endif /* __6LOWPAN_H__ */
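On receive, a frame is treated as IPHC-compressed when its first byte matches the 011xxxxx dispatch pattern, and the SAM/DAM modes are then masked and shifted out of the second encoding byte with the constants defined above. A short standalone restatement of those two checks, with the relevant constants copied from this header:

        #include <stdint.h>
        #include <stdio.h>

        #define LOWPAN_DISPATCH_IPHC    0x60    /* 011xxxxx */
        #define LOWPAN_IPHC_SAM         0x30
        #define LOWPAN_IPHC_SAM_BIT     4
        #define LOWPAN_IPHC_DAM_11      0x03
        #define LOWPAN_IPHC_DAM_BIT     0

        int main(void)
        {
                uint8_t iphc0 = 0x78, iphc1 = 0x33;     /* example encoding bytes */

                if ((iphc0 & 0xe0) == LOWPAN_DISPATCH_IPHC)
                        printf("IPHC dispatch recognised\n");

                printf("SAM mode %d, DAM mode %d\n",
                       (iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT,
                       (iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT);
                return 0;
        }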
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
index 1c1de97d264a..7dee65052925 100644
--- a/net/ieee802154/Kconfig
+++ b/net/ieee802154/Kconfig
@@ -10,3 +10,9 @@ config IEEE802154
10 10
11 Say Y here to compile LR-WPAN support into the kernel or say M to 11 Say Y here to compile LR-WPAN support into the kernel or say M to
12 compile it as modules. 12 compile it as modules.
13
14config IEEE802154_6LOWPAN
15 tristate "6lowpan support over IEEE 802.15.4"
16 depends on IEEE802154 && IPV6
17 ---help---
18 IPv6 compression over IEEE 802.15.4.
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 5761185f884e..d7716d64c6bb 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,3 +1,5 @@
1obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o 1obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o
2ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o 2obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o
3af_802154-y := af_ieee802154.o raw.o dgram.o 3
4ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o
5af_802154-y := af_ieee802154.o raw.o dgram.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1b745d412cf6..dd2b9478ddd1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -466,8 +466,13 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
466 goto out; 466 goto out;
467 467
468 if (addr->sin_family != AF_INET) { 468 if (addr->sin_family != AF_INET) {
469 /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
470 * only if s_addr is INADDR_ANY.
471 */
469 err = -EAFNOSUPPORT; 472 err = -EAFNOSUPPORT;
470 goto out; 473 if (addr->sin_family != AF_UNSPEC ||
474 addr->sin_addr.s_addr != htonl(INADDR_ANY))
475 goto out;
471 } 476 }
472 477
473 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); 478 chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
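With this change an AF_INET socket again accepts bind() with sin_family set to AF_UNSPEC, but only for the INADDR_ANY wildcard; any specific address still fails with EAFNOSUPPORT. A small userspace illustration of both cases (behaviour as implied by the hunk, on a kernel carrying it):

        #include <arpa/inet.h>
        #include <errno.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/socket.h>
        #include <unistd.h>

        int main(void)
        {
                struct sockaddr_in sa;
                int fd1 = socket(AF_INET, SOCK_STREAM, 0);
                int fd2 = socket(AF_INET, SOCK_STREAM, 0);

                memset(&sa, 0, sizeof(sa));
                sa.sin_family = AF_UNSPEC;              /* compatibility case */
                sa.sin_addr.s_addr = htonl(INADDR_ANY);

                /* accepted (treated as AF_INET) with this change */
                printf("wildcard: %d\n", bind(fd1, (struct sockaddr *)&sa, sizeof(sa)));

                sa.sin_addr.s_addr = htonl(0x7f000001); /* 127.0.0.1 */
                /* still rejected with EAFNOSUPPORT */
                printf("specific: %d (errno %d)\n",
                       bind(fd2, (struct sockaddr *)&sa, sizeof(sa)), errno);

                close(fd1);
                close(fd2);
                return 0;
        }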
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2c2a98e402e7..86f3b885b4f3 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -476,7 +476,7 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def,
476 doi = doi_def->doi; 476 doi = doi_def->doi;
477 doi_type = doi_def->type; 477 doi_type = doi_def->type;
478 478
479 if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN) 479 if (doi_def->doi == CIPSO_V4_DOI_UNKNOWN)
480 goto doi_add_return; 480 goto doi_add_return;
481 for (iter = 0; iter < CIPSO_V4_TAG_MAXCNT; iter++) { 481 for (iter = 0; iter < CIPSO_V4_TAG_MAXCNT; iter++) {
482 switch (doi_def->tags[iter]) { 482 switch (doi_def->tags[iter]) {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index bc19bd06dd00..c6b5092f29a1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -258,7 +258,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
258 ip_mc_up(in_dev); 258 ip_mc_up(in_dev);
259 259
260 /* we can receive as soon as ip_ptr is set -- do this last */ 260 /* we can receive as soon as ip_ptr is set -- do this last */
261 rcu_assign_pointer(dev->ip_ptr, in_dev); 261 RCU_INIT_POINTER(dev->ip_ptr, in_dev);
262out: 262out:
263 return in_dev; 263 return in_dev;
264out_kfree: 264out_kfree:
@@ -291,7 +291,7 @@ static void inetdev_destroy(struct in_device *in_dev)
291 inet_free_ifa(ifa); 291 inet_free_ifa(ifa);
292 } 292 }
293 293
294 rcu_assign_pointer(dev->ip_ptr, NULL); 294 RCU_INIT_POINTER(dev->ip_ptr, NULL);
295 295
296 devinet_sysctl_unregister(in_dev); 296 devinet_sysctl_unregister(in_dev);
297 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 297 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
@@ -1175,7 +1175,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1175 switch (event) { 1175 switch (event) {
1176 case NETDEV_REGISTER: 1176 case NETDEV_REGISTER:
1177 printk(KERN_DEBUG "inetdev_event: bug\n"); 1177 printk(KERN_DEBUG "inetdev_event: bug\n");
1178 rcu_assign_pointer(dev->ip_ptr, NULL); 1178 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1179 break; 1179 break;
1180 case NETDEV_UP: 1180 case NETDEV_UP:
1181 if (!inetdev_valid_mtu(dev->mtu)) 1181 if (!inetdev_valid_mtu(dev->mtu))
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 33e2c35b74b7..80106d89d548 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -142,6 +142,14 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
142}; 142};
143 143
144/* Release a nexthop info record */ 144/* Release a nexthop info record */
145static void free_fib_info_rcu(struct rcu_head *head)
146{
147 struct fib_info *fi = container_of(head, struct fib_info, rcu);
148
149 if (fi->fib_metrics != (u32 *) dst_default_metrics)
150 kfree(fi->fib_metrics);
151 kfree(fi);
152}
145 153
146void free_fib_info(struct fib_info *fi) 154void free_fib_info(struct fib_info *fi)
147{ 155{
@@ -156,7 +164,7 @@ void free_fib_info(struct fib_info *fi)
156 } endfor_nexthops(fi); 164 } endfor_nexthops(fi);
157 fib_info_cnt--; 165 fib_info_cnt--;
158 release_net(fi->fib_net); 166 release_net(fi->fib_net);
159 kfree_rcu(fi, rcu); 167 call_rcu(&fi->rcu, free_fib_info_rcu);
160} 168}
161 169
162void fib_release_info(struct fib_info *fi) 170void fib_release_info(struct fib_info *fi)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index de9e2978476f..89d6f71a6a99 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -204,7 +204,7 @@ static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
204 return (struct tnode *)(parent & ~NODE_TYPE_MASK); 204 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
205} 205}
206 206
207/* Same as rcu_assign_pointer 207/* Same as RCU_INIT_POINTER
208 * but that macro() assumes that value is a pointer. 208 * but that macro() assumes that value is a pointer.
209 */ 209 */
210static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) 210static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
@@ -528,7 +528,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *
528 if (n) 528 if (n)
529 node_set_parent(n, tn); 529 node_set_parent(n, tn);
530 530
531 rcu_assign_pointer(tn->child[i], n); 531 RCU_INIT_POINTER(tn->child[i], n);
532} 532}
533 533
534#define MAX_WORK 10 534#define MAX_WORK 10
@@ -1014,7 +1014,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
1014 1014
1015 tp = node_parent((struct rt_trie_node *) tn); 1015 tp = node_parent((struct rt_trie_node *) tn);
1016 if (!tp) 1016 if (!tp)
1017 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 1017 RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
1018 1018
1019 tnode_free_flush(); 1019 tnode_free_flush();
1020 if (!tp) 1020 if (!tp)
@@ -1026,7 +1026,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
1026 if (IS_TNODE(tn)) 1026 if (IS_TNODE(tn))
1027 tn = (struct tnode *)resize(t, (struct tnode *)tn); 1027 tn = (struct tnode *)resize(t, (struct tnode *)tn);
1028 1028
1029 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 1029 RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
1030 tnode_free_flush(); 1030 tnode_free_flush();
1031} 1031}
1032 1032
@@ -1163,7 +1163,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1163 put_child(t, (struct tnode *)tp, cindex, 1163 put_child(t, (struct tnode *)tp, cindex,
1164 (struct rt_trie_node *)tn); 1164 (struct rt_trie_node *)tn);
1165 } else { 1165 } else {
1166 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 1166 RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
1167 tp = tn; 1167 tp = tn;
1168 } 1168 }
1169 } 1169 }
@@ -1621,7 +1621,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
1621 put_child(t, (struct tnode *)tp, cindex, NULL); 1621 put_child(t, (struct tnode *)tp, cindex, NULL);
1622 trie_rebalance(t, tp); 1622 trie_rebalance(t, tp);
1623 } else 1623 } else
1624 rcu_assign_pointer(t->trie, NULL); 1624 RCU_INIT_POINTER(t->trie, NULL);
1625 1625
1626 free_leaf(l); 1626 free_leaf(l);
1627} 1627}
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index dbfc21de3479..8cb1ebb7cd74 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -34,7 +34,7 @@ int gre_add_protocol(const struct gre_protocol *proto, u8 version)
34 if (gre_proto[version]) 34 if (gre_proto[version])
35 goto err_out_unlock; 35 goto err_out_unlock;
36 36
37 rcu_assign_pointer(gre_proto[version], proto); 37 RCU_INIT_POINTER(gre_proto[version], proto);
38 spin_unlock(&gre_proto_lock); 38 spin_unlock(&gre_proto_lock);
39 return 0; 39 return 0;
40 40
@@ -54,7 +54,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
54 if (rcu_dereference_protected(gre_proto[version], 54 if (rcu_dereference_protected(gre_proto[version],
55 lockdep_is_held(&gre_proto_lock)) != proto) 55 lockdep_is_held(&gre_proto_lock)) != proto)
56 goto err_out_unlock; 56 goto err_out_unlock;
57 rcu_assign_pointer(gre_proto[version], NULL); 57 RCU_INIT_POINTER(gre_proto[version], NULL);
58 spin_unlock(&gre_proto_lock); 58 spin_unlock(&gre_proto_lock);
59 synchronize_rcu(); 59 synchronize_rcu();
60 return 0; 60 return 0;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 23ef31baa1af..ab188ae12fd9 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net)
1152 net->ipv4.icmp_sk[i] = sk; 1152 net->ipv4.icmp_sk[i] = sk;
1153 1153
1154 /* Enough space for 2 64K ICMP packets, including 1154 /* Enough space for 2 64K ICMP packets, including
1155 * sk_buff struct overhead. 1155 * sk_buff/skb_shared_info struct overhead.
1156 */ 1156 */
1157 sk->sk_sndbuf = 1157 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1158 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
1159 1158
1160 /* 1159 /*
1161 * Speedup sock_wfree() 1160 * Speedup sock_wfree()
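In icmp_sk_init() the send buffer is now sized with SKB_TRUESIZE(64 * 1024) instead of hand-adding sizeof(struct sk_buff), so the skb_shared_info tail overhead is charged as well. Roughly, the helper adds the cache-aligned sizes of both metadata structures to the payload; the macro below is an approximation written for illustration only (MY_SKB_TRUESIZE is a made-up name — see linux/skbuff.h of this era for the real SKB_TRUESIZE):

#include <linux/skbuff.h>

/* Approximate shape of the helper: payload plus both cache-aligned
 * metadata structures.
 */
#define MY_SKB_TRUESIZE(X)                                              \
        ((X) +                                                          \
         SKB_DATA_ALIGN(sizeof(struct sk_buff)) +                       \
         SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

/* usage, as in the hunk above:  sk->sk_sndbuf = 2 * MY_SKB_TRUESIZE(64 * 1024); */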
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f1d27f6c9351..c7472eff2d51 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -767,7 +767,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
767 break; 767 break;
768 for (i=0; i<nsrcs; i++) { 768 for (i=0; i<nsrcs; i++) {
769 /* skip inactive filters */ 769 /* skip inactive filters */
770 if (pmc->sfcount[MCAST_INCLUDE] || 770 if (psf->sf_count[MCAST_INCLUDE] ||
771 pmc->sfcount[MCAST_EXCLUDE] != 771 pmc->sfcount[MCAST_EXCLUDE] !=
772 psf->sf_count[MCAST_EXCLUDE]) 772 psf->sf_count[MCAST_EXCLUDE])
773 continue; 773 continue;
@@ -1009,7 +1009,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
1009 1009
1010 /* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG. 1010 /* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG.
1011 We will get multicast token leakage, when IFF_MULTICAST 1011 We will get multicast token leakage, when IFF_MULTICAST
1012 is changed. This check should be done in dev->set_multicast_list 1012 is changed. This check should be done in ndo_set_rx_mode
1013 routine. Something sort of: 1013 routine. Something sort of:
1014 if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; } 1014 if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; }
1015 --ANK 1015 --ANK
@@ -1242,7 +1242,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1242 1242
1243 im->next_rcu = in_dev->mc_list; 1243 im->next_rcu = in_dev->mc_list;
1244 in_dev->mc_count++; 1244 in_dev->mc_count++;
1245 rcu_assign_pointer(in_dev->mc_list, im); 1245 RCU_INIT_POINTER(in_dev->mc_list, im);
1246 1246
1247#ifdef CONFIG_IP_MULTICAST 1247#ifdef CONFIG_IP_MULTICAST
1248 igmpv3_del_delrec(in_dev, im->multiaddr); 1248 igmpv3_del_delrec(in_dev, im->multiaddr);
@@ -1718,7 +1718,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
1718 1718
1719 pmc->sfcount[sfmode]--; 1719 pmc->sfcount[sfmode]--;
1720 for (j=0; j<i; j++) 1720 for (j=0; j<i; j++)
1721 (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[i]); 1721 (void) ip_mc_del1_src(pmc, sfmode, &psfsrc[j]);
1722 } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { 1722 } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
1723#ifdef CONFIG_IP_MULTICAST 1723#ifdef CONFIG_IP_MULTICAST
1724 struct ip_sf_list *psf; 1724 struct ip_sf_list *psf;
@@ -1813,7 +1813,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1813 iml->next_rcu = inet->mc_list; 1813 iml->next_rcu = inet->mc_list;
1814 iml->sflist = NULL; 1814 iml->sflist = NULL;
1815 iml->sfmode = MCAST_EXCLUDE; 1815 iml->sfmode = MCAST_EXCLUDE;
1816 rcu_assign_pointer(inet->mc_list, iml); 1816 RCU_INIT_POINTER(inet->mc_list, iml);
1817 ip_mc_inc_group(in_dev, addr); 1817 ip_mc_inc_group(in_dev, addr);
1818 err = 0; 1818 err = 0;
1819done: 1819done:
@@ -1835,7 +1835,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
1835 } 1835 }
1836 err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, 1836 err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr,
1837 iml->sfmode, psf->sl_count, psf->sl_addr, 0); 1837 iml->sfmode, psf->sl_count, psf->sl_addr, 0);
1838 rcu_assign_pointer(iml->sflist, NULL); 1838 RCU_INIT_POINTER(iml->sflist, NULL);
1839 /* decrease mem now to avoid the memleak warning */ 1839 /* decrease mem now to avoid the memleak warning */
1840 atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); 1840 atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
1841 kfree_rcu(psf, rcu); 1841 kfree_rcu(psf, rcu);
@@ -2000,7 +2000,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
2000 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); 2000 atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
2001 kfree_rcu(psl, rcu); 2001 kfree_rcu(psl, rcu);
2002 } 2002 }
2003 rcu_assign_pointer(pmc->sflist, newpsl); 2003 RCU_INIT_POINTER(pmc->sflist, newpsl);
2004 psl = newpsl; 2004 psl = newpsl;
2005 } 2005 }
2006 rv = 1; /* > 0 for insert logic below if sl_count is 0 */ 2006 rv = 1; /* > 0 for insert logic below if sl_count is 0 */
@@ -2103,7 +2103,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
2103 } else 2103 } else
2104 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 2104 (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
2105 0, NULL, 0); 2105 0, NULL, 0);
2106 rcu_assign_pointer(pmc->sflist, newpsl); 2106 RCU_INIT_POINTER(pmc->sflist, newpsl);
2107 pmc->sfmode = msf->imsf_fmode; 2107 pmc->sfmode = msf->imsf_fmode;
2108 err = 0; 2108 err = 0;
2109done: 2109done:
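Two of the igmp.c hunks are straight bug fixes: igmp_xmarksources() was reading pmc->sfcount[MCAST_INCLUDE] where the per-source psf->sf_count[MCAST_INCLUDE] was meant, and the rollback loop in ip_mc_add_src() deleted &psfsrc[i] on every iteration instead of walking &psfsrc[j]. The second mistake is a classic; a self-contained sketch with hypothetical names:

#include <stdio.h>

static int add_source(int s)   { return s < 0 ? -1 : 0; }   /* pretend negatives fail */
static void del_source(int s)  { printf("rolled back %d\n", s); }

/* Add a batch of sources; on failure, undo the ones already added. */
static int add_sources(const int *src, int n)
{
        int i, j;

        for (i = 0; i < n; i++) {
                if (add_source(src[i]) < 0) {
                        for (j = 0; j < i; j++)
                                del_source(src[j]);   /* src[j], not src[i]: undo each earlier entry */
                        return -1;
                }
        }
        return 0;
}

int main(void)
{
        int srcs[] = { 1, 2, -3 };

        return add_sources(srcs, 3) ? 1 : 0;
}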
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 389a2e6a17fd..f5e2bdaef949 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -108,6 +108,9 @@ static int inet_csk_diag_fill(struct sock *sk,
108 icsk->icsk_ca_ops->name); 108 icsk->icsk_ca_ops->name);
109 } 109 }
110 110
111 if ((ext & (1 << (INET_DIAG_TOS - 1))) && (sk->sk_family != AF_INET6))
112 RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
113
111 r->idiag_family = sk->sk_family; 114 r->idiag_family = sk->sk_family;
112 r->idiag_state = sk->sk_state; 115 r->idiag_state = sk->sk_state;
113 r->idiag_timer = 0; 116 r->idiag_timer = 0;
@@ -130,6 +133,8 @@ static int inet_csk_diag_fill(struct sock *sk,
130 &np->rcv_saddr); 133 &np->rcv_saddr);
131 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, 134 ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
132 &np->daddr); 135 &np->daddr);
136 if (ext & (1 << (INET_DIAG_TOS - 1)))
137 RTA_PUT_U8(skb, INET_DIAG_TOS, np->tclass);
133 } 138 }
134#endif 139#endif
135 140
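The inet_diag.c hunks export the IPv4 TOS (and, for IPv6 sockets, the traffic class) when the requester asked for the INET_DIAG_TOS extension. Extension N is requested by setting bit (1 << (N - 1)) in idiag_ext; a small user-space sketch of building such a mask, assuming headers new enough to define INET_DIAG_TOS (it is introduced by this same series):

#include <stdio.h>
#include <linux/inet_diag.h>

int main(void)
{
        /* Request the TOS extension alongside memory info. */
        unsigned char ext = (1 << (INET_DIAG_TOS - 1)) | (1 << (INET_DIAG_MEMINFO - 1));

        printf("idiag_ext mask: 0x%02x\n", ext);
        return 0;
}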
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3c0369a3a663..984ec656b03b 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -21,6 +21,7 @@
21 21
22#include <net/inet_connection_sock.h> 22#include <net/inet_connection_sock.h>
23#include <net/inet_hashtables.h> 23#include <net/inet_hashtables.h>
24#include <net/secure_seq.h>
24#include <net/ip.h> 25#include <net/ip.h>
25 26
26/* 27/*
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index ef7ae6049a51..8e6be5aad115 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -433,7 +433,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
433 if (!lro_mgr->get_frag_header || 433 if (!lro_mgr->get_frag_header ||
434 lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, 434 lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
435 (void *)&tcph, &flags, priv)) { 435 (void *)&tcph, &flags, priv)) {
436 mac_hdr = page_address(frags->page) + frags->page_offset; 436 mac_hdr = skb_frag_address(frags);
437 goto out1; 437 goto out1;
438 } 438 }
439 439
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e38213817d0a..86f13c67ea85 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -19,6 +19,7 @@
19#include <linux/net.h> 19#include <linux/net.h>
20#include <net/ip.h> 20#include <net/ip.h>
21#include <net/inetpeer.h> 21#include <net/inetpeer.h>
22#include <net/secure_seq.h>
22 23
23/* 24/*
24 * Theory of operations. 25 * Theory of operations.
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ccaaa851ab42..ae3bb147affd 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -122,6 +122,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
122 newskb->pkt_type = PACKET_LOOPBACK; 122 newskb->pkt_type = PACKET_LOOPBACK;
123 newskb->ip_summed = CHECKSUM_UNNECESSARY; 123 newskb->ip_summed = CHECKSUM_UNNECESSARY;
124 WARN_ON(!skb_dst(newskb)); 124 WARN_ON(!skb_dst(newskb));
125 skb_dst_force(newskb);
125 netif_rx_ni(newskb); 126 netif_rx_ni(newskb);
126 return 0; 127 return 0;
127} 128}
@@ -204,9 +205,15 @@ static inline int ip_finish_output2(struct sk_buff *skb)
204 skb = skb2; 205 skb = skb2;
205 } 206 }
206 207
208 rcu_read_lock();
207 neigh = dst_get_neighbour(dst); 209 neigh = dst_get_neighbour(dst);
208 if (neigh) 210 if (neigh) {
209 return neigh_output(neigh, skb); 211 int res = neigh_output(neigh, skb);
212
213 rcu_read_unlock();
214 return res;
215 }
216 rcu_read_unlock();
210 217
211 if (net_ratelimit()) 218 if (net_ratelimit())
212 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); 219 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
@@ -982,13 +989,13 @@ alloc_new_skb:
982 if (page && (left = PAGE_SIZE - off) > 0) { 989 if (page && (left = PAGE_SIZE - off) > 0) {
983 if (copy >= left) 990 if (copy >= left)
984 copy = left; 991 copy = left;
985 if (page != frag->page) { 992 if (page != skb_frag_page(frag)) {
986 if (i == MAX_SKB_FRAGS) { 993 if (i == MAX_SKB_FRAGS) {
987 err = -EMSGSIZE; 994 err = -EMSGSIZE;
988 goto error; 995 goto error;
989 } 996 }
990 get_page(page);
991 skb_fill_page_desc(skb, i, page, off, 0); 997 skb_fill_page_desc(skb, i, page, off, 0);
998 skb_frag_ref(skb, i);
992 frag = &skb_shinfo(skb)->frags[i]; 999 frag = &skb_shinfo(skb)->frags[i];
993 } 1000 }
994 } else if (i < MAX_SKB_FRAGS) { 1001 } else if (i < MAX_SKB_FRAGS) {
@@ -1008,7 +1015,8 @@ alloc_new_skb:
1008 err = -EMSGSIZE; 1015 err = -EMSGSIZE;
1009 goto error; 1016 goto error;
1010 } 1017 }
1011 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { 1018 if (getfrag(from, skb_frag_address(frag)+frag->size,
1019 offset, copy, skb->len, skb) < 0) {
1012 err = -EFAULT; 1020 err = -EFAULT;
1013 goto error; 1021 goto error;
1014 } 1022 }
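Several hunks in this series (inet_lro.c and ip_output.c above, tcp.c further down) replace open-coded page arithmetic with the skb_frag_*() accessors. In this kernel generation skb_frag_address() is essentially page_address(skb_frag_page(frag)) + frag->page_offset, and skb_frag_ref() takes the page reference through a fragment that is already installed in the skb. A hedged sketch of the equivalence — not the real skbuff.h definitions:

#include <linux/mm.h>
#include <linux/skbuff.h>

/* What the accessor replaces, for a linearly mapped fragment. */
static void *frag_address_open_coded(const skb_frag_t *frag)
{
        return page_address(skb_frag_page(frag)) + frag->page_offset;
}

static void fill_and_ref(struct sk_buff *skb, int i, struct page *page, int off)
{
        /* New style: install the page first, then take the reference
         * through the fragment, instead of get_page() + skb_fill_page_desc().
         */
        skb_fill_page_desc(skb, i, page, off, 0);
        skb_frag_ref(skb, i);
}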
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ab0c9efd1efa..8905e92f896a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1067,7 +1067,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
1067 */ 1067 */
1068 1068
1069static int do_ip_getsockopt(struct sock *sk, int level, int optname, 1069static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1070 char __user *optval, int __user *optlen) 1070 char __user *optval, int __user *optlen, unsigned flags)
1071{ 1071{
1072 struct inet_sock *inet = inet_sk(sk); 1072 struct inet_sock *inet = inet_sk(sk);
1073 int val; 1073 int val;
@@ -1240,7 +1240,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1240 1240
1241 msg.msg_control = optval; 1241 msg.msg_control = optval;
1242 msg.msg_controllen = len; 1242 msg.msg_controllen = len;
1243 msg.msg_flags = 0; 1243 msg.msg_flags = flags;
1244 1244
1245 if (inet->cmsg_flags & IP_CMSG_PKTINFO) { 1245 if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
1246 struct in_pktinfo info; 1246 struct in_pktinfo info;
@@ -1294,7 +1294,7 @@ int ip_getsockopt(struct sock *sk, int level,
1294{ 1294{
1295 int err; 1295 int err;
1296 1296
1297 err = do_ip_getsockopt(sk, level, optname, optval, optlen); 1297 err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
1298#ifdef CONFIG_NETFILTER 1298#ifdef CONFIG_NETFILTER
1299 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1299 /* we need to exclude all possible ENOPROTOOPTs except default case */
1300 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && 1300 if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
@@ -1327,7 +1327,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
1327 return compat_mc_getsockopt(sk, level, optname, optval, optlen, 1327 return compat_mc_getsockopt(sk, level, optname, optval, optlen,
1328 ip_getsockopt); 1328 ip_getsockopt);
1329 1329
1330 err = do_ip_getsockopt(sk, level, optname, optval, optlen); 1330 err = do_ip_getsockopt(sk, level, optname, optval, optlen,
1331 MSG_CMSG_COMPAT);
1331 1332
1332#ifdef CONFIG_NETFILTER 1333#ifdef CONFIG_NETFILTER
1333 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1334 /* we need to exclude all possible ENOPROTOOPTs except default case */
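do_ip_getsockopt() grows a flags argument so that compat_ip_getsockopt() can pass MSG_CMSG_COMPAT down into the control-message generation for IP_PKTOPTIONS, while the native ip_getsockopt() passes 0. The calling convention — one internal worker, two thin entry points — sketched with hypothetical names (nothing here is the kernel's API):

/* Hedged sketch of the shape, hypothetical names throughout. */
#define MY_CMSG_COMPAT 0x80000000u

static int do_my_getsockopt(int optname, void *optval, int *optlen, unsigned flags)
{
        /* ... build control messages; 'flags' selects native vs compat layout ... */
        (void)optname; (void)optval; (void)optlen;
        return flags ? 1 : 0;
}

int my_getsockopt(int optname, void *optval, int *optlen)
{
        return do_my_getsockopt(optname, optval, optlen, 0);
}

int my_compat_getsockopt(int optname, void *optval, int *optlen)
{
        return do_my_getsockopt(optname, optval, optlen, MY_CMSG_COMPAT);
}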
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 378b20b7ca6e..065effd8349a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -231,7 +231,7 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
231 (iter = rtnl_dereference(*tp)) != NULL; 231 (iter = rtnl_dereference(*tp)) != NULL;
232 tp = &iter->next) { 232 tp = &iter->next) {
233 if (t == iter) { 233 if (t == iter) {
234 rcu_assign_pointer(*tp, t->next); 234 RCU_INIT_POINTER(*tp, t->next);
235 break; 235 break;
236 } 236 }
237 } 237 }
@@ -241,8 +241,8 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
241{ 241{
242 struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); 242 struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
243 243
244 rcu_assign_pointer(t->next, rtnl_dereference(*tp)); 244 RCU_INIT_POINTER(t->next, rtnl_dereference(*tp));
245 rcu_assign_pointer(*tp, t); 245 RCU_INIT_POINTER(*tp, t);
246} 246}
247 247
248static struct ip_tunnel * ipip_tunnel_locate(struct net *net, 248static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -301,7 +301,7 @@ static void ipip_tunnel_uninit(struct net_device *dev)
301 struct ipip_net *ipn = net_generic(net, ipip_net_id); 301 struct ipip_net *ipn = net_generic(net, ipip_net_id);
302 302
303 if (dev == ipn->fb_tunnel_dev) 303 if (dev == ipn->fb_tunnel_dev)
304 rcu_assign_pointer(ipn->tunnels_wc[0], NULL); 304 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
305 else 305 else
306 ipip_tunnel_unlink(ipn, netdev_priv(dev)); 306 ipip_tunnel_unlink(ipn, netdev_priv(dev));
307 dev_put(dev); 307 dev_put(dev);
@@ -791,7 +791,7 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
791 return -ENOMEM; 791 return -ENOMEM;
792 792
793 dev_hold(dev); 793 dev_hold(dev);
794 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); 794 RCU_INIT_POINTER(ipn->tunnels_wc[0], tunnel);
795 return 0; 795 return 0;
796} 796}
797 797
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 58e879157976..6164e982e0ef 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1176,7 +1176,7 @@ static void mrtsock_destruct(struct sock *sk)
1176 ipmr_for_each_table(mrt, net) { 1176 ipmr_for_each_table(mrt, net) {
1177 if (sk == rtnl_dereference(mrt->mroute_sk)) { 1177 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1178 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1178 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1179 rcu_assign_pointer(mrt->mroute_sk, NULL); 1179 RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1180 mroute_clean_tables(mrt); 1180 mroute_clean_tables(mrt);
1181 } 1181 }
1182 } 1182 }
@@ -1203,7 +1203,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1203 return -ENOENT; 1203 return -ENOENT;
1204 1204
1205 if (optname != MRT_INIT) { 1205 if (optname != MRT_INIT) {
1206 if (sk != rcu_dereference_raw(mrt->mroute_sk) && 1206 if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1207 !capable(CAP_NET_ADMIN)) 1207 !capable(CAP_NET_ADMIN))
1208 return -EACCES; 1208 return -EACCES;
1209 } 1209 }
@@ -1224,13 +1224,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1224 1224
1225 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1225 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1226 if (ret == 0) { 1226 if (ret == 0) {
1227 rcu_assign_pointer(mrt->mroute_sk, sk); 1227 RCU_INIT_POINTER(mrt->mroute_sk, sk);
1228 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1228 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1229 } 1229 }
1230 rtnl_unlock(); 1230 rtnl_unlock();
1231 return ret; 1231 return ret;
1232 case MRT_DONE: 1232 case MRT_DONE:
1233 if (sk != rcu_dereference_raw(mrt->mroute_sk)) 1233 if (sk != rcu_access_pointer(mrt->mroute_sk))
1234 return -EACCES; 1234 return -EACCES;
1235 return ip_ra_control(sk, 0, NULL); 1235 return ip_ra_control(sk, 0, NULL);
1236 case MRT_ADD_VIF: 1236 case MRT_ADD_VIF:
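ipmr.c above (and route.c below) switch rcu_dereference_raw() to rcu_access_pointer() in places where the pointer value is only compared or tested for NULL and never dereferenced, so no RCU read-side protection needs to be claimed. A minimal sketch of when each accessor applies, using a hypothetical global:

#include <linux/types.h>
#include <linux/rcupdate.h>

struct thing {
        int n;
};

static struct thing __rcu *current_thing;

/* Only asks "is something published?", so rcu_access_pointer() is enough
 * and no rcu_read_lock() is required around it.
 */
static bool thing_present(void)
{
        return rcu_access_pointer(current_thing) != NULL;
}

/* Actually reads through the pointer, so it must run under rcu_read_lock()
 * and use rcu_dereference().
 */
static int thing_value_or_zero(void)
{
        struct thing *t;
        int n = 0;

        rcu_read_lock();
        t = rcu_dereference(current_thing);
        if (t)
                n = t->n;
        rcu_read_unlock();
        return n;
}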
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 2e97e3ec1eb7..929b27bdeb79 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -18,17 +18,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
18 struct rtable *rt; 18 struct rtable *rt;
19 struct flowi4 fl4 = {}; 19 struct flowi4 fl4 = {};
20 __be32 saddr = iph->saddr; 20 __be32 saddr = iph->saddr;
21 __u8 flags = 0; 21 __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
22 unsigned int hh_len; 22 unsigned int hh_len;
23 23
24 if (!skb->sk && addr_type != RTN_LOCAL) { 24 if (addr_type == RTN_UNSPEC)
25 if (addr_type == RTN_UNSPEC) 25 addr_type = inet_addr_type(net, saddr);
26 addr_type = inet_addr_type(net, saddr); 26 if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
27 if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) 27 flags |= FLOWI_FLAG_ANYSRC;
28 flags |= FLOWI_FLAG_ANYSRC; 28 else
29 else 29 saddr = 0;
30 saddr = 0;
31 }
32 30
33 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause 31 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
34 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. 32 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
@@ -38,7 +36,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
38 fl4.flowi4_tos = RT_TOS(iph->tos); 36 fl4.flowi4_tos = RT_TOS(iph->tos);
39 fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; 37 fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
40 fl4.flowi4_mark = skb->mark; 38 fl4.flowi4_mark = skb->mark;
41 fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags; 39 fl4.flowi4_flags = flags;
42 rt = ip_route_output_key(net, &fl4); 40 rt = ip_route_output_key(net, &fl4);
43 if (IS_ERR(rt)) 41 if (IS_ERR(rt))
44 return -1; 42 return -1;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 5c9b9d963918..e59aabd0eae4 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
218 return skb; 218 return skb;
219 219
220nlmsg_failure: 220nlmsg_failure:
221 kfree_skb(skb);
221 *errp = -EINVAL; 222 *errp = -EINVAL;
222 printk(KERN_ERR "ip_queue: error creating packet message\n"); 223 printk(KERN_ERR "ip_queue: error creating packet message\n");
223 return NULL; 224 return NULL;
@@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
313{ 314{
314 struct nf_queue_entry *entry; 315 struct nf_queue_entry *entry;
315 316
316 if (vmsg->value > NF_MAX_VERDICT) 317 if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
317 return -EINVAL; 318 return -EINVAL;
318 319
319 entry = ipq_find_dequeue_entry(vmsg->id); 320 entry = ipq_find_dequeue_entry(vmsg->id);
@@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
358 break; 359 break;
359 360
360 case IPQM_VERDICT: 361 case IPQM_VERDICT:
361 if (pmsg->msg.verdict.value > NF_MAX_VERDICT) 362 status = ipq_set_verdict(&pmsg->msg.verdict,
362 status = -EINVAL; 363 len - sizeof(*pmsg));
363 else 364 break;
364 status = ipq_set_verdict(&pmsg->msg.verdict,
365 len - sizeof(*pmsg));
366 break;
367 default: 365 default:
368 status = -EINVAL; 366 status = -EINVAL;
369 } 367 }
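The ip_queue.c change plugs a leak: on the nlmsg_failure path the freshly built skb was abandoned without kfree_skb(), and the verdict handler now also rejects NF_STOLEN. The leak pattern itself is generic; a small user-space sketch of the allocate/fail/free-before-returning-NULL shape, with hypothetical names:

#include <stdlib.h>
#include <string.h>

struct msgbuf {
        char *data;
        size_t len;
};

/* Build a message; on any failure free what was allocated before returning NULL. */
struct msgbuf *build_message(const char *payload)
{
        struct msgbuf *m = malloc(sizeof(*m));

        if (!m)
                return NULL;
        m->len = strlen(payload);
        m->data = malloc(m->len);
        if (!m->data)
                goto fail;
        memcpy(m->data, payload, m->len);
        return m;

fail:
        free(m);        /* the fix in ip_queue.c is the analogue of this line */
        return NULL;
}

int main(void)
{
        struct msgbuf *m = build_message("hello");

        if (m) {
                free(m->data);
                free(m);
        }
        return 0;
}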
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 703f366fd235..7b22382ff0e9 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -70,14 +70,14 @@ static unsigned int help(struct sk_buff *skb,
70 70
71static void __exit nf_nat_amanda_fini(void) 71static void __exit nf_nat_amanda_fini(void)
72{ 72{
73 rcu_assign_pointer(nf_nat_amanda_hook, NULL); 73 RCU_INIT_POINTER(nf_nat_amanda_hook, NULL);
74 synchronize_rcu(); 74 synchronize_rcu();
75} 75}
76 76
77static int __init nf_nat_amanda_init(void) 77static int __init nf_nat_amanda_init(void)
78{ 78{
79 BUG_ON(nf_nat_amanda_hook != NULL); 79 BUG_ON(nf_nat_amanda_hook != NULL);
80 rcu_assign_pointer(nf_nat_amanda_hook, help); 80 RCU_INIT_POINTER(nf_nat_amanda_hook, help);
81 return 0; 81 return 0;
82} 82}
83 83
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 3346de5d94d0..447bc5cfdc6c 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -514,7 +514,7 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
514 ret = -EBUSY; 514 ret = -EBUSY;
515 goto out; 515 goto out;
516 } 516 }
517 rcu_assign_pointer(nf_nat_protos[proto->protonum], proto); 517 RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto);
518 out: 518 out:
519 spin_unlock_bh(&nf_nat_lock); 519 spin_unlock_bh(&nf_nat_lock);
520 return ret; 520 return ret;
@@ -525,7 +525,7 @@ EXPORT_SYMBOL(nf_nat_protocol_register);
525void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) 525void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
526{ 526{
527 spin_lock_bh(&nf_nat_lock); 527 spin_lock_bh(&nf_nat_lock);
528 rcu_assign_pointer(nf_nat_protos[proto->protonum], 528 RCU_INIT_POINTER(nf_nat_protos[proto->protonum],
529 &nf_nat_unknown_protocol); 529 &nf_nat_unknown_protocol);
530 spin_unlock_bh(&nf_nat_lock); 530 spin_unlock_bh(&nf_nat_lock);
531 synchronize_rcu(); 531 synchronize_rcu();
@@ -736,10 +736,10 @@ static int __init nf_nat_init(void)
736 /* Sew in builtin protocols. */ 736 /* Sew in builtin protocols. */
737 spin_lock_bh(&nf_nat_lock); 737 spin_lock_bh(&nf_nat_lock);
738 for (i = 0; i < MAX_IP_NAT_PROTO; i++) 738 for (i = 0; i < MAX_IP_NAT_PROTO; i++)
739 rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol); 739 RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol);
740 rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); 740 RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
741 rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); 741 RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
742 rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); 742 RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
743 spin_unlock_bh(&nf_nat_lock); 743 spin_unlock_bh(&nf_nat_lock);
744 744
745 /* Initialize fake conntrack so that NAT will skip it */ 745 /* Initialize fake conntrack so that NAT will skip it */
@@ -748,12 +748,12 @@ static int __init nf_nat_init(void)
748 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); 748 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
749 749
750 BUG_ON(nf_nat_seq_adjust_hook != NULL); 750 BUG_ON(nf_nat_seq_adjust_hook != NULL);
751 rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); 751 RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
752 BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); 752 BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
753 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, 753 RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
754 nfnetlink_parse_nat_setup); 754 nfnetlink_parse_nat_setup);
755 BUG_ON(nf_ct_nat_offset != NULL); 755 BUG_ON(nf_ct_nat_offset != NULL);
756 rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset); 756 RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
757 return 0; 757 return 0;
758 758
759 cleanup_extend: 759 cleanup_extend:
@@ -766,9 +766,9 @@ static void __exit nf_nat_cleanup(void)
766 unregister_pernet_subsys(&nf_nat_net_ops); 766 unregister_pernet_subsys(&nf_nat_net_ops);
767 nf_ct_l3proto_put(l3proto); 767 nf_ct_l3proto_put(l3proto);
768 nf_ct_extend_unregister(&nat_extend); 768 nf_ct_extend_unregister(&nat_extend);
769 rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); 769 RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
770 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); 770 RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
771 rcu_assign_pointer(nf_ct_nat_offset, NULL); 771 RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
772 synchronize_net(); 772 synchronize_net();
773} 773}
774 774
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index dc73abb3fe27..e462a957d080 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -113,14 +113,14 @@ out:
113 113
114static void __exit nf_nat_ftp_fini(void) 114static void __exit nf_nat_ftp_fini(void)
115{ 115{
116 rcu_assign_pointer(nf_nat_ftp_hook, NULL); 116 RCU_INIT_POINTER(nf_nat_ftp_hook, NULL);
117 synchronize_rcu(); 117 synchronize_rcu();
118} 118}
119 119
120static int __init nf_nat_ftp_init(void) 120static int __init nf_nat_ftp_init(void)
121{ 121{
122 BUG_ON(nf_nat_ftp_hook != NULL); 122 BUG_ON(nf_nat_ftp_hook != NULL);
123 rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp); 123 RCU_INIT_POINTER(nf_nat_ftp_hook, nf_nat_ftp);
124 return 0; 124 return 0;
125} 125}
126 126
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 790f3160e012..b9a1136addbd 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -581,30 +581,30 @@ static int __init init(void)
581 BUG_ON(nat_callforwarding_hook != NULL); 581 BUG_ON(nat_callforwarding_hook != NULL);
582 BUG_ON(nat_q931_hook != NULL); 582 BUG_ON(nat_q931_hook != NULL);
583 583
584 rcu_assign_pointer(set_h245_addr_hook, set_h245_addr); 584 RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr);
585 rcu_assign_pointer(set_h225_addr_hook, set_h225_addr); 585 RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr);
586 rcu_assign_pointer(set_sig_addr_hook, set_sig_addr); 586 RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr);
587 rcu_assign_pointer(set_ras_addr_hook, set_ras_addr); 587 RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr);
588 rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp); 588 RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp);
589 rcu_assign_pointer(nat_t120_hook, nat_t120); 589 RCU_INIT_POINTER(nat_t120_hook, nat_t120);
590 rcu_assign_pointer(nat_h245_hook, nat_h245); 590 RCU_INIT_POINTER(nat_h245_hook, nat_h245);
591 rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding); 591 RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding);
592 rcu_assign_pointer(nat_q931_hook, nat_q931); 592 RCU_INIT_POINTER(nat_q931_hook, nat_q931);
593 return 0; 593 return 0;
594} 594}
595 595
596/****************************************************************************/ 596/****************************************************************************/
597static void __exit fini(void) 597static void __exit fini(void)
598{ 598{
599 rcu_assign_pointer(set_h245_addr_hook, NULL); 599 RCU_INIT_POINTER(set_h245_addr_hook, NULL);
600 rcu_assign_pointer(set_h225_addr_hook, NULL); 600 RCU_INIT_POINTER(set_h225_addr_hook, NULL);
601 rcu_assign_pointer(set_sig_addr_hook, NULL); 601 RCU_INIT_POINTER(set_sig_addr_hook, NULL);
602 rcu_assign_pointer(set_ras_addr_hook, NULL); 602 RCU_INIT_POINTER(set_ras_addr_hook, NULL);
603 rcu_assign_pointer(nat_rtp_rtcp_hook, NULL); 603 RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL);
604 rcu_assign_pointer(nat_t120_hook, NULL); 604 RCU_INIT_POINTER(nat_t120_hook, NULL);
605 rcu_assign_pointer(nat_h245_hook, NULL); 605 RCU_INIT_POINTER(nat_h245_hook, NULL);
606 rcu_assign_pointer(nat_callforwarding_hook, NULL); 606 RCU_INIT_POINTER(nat_callforwarding_hook, NULL);
607 rcu_assign_pointer(nat_q931_hook, NULL); 607 RCU_INIT_POINTER(nat_q931_hook, NULL);
608 synchronize_rcu(); 608 synchronize_rcu();
609} 609}
610 610
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index 535e1a802356..979ae165f4ef 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -75,14 +75,14 @@ static unsigned int help(struct sk_buff *skb,
75 75
76static void __exit nf_nat_irc_fini(void) 76static void __exit nf_nat_irc_fini(void)
77{ 77{
78 rcu_assign_pointer(nf_nat_irc_hook, NULL); 78 RCU_INIT_POINTER(nf_nat_irc_hook, NULL);
79 synchronize_rcu(); 79 synchronize_rcu();
80} 80}
81 81
82static int __init nf_nat_irc_init(void) 82static int __init nf_nat_irc_init(void)
83{ 83{
84 BUG_ON(nf_nat_irc_hook != NULL); 84 BUG_ON(nf_nat_irc_hook != NULL);
85 rcu_assign_pointer(nf_nat_irc_hook, help); 85 RCU_INIT_POINTER(nf_nat_irc_hook, help);
86 return 0; 86 return 0;
87} 87}
88 88
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 4c060038d29f..3e8284ba46b8 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -282,25 +282,25 @@ static int __init nf_nat_helper_pptp_init(void)
282 nf_nat_need_gre(); 282 nf_nat_need_gre();
283 283
284 BUG_ON(nf_nat_pptp_hook_outbound != NULL); 284 BUG_ON(nf_nat_pptp_hook_outbound != NULL);
285 rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); 285 RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
286 286
287 BUG_ON(nf_nat_pptp_hook_inbound != NULL); 287 BUG_ON(nf_nat_pptp_hook_inbound != NULL);
288 rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); 288 RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
289 289
290 BUG_ON(nf_nat_pptp_hook_exp_gre != NULL); 290 BUG_ON(nf_nat_pptp_hook_exp_gre != NULL);
291 rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); 291 RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
292 292
293 BUG_ON(nf_nat_pptp_hook_expectfn != NULL); 293 BUG_ON(nf_nat_pptp_hook_expectfn != NULL);
294 rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected); 294 RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
295 return 0; 295 return 0;
296} 296}
297 297
298static void __exit nf_nat_helper_pptp_fini(void) 298static void __exit nf_nat_helper_pptp_fini(void)
299{ 299{
300 rcu_assign_pointer(nf_nat_pptp_hook_expectfn, NULL); 300 RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL);
301 rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, NULL); 301 RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL);
302 rcu_assign_pointer(nf_nat_pptp_hook_inbound, NULL); 302 RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL);
303 rcu_assign_pointer(nf_nat_pptp_hook_outbound, NULL); 303 RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL);
304 synchronize_rcu(); 304 synchronize_rcu();
305} 305}
306 306
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index 3e61faf23a9a..f52d41ea0690 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -12,6 +12,7 @@
12#include <linux/ip.h> 12#include <linux/ip.h>
13 13
14#include <linux/netfilter.h> 14#include <linux/netfilter.h>
15#include <net/secure_seq.h>
15#include <net/netfilter/nf_nat.h> 16#include <net/netfilter/nf_nat.h>
16#include <net/netfilter/nf_nat_core.h> 17#include <net/netfilter/nf_nat_core.h>
17#include <net/netfilter/nf_nat_rule.h> 18#include <net/netfilter/nf_nat_rule.h>
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index e40cf7816fdb..78844d9208f1 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -528,13 +528,13 @@ err1:
528 528
529static void __exit nf_nat_sip_fini(void) 529static void __exit nf_nat_sip_fini(void)
530{ 530{
531 rcu_assign_pointer(nf_nat_sip_hook, NULL); 531 RCU_INIT_POINTER(nf_nat_sip_hook, NULL);
532 rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL); 532 RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL);
533 rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); 533 RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL);
534 rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); 534 RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL);
535 rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); 535 RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL);
536 rcu_assign_pointer(nf_nat_sdp_session_hook, NULL); 536 RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL);
537 rcu_assign_pointer(nf_nat_sdp_media_hook, NULL); 537 RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL);
538 synchronize_rcu(); 538 synchronize_rcu();
539} 539}
540 540
@@ -547,13 +547,13 @@ static int __init nf_nat_sip_init(void)
547 BUG_ON(nf_nat_sdp_port_hook != NULL); 547 BUG_ON(nf_nat_sdp_port_hook != NULL);
548 BUG_ON(nf_nat_sdp_session_hook != NULL); 548 BUG_ON(nf_nat_sdp_session_hook != NULL);
549 BUG_ON(nf_nat_sdp_media_hook != NULL); 549 BUG_ON(nf_nat_sdp_media_hook != NULL);
550 rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); 550 RCU_INIT_POINTER(nf_nat_sip_hook, ip_nat_sip);
551 rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); 551 RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust);
552 rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); 552 RCU_INIT_POINTER(nf_nat_sip_expect_hook, ip_nat_sip_expect);
553 rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); 553 RCU_INIT_POINTER(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
554 rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); 554 RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port);
555 rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session); 555 RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session);
556 rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media); 556 RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media);
557 return 0; 557 return 0;
558} 558}
559 559
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 076b7c8c4aa4..d1cb412c18e0 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1310,7 +1310,7 @@ static int __init nf_nat_snmp_basic_init(void)
1310 int ret = 0; 1310 int ret = 0;
1311 1311
1312 BUG_ON(nf_nat_snmp_hook != NULL); 1312 BUG_ON(nf_nat_snmp_hook != NULL);
1313 rcu_assign_pointer(nf_nat_snmp_hook, help); 1313 RCU_INIT_POINTER(nf_nat_snmp_hook, help);
1314 1314
1315 ret = nf_conntrack_helper_register(&snmp_trap_helper); 1315 ret = nf_conntrack_helper_register(&snmp_trap_helper);
1316 if (ret < 0) { 1316 if (ret < 0) {
@@ -1322,7 +1322,7 @@ static int __init nf_nat_snmp_basic_init(void)
1322 1322
1323static void __exit nf_nat_snmp_basic_fini(void) 1323static void __exit nf_nat_snmp_basic_fini(void)
1324{ 1324{
1325 rcu_assign_pointer(nf_nat_snmp_hook, NULL); 1325 RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
1326 nf_conntrack_helper_unregister(&snmp_trap_helper); 1326 nf_conntrack_helper_unregister(&snmp_trap_helper);
1327} 1327}
1328 1328
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index a6e606e84820..92900482edea 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -284,7 +284,7 @@ static int __init nf_nat_standalone_init(void)
284 284
285#ifdef CONFIG_XFRM 285#ifdef CONFIG_XFRM
286 BUG_ON(ip_nat_decode_session != NULL); 286 BUG_ON(ip_nat_decode_session != NULL);
287 rcu_assign_pointer(ip_nat_decode_session, nat_decode_session); 287 RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session);
288#endif 288#endif
289 ret = nf_nat_rule_init(); 289 ret = nf_nat_rule_init();
290 if (ret < 0) { 290 if (ret < 0) {
@@ -302,7 +302,7 @@ static int __init nf_nat_standalone_init(void)
302 nf_nat_rule_cleanup(); 302 nf_nat_rule_cleanup();
303 cleanup_decode_session: 303 cleanup_decode_session:
304#ifdef CONFIG_XFRM 304#ifdef CONFIG_XFRM
305 rcu_assign_pointer(ip_nat_decode_session, NULL); 305 RCU_INIT_POINTER(ip_nat_decode_session, NULL);
306 synchronize_net(); 306 synchronize_net();
307#endif 307#endif
308 return ret; 308 return ret;
@@ -313,7 +313,7 @@ static void __exit nf_nat_standalone_fini(void)
313 nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); 313 nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
314 nf_nat_rule_cleanup(); 314 nf_nat_rule_cleanup();
315#ifdef CONFIG_XFRM 315#ifdef CONFIG_XFRM
316 rcu_assign_pointer(ip_nat_decode_session, NULL); 316 RCU_INIT_POINTER(ip_nat_decode_session, NULL);
317 synchronize_net(); 317 synchronize_net();
318#endif 318#endif
319 /* Conntrack caches are unregistered in nf_conntrack_cleanup */ 319 /* Conntrack caches are unregistered in nf_conntrack_cleanup */
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 7274a43c7a12..a2901bf829c0 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -36,14 +36,14 @@ static unsigned int help(struct sk_buff *skb,
36 36
37static void __exit nf_nat_tftp_fini(void) 37static void __exit nf_nat_tftp_fini(void)
38{ 38{
39 rcu_assign_pointer(nf_nat_tftp_hook, NULL); 39 RCU_INIT_POINTER(nf_nat_tftp_hook, NULL);
40 synchronize_rcu(); 40 synchronize_rcu();
41} 41}
42 42
43static int __init nf_nat_tftp_init(void) 43static int __init nf_nat_tftp_init(void)
44{ 44{
45 BUG_ON(nf_nat_tftp_hook != NULL); 45 BUG_ON(nf_nat_tftp_hook != NULL);
46 rcu_assign_pointer(nf_nat_tftp_hook, help); 46 RCU_INIT_POINTER(nf_nat_tftp_hook, help);
47 return 0; 47 return 0;
48} 48}
49 49
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b14ec7d03b6e..4bfad5da94f4 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = {
254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), 254 SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
255 SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), 255 SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
256 SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), 256 SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
257 SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES),
258 SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP),
257 SNMP_MIB_SENTINEL 259 SNMP_MIB_SENTINEL
258}; 260};
259 261
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 1457acb39cec..61714bd52925 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
563 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, 563 flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
564 RT_SCOPE_UNIVERSE, 564 RT_SCOPE_UNIVERSE,
565 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, 565 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
566 FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); 566 inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP,
567 daddr, saddr, 0, 0);
567 568
568 if (!inet->hdrincl) { 569 if (!inet->hdrincl) {
569 err = raw_probe_proto_opt(&fl4, msg); 570 err = raw_probe_proto_opt(&fl4, msg);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1730689f560e..26c77e14395f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -109,6 +109,7 @@
109#include <linux/sysctl.h> 109#include <linux/sysctl.h>
110#endif 110#endif
111#include <net/atmclip.h> 111#include <net/atmclip.h>
112#include <net/secure_seq.h>
112 113
113#define RT_FL_TOS(oldflp4) \ 114#define RT_FL_TOS(oldflp4) \
114 ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) 115 ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
@@ -119,7 +120,6 @@
119 120
120static int ip_rt_max_size; 121static int ip_rt_max_size;
121static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; 122static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
122static int ip_rt_gc_interval __read_mostly = 60 * HZ;
123static int ip_rt_gc_min_interval __read_mostly = HZ / 2; 123static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
124static int ip_rt_redirect_number __read_mostly = 9; 124static int ip_rt_redirect_number __read_mostly = 9;
125static int ip_rt_redirect_load __read_mostly = HZ / 50; 125static int ip_rt_redirect_load __read_mostly = HZ / 50;
@@ -323,7 +323,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
323 struct rtable *r = NULL; 323 struct rtable *r = NULL;
324 324
325 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { 325 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
326 if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)) 326 if (!rcu_access_pointer(rt_hash_table[st->bucket].chain))
327 continue; 327 continue;
328 rcu_read_lock_bh(); 328 rcu_read_lock_bh();
329 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); 329 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -349,7 +349,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
349 do { 349 do {
350 if (--st->bucket < 0) 350 if (--st->bucket < 0)
351 return NULL; 351 return NULL;
352 } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)); 352 } while (!rcu_access_pointer(rt_hash_table[st->bucket].chain));
353 rcu_read_lock_bh(); 353 rcu_read_lock_bh();
354 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); 354 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
355 } 355 }
@@ -721,7 +721,7 @@ static inline bool compare_hash_inputs(const struct rtable *rt1,
721{ 721{
722 return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | 722 return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
723 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | 723 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
724 (rt1->rt_iif ^ rt2->rt_iif)) == 0); 724 (rt1->rt_route_iif ^ rt2->rt_route_iif)) == 0);
725} 725}
726 726
727static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) 727static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
@@ -730,8 +730,8 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
730 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | 730 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
731 (rt1->rt_mark ^ rt2->rt_mark) | 731 (rt1->rt_mark ^ rt2->rt_mark) |
732 (rt1->rt_key_tos ^ rt2->rt_key_tos) | 732 (rt1->rt_key_tos ^ rt2->rt_key_tos) |
733 (rt1->rt_oif ^ rt2->rt_oif) | 733 (rt1->rt_route_iif ^ rt2->rt_route_iif) |
734 (rt1->rt_iif ^ rt2->rt_iif)) == 0; 734 (rt1->rt_oif ^ rt2->rt_oif)) == 0;
735} 735}
736 736
737static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 737static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
@@ -760,7 +760,7 @@ static void rt_do_flush(struct net *net, int process_context)
760 760
761 if (process_context && need_resched()) 761 if (process_context && need_resched())
762 cond_resched(); 762 cond_resched();
763 rth = rcu_dereference_raw(rt_hash_table[i].chain); 763 rth = rcu_access_pointer(rt_hash_table[i].chain);
764 if (!rth) 764 if (!rth)
765 continue; 765 continue;
766 766
@@ -1628,16 +1628,18 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
1628{ 1628{
1629 struct rtable *rt = (struct rtable *) dst; 1629 struct rtable *rt = (struct rtable *) dst;
1630 __be32 orig_gw = rt->rt_gateway; 1630 __be32 orig_gw = rt->rt_gateway;
1631 struct neighbour *n; 1631 struct neighbour *n, *old_n;
1632 1632
1633 dst_confirm(&rt->dst); 1633 dst_confirm(&rt->dst);
1634 1634
1635 neigh_release(dst_get_neighbour(&rt->dst));
1636 dst_set_neighbour(&rt->dst, NULL);
1637
1638 rt->rt_gateway = peer->redirect_learned.a4; 1635 rt->rt_gateway = peer->redirect_learned.a4;
1639 rt_bind_neighbour(rt); 1636
1640 n = dst_get_neighbour(&rt->dst); 1637 n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
1638 if (IS_ERR(n))
1639 return PTR_ERR(n);
1640 old_n = xchg(&rt->dst._neighbour, n);
1641 if (old_n)
1642 neigh_release(old_n);
1641 if (!n || !(n->nud_state & NUD_VALID)) { 1643 if (!n || !(n->nud_state & NUD_VALID)) {
1642 if (n) 1644 if (n)
1643 neigh_event_send(n, NULL); 1645 neigh_event_send(n, NULL);
@@ -2317,8 +2319,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2317 rth = rcu_dereference(rth->dst.rt_next)) { 2319 rth = rcu_dereference(rth->dst.rt_next)) {
2318 if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | 2320 if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
2319 ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | 2321 ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
2320 (rth->rt_iif ^ iif) | 2322 (rth->rt_route_iif ^ iif) |
2321 rth->rt_oif |
2322 (rth->rt_key_tos ^ tos)) == 0 && 2323 (rth->rt_key_tos ^ tos)) == 0 &&
2323 rth->rt_mark == skb->mark && 2324 rth->rt_mark == skb->mark &&
2324 net_eq(dev_net(rth->dst.dev), net) && 2325 net_eq(dev_net(rth->dst.dev), net) &&
@@ -3119,13 +3120,6 @@ static ctl_table ipv4_route_table[] = {
3119 .proc_handler = proc_dointvec_jiffies, 3120 .proc_handler = proc_dointvec_jiffies,
3120 }, 3121 },
3121 { 3122 {
3122 .procname = "gc_interval",
3123 .data = &ip_rt_gc_interval,
3124 .maxlen = sizeof(int),
3125 .mode = 0644,
3126 .proc_handler = proc_dointvec_jiffies,
3127 },
3128 {
3129 .procname = "redirect_load", 3123 .procname = "redirect_load",
3130 .data = &ip_rt_redirect_load, 3124 .data = &ip_rt_redirect_load,
3131 .maxlen = sizeof(int), 3125 .maxlen = sizeof(int),
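In check_peer_redir() the old code released the cached neighbour before binding a new one; the rewritten version looks the new neighbour up first via ipv4_neigh_lookup(), installs it with xchg() so there is never a window with a NULL neighbour, and only then drops the old reference. The swap-then-release idiom, sketched with a hypothetical refcounted object:

#include <linux/slab.h>
#include <linux/atomic.h>

struct ref_obj {
        atomic_t refcnt;
};

static void ref_obj_put(struct ref_obj *o)
{
        if (o && atomic_dec_and_test(&o->refcnt))
                kfree(o);
}

/* Replace *slot with newo (on which we already hold a reference) and
 * drop the reference that *slot used to hold.  xchg() makes the swap
 * atomic, so concurrent readers see either the old or the new object,
 * never a half-updated slot.
 */
static void install_obj(struct ref_obj **slot, struct ref_obj *newo)
{
        struct ref_obj *old = xchg(slot, newo);

        ref_obj_put(old);
}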
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 92bb9434b338..3bc5c8f7c71b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -276,7 +276,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
276 int mss; 276 int mss;
277 struct rtable *rt; 277 struct rtable *rt;
278 __u8 rcv_wscale; 278 __u8 rcv_wscale;
279 bool ecn_ok; 279 bool ecn_ok = false;
280 280
281 if (!sysctl_tcp_syncookies || !th->ack || th->rst) 281 if (!sysctl_tcp_syncookies || !th->ack || th->rst)
282 goto out; 282 goto out;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 46febcacb729..4c0da24fb649 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -524,7 +524,7 @@ EXPORT_SYMBOL(tcp_ioctl);
524 524
525static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) 525static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
526{ 526{
527 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; 527 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
528 tp->pushed_seq = tp->write_seq; 528 tp->pushed_seq = tp->write_seq;
529} 529}
530 530
@@ -540,7 +540,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
540 540
541 skb->csum = 0; 541 skb->csum = 0;
542 tcb->seq = tcb->end_seq = tp->write_seq; 542 tcb->seq = tcb->end_seq = tp->write_seq;
543 tcb->flags = TCPHDR_ACK; 543 tcb->tcp_flags = TCPHDR_ACK;
544 tcb->sacked = 0; 544 tcb->sacked = 0;
545 skb_header_release(skb); 545 skb_header_release(skb);
546 tcp_add_write_queue_tail(sk, skb); 546 tcp_add_write_queue_tail(sk, skb);
@@ -830,7 +830,7 @@ new_segment:
830 skb_shinfo(skb)->gso_segs = 0; 830 skb_shinfo(skb)->gso_segs = 0;
831 831
832 if (!copied) 832 if (!copied)
833 TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; 833 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
834 834
835 copied += copy; 835 copied += copy;
836 poffset += copy; 836 poffset += copy;
@@ -1074,7 +1074,7 @@ new_segment:
1074 } 1074 }
1075 1075
1076 if (!copied) 1076 if (!copied)
1077 TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; 1077 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
1078 1078
1079 tp->write_seq += copy; 1079 tp->write_seq += copy;
1080 TCP_SKB_CB(skb)->end_seq += copy; 1080 TCP_SKB_CB(skb)->end_seq += copy;
@@ -2455,8 +2455,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2455 info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale; 2455 info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
2456 } 2456 }
2457 2457
2458 if (tp->ecn_flags&TCP_ECN_OK) 2458 if (tp->ecn_flags & TCP_ECN_OK)
2459 info->tcpi_options |= TCPI_OPT_ECN; 2459 info->tcpi_options |= TCPI_OPT_ECN;
2460 if (tp->ecn_flags & TCP_ECN_SEEN)
2461 info->tcpi_options |= TCPI_OPT_ECN_SEEN;
2460 2462
2461 info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); 2463 info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
2462 info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); 2464 info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
@@ -2857,26 +2859,25 @@ EXPORT_SYMBOL(tcp_gro_complete);
2857 2859
2858#ifdef CONFIG_TCP_MD5SIG 2860#ifdef CONFIG_TCP_MD5SIG
2859static unsigned long tcp_md5sig_users; 2861static unsigned long tcp_md5sig_users;
2860static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; 2862static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool;
2861static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); 2863static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
2862 2864
2863static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) 2865static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
2864{ 2866{
2865 int cpu; 2867 int cpu;
2868
2866 for_each_possible_cpu(cpu) { 2869 for_each_possible_cpu(cpu) {
2867 struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu); 2870 struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
2868 if (p) { 2871
2869 if (p->md5_desc.tfm) 2872 if (p->md5_desc.tfm)
2870 crypto_free_hash(p->md5_desc.tfm); 2873 crypto_free_hash(p->md5_desc.tfm);
2871 kfree(p);
2872 }
2873 } 2874 }
2874 free_percpu(pool); 2875 free_percpu(pool);
2875} 2876}
2876 2877
2877void tcp_free_md5sig_pool(void) 2878void tcp_free_md5sig_pool(void)
2878{ 2879{
2879 struct tcp_md5sig_pool * __percpu *pool = NULL; 2880 struct tcp_md5sig_pool __percpu *pool = NULL;
2880 2881
2881 spin_lock_bh(&tcp_md5sig_pool_lock); 2882 spin_lock_bh(&tcp_md5sig_pool_lock);
2882 if (--tcp_md5sig_users == 0) { 2883 if (--tcp_md5sig_users == 0) {
@@ -2889,30 +2890,24 @@ void tcp_free_md5sig_pool(void)
2889} 2890}
2890EXPORT_SYMBOL(tcp_free_md5sig_pool); 2891EXPORT_SYMBOL(tcp_free_md5sig_pool);
2891 2892
2892static struct tcp_md5sig_pool * __percpu * 2893static struct tcp_md5sig_pool __percpu *
2893__tcp_alloc_md5sig_pool(struct sock *sk) 2894__tcp_alloc_md5sig_pool(struct sock *sk)
2894{ 2895{
2895 int cpu; 2896 int cpu;
2896 struct tcp_md5sig_pool * __percpu *pool; 2897 struct tcp_md5sig_pool __percpu *pool;
2897 2898
2898 pool = alloc_percpu(struct tcp_md5sig_pool *); 2899 pool = alloc_percpu(struct tcp_md5sig_pool);
2899 if (!pool) 2900 if (!pool)
2900 return NULL; 2901 return NULL;
2901 2902
2902 for_each_possible_cpu(cpu) { 2903 for_each_possible_cpu(cpu) {
2903 struct tcp_md5sig_pool *p;
2904 struct crypto_hash *hash; 2904 struct crypto_hash *hash;
2905 2905
2906 p = kzalloc(sizeof(*p), sk->sk_allocation);
2907 if (!p)
2908 goto out_free;
2909 *per_cpu_ptr(pool, cpu) = p;
2910
2911 hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); 2906 hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
2912 if (!hash || IS_ERR(hash)) 2907 if (!hash || IS_ERR(hash))
2913 goto out_free; 2908 goto out_free;
2914 2909
2915 p->md5_desc.tfm = hash; 2910 per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
2916 } 2911 }
2917 return pool; 2912 return pool;
2918out_free: 2913out_free:
@@ -2920,9 +2915,9 @@ out_free:
2920 return NULL; 2915 return NULL;
2921} 2916}
2922 2917
2923struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) 2918struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
2924{ 2919{
2925 struct tcp_md5sig_pool * __percpu *pool; 2920 struct tcp_md5sig_pool __percpu *pool;
2926 int alloc = 0; 2921 int alloc = 0;
2927 2922
2928retry: 2923retry:
@@ -2941,7 +2936,7 @@ retry:
2941 2936
2942 if (alloc) { 2937 if (alloc) {
2943 /* we cannot hold spinlock here because this may sleep. */ 2938 /* we cannot hold spinlock here because this may sleep. */
2944 struct tcp_md5sig_pool * __percpu *p; 2939 struct tcp_md5sig_pool __percpu *p;
2945 2940
2946 p = __tcp_alloc_md5sig_pool(sk); 2941 p = __tcp_alloc_md5sig_pool(sk);
2947 spin_lock_bh(&tcp_md5sig_pool_lock); 2942 spin_lock_bh(&tcp_md5sig_pool_lock);
@@ -2974,7 +2969,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
2974 */ 2969 */
2975struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) 2970struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
2976{ 2971{
2977 struct tcp_md5sig_pool * __percpu *p; 2972 struct tcp_md5sig_pool __percpu *p;
2978 2973
2979 local_bh_disable(); 2974 local_bh_disable();
2980 2975
@@ -2985,7 +2980,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
2985 spin_unlock(&tcp_md5sig_pool_lock); 2980 spin_unlock(&tcp_md5sig_pool_lock);
2986 2981
2987 if (p) 2982 if (p)
2988 return *this_cpu_ptr(p); 2983 return this_cpu_ptr(p);
2989 2984
2990 local_bh_enable(); 2985 local_bh_enable();
2991 return NULL; 2986 return NULL;
@@ -3035,7 +3030,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3035 3030
3036 for (i = 0; i < shi->nr_frags; ++i) { 3031 for (i = 0; i < shi->nr_frags; ++i) {
3037 const struct skb_frag_struct *f = &shi->frags[i]; 3032 const struct skb_frag_struct *f = &shi->frags[i];
3038 sg_set_page(&sg, f->page, f->size, f->page_offset); 3033 struct page *page = skb_frag_page(f);
3034 sg_set_page(&sg, page, f->size, f->page_offset);
3039 if (crypto_hash_update(desc, &sg, f->size)) 3035 if (crypto_hash_update(desc, &sg, f->size))
3040 return 1; 3036 return 1;
3041 } 3037 }
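The tcp.c MD5 rework turns the pool from a per-CPU array of pointers (alloc_percpu() of a pointer type plus one kzalloc() per CPU) into a single alloc_percpu() of the structure itself, removing a level of indirection and all the per-CPU allocations. Both shapes, sketched with a hypothetical struct pool (error unwinding deliberately kept minimal):

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/slab.h>

struct pool { int scratch; };

/* Old shape: each per-CPU slot holds a pointer, every object kzalloc'ed separately. */
static struct pool * __percpu *alloc_indirect(void)
{
        struct pool * __percpu *p = alloc_percpu(struct pool *);
        int cpu;

        if (!p)
                return NULL;
        for_each_possible_cpu(cpu) {
                struct pool *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

                if (!obj) {
                        /* freeing the objects already attached to other CPUs
                         * is omitted in this sketch */
                        free_percpu(p);
                        return NULL;
                }
                *per_cpu_ptr(p, cpu) = obj;
        }
        return p;
}

/* New shape: the per-CPU area itself is the object — zeroed, one chunk. */
static struct pool __percpu *alloc_direct(void)
{
        return alloc_percpu(struct pool);
}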
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ea0d2183df4b..c1653fe47255 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -217,16 +217,25 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
217 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; 217 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
218} 218}
219 219
220static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) 220static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
221{ 221{
222 if (tp->ecn_flags & TCP_ECN_OK) { 222 if (!(tp->ecn_flags & TCP_ECN_OK))
223 if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) 223 return;
224 tp->ecn_flags |= TCP_ECN_DEMAND_CWR; 224
225 switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
226 case INET_ECN_NOT_ECT:
225 /* Funny extension: if ECT is not set on a segment, 227 /* Funny extension: if ECT is not set on a segment,
 226 * it is surely retransmit. It is not in ECN RFC, 228 * and we have already seen ECT on a previous segment,
227 * but Linux follows this rule. */ 229 * it is probably a retransmit.
228 else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) 230 */
231 if (tp->ecn_flags & TCP_ECN_SEEN)
229 tcp_enter_quickack_mode((struct sock *)tp); 232 tcp_enter_quickack_mode((struct sock *)tp);
233 break;
234 case INET_ECN_CE:
235 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 236 /* fall through */
237 default:
238 tp->ecn_flags |= TCP_ECN_SEEN;
230 } 239 }
231} 240}
232 241
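The reworked TCP_ECN_check_ce() dispatches on the two ECN bits of the IP DS field (the same bits the tcp_ipv4.c hunk later in this patch stores in TCP_SKB_CB(skb)->ip_dsfield). The standard codepoints are Not-ECT (00), ECT(1) (01), ECT(0) (10) and CE (11). A small stand-alone C sketch of the same classification; the enum values are assumed to mirror the usual INET_ECN_* constants rather than quoted from the kernel headers:

#include <stdio.h>

enum { NOT_ECT = 0, ECT_1 = 1, ECT_0 = 2, CE = 3, ECN_MASK = 3 };

static const char *classify(unsigned int dsfield)
{
	switch (dsfield & ECN_MASK) {
	case NOT_ECT:
		return "not ECN-capable (likely a retransmit if ECT was seen before)";
	case CE:
		return "congestion experienced -> demand CWR from the peer";
	default:
		return "ECN-capable transport";
	}
}

int main(void)
{
	printf("%s\n", classify(0x03));	/* CE */
	printf("%s\n", classify(0x02));	/* ECT(0) */
	printf("%s\n", classify(0x00));	/* Not-ECT */
	return 0;
}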
@@ -256,8 +265,7 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
256 265
257static void tcp_fixup_sndbuf(struct sock *sk) 266static void tcp_fixup_sndbuf(struct sock *sk)
258{ 267{
259 int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + 268 int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
260 sizeof(struct sk_buff);
261 269
262 if (sk->sk_sndbuf < 3 * sndmem) { 270 if (sk->sk_sndbuf < 3 * sndmem) {
263 sk->sk_sndbuf = 3 * sndmem; 271 sk->sk_sndbuf = 3 * sndmem;
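Several hunks in this file replace the hand-rolled "mss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff)" estimate with SKB_TRUESIZE(). As a rough guide (recalled, not quoted from include/linux/skbuff.h), the macro in kernels of this vintage has approximately the shape below, so unlike the old formula it also charges for the cacheline-aligned skb_shared_info:

/* approximate shape, for orientation only */
#define SKB_DATA_ALIGN(X)	ALIGN(X, SMP_CACHE_BYTES)
#define SKB_TRUESIZE(X)	((X) +						\
			 SKB_DATA_ALIGN(sizeof(struct sk_buff)) +	\
			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))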
@@ -340,7 +348,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
340static void tcp_fixup_rcvbuf(struct sock *sk) 348static void tcp_fixup_rcvbuf(struct sock *sk)
341{ 349{
342 struct tcp_sock *tp = tcp_sk(sk); 350 struct tcp_sock *tp = tcp_sk(sk);
343 int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); 351 int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
344 352
345 /* Try to select rcvbuf so that 4 mss-sized segments 353 /* Try to select rcvbuf so that 4 mss-sized segments
346 * will fit to window and corresponding skbs will fit to our rcvbuf. 354 * will fit to window and corresponding skbs will fit to our rcvbuf.
@@ -531,8 +539,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
531 space /= tp->advmss; 539 space /= tp->advmss;
532 if (!space) 540 if (!space)
533 space = 1; 541 space = 1;
534 rcvmem = (tp->advmss + MAX_TCP_HEADER + 542 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
535 16 + sizeof(struct sk_buff));
536 while (tcp_win_from_space(rcvmem) < tp->advmss) 543 while (tcp_win_from_space(rcvmem) < tp->advmss)
537 rcvmem += 128; 544 rcvmem += 128;
538 space *= rcvmem; 545 space *= rcvmem;
@@ -1124,7 +1131,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
1124 return 0; 1131 return 0;
1125 1132
1126 /* ...Then it's D-SACK, and must reside below snd_una completely */ 1133 /* ...Then it's D-SACK, and must reside below snd_una completely */
1127 if (!after(end_seq, tp->snd_una)) 1134 if (after(end_seq, tp->snd_una))
1128 return 0; 1135 return 0;
1129 1136
1130 if (!before(start_seq, tp->undo_marker)) 1137 if (!before(start_seq, tp->undo_marker))
@@ -1389,9 +1396,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1389 1396
1390 BUG_ON(!pcount); 1397 BUG_ON(!pcount);
1391 1398
1392 /* Tweak before seqno plays */ 1399 if (skb == tp->lost_skb_hint)
1393 if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint &&
1394 !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
1395 tp->lost_cnt_hint += pcount; 1400 tp->lost_cnt_hint += pcount;
1396 1401
1397 TCP_SKB_CB(prev)->end_seq += shifted; 1402 TCP_SKB_CB(prev)->end_seq += shifted;
@@ -1440,7 +1445,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1440 tp->lost_cnt_hint -= tcp_skb_pcount(prev); 1445 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
1441 } 1446 }
1442 1447
1443 TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; 1448 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
1444 if (skb == tcp_highest_sack(sk)) 1449 if (skb == tcp_highest_sack(sk))
1445 tcp_advance_highest_sack(sk, skb); 1450 tcp_advance_highest_sack(sk, skb);
1446 1451
@@ -2830,9 +2835,13 @@ static int tcp_try_undo_loss(struct sock *sk)
2830static inline void tcp_complete_cwr(struct sock *sk) 2835static inline void tcp_complete_cwr(struct sock *sk)
2831{ 2836{
2832 struct tcp_sock *tp = tcp_sk(sk); 2837 struct tcp_sock *tp = tcp_sk(sk);
2833 /* Do not moderate cwnd if it's already undone in cwr or recovery */ 2838
2834 if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { 2839 /* Do not moderate cwnd if it's already undone in cwr or recovery. */
2835 tp->snd_cwnd = tp->snd_ssthresh; 2840 if (tp->undo_marker) {
2841 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR)
2842 tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
2843 else /* PRR */
2844 tp->snd_cwnd = tp->snd_ssthresh;
2836 tp->snd_cwnd_stamp = tcp_time_stamp; 2845 tp->snd_cwnd_stamp = tcp_time_stamp;
2837 } 2846 }
2838 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); 2847 tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
@@ -2950,6 +2959,38 @@ void tcp_simple_retransmit(struct sock *sk)
2950} 2959}
2951EXPORT_SYMBOL(tcp_simple_retransmit); 2960EXPORT_SYMBOL(tcp_simple_retransmit);
2952 2961
 2962/* This function implements the PRR algorithm, specifically the PRR-SSRB
2963 * (proportional rate reduction with slow start reduction bound) as described in
2964 * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
2965 * It computes the number of packets to send (sndcnt) based on packets newly
2966 * delivered:
 2967 * 1) If the number of packets in flight is larger than ssthresh, PRR spreads the
2968 * cwnd reductions across a full RTT.
 2969 * 2) If the number of packets in flight is lower than ssthresh (such as due to excess
2970 * losses and/or application stalls), do not perform any further cwnd
2971 * reductions, but instead slow start up to ssthresh.
2972 */
2973static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
2974 int fast_rexmit, int flag)
2975{
2976 struct tcp_sock *tp = tcp_sk(sk);
2977 int sndcnt = 0;
2978 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2979
2980 if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
2981 u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
2982 tp->prior_cwnd - 1;
2983 sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
2984 } else {
2985 sndcnt = min_t(int, delta,
2986 max_t(int, tp->prr_delivered - tp->prr_out,
2987 newly_acked_sacked) + 1);
2988 }
2989
2990 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
2991 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2992}
2993
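A worked example may make the two branches concrete (the numbers are invented, not taken from the patch): with prior_cwnd = 20, snd_ssthresh = 10, prr_delivered = 4, prr_out = 1 and 15 packets in flight, the first branch applies, sndcnt = ceil(10 * 4 / 20) - 1 = 1 and cwnd becomes 15 + 1 = 16, so the window shrinks toward ssthresh in proportion to newly delivered data. A stand-alone sketch of that arithmetic:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical sample values */
	uint64_t prior_cwnd = 20, ssthresh = 10;
	uint64_t prr_delivered = 4, prr_out = 1, in_flight = 15;

	/* first branch: in_flight > ssthresh, spread the reduction over one RTT */
	uint64_t sndcnt = (ssthresh * prr_delivered + prior_cwnd - 1) / prior_cwnd
			  - prr_out;

	printf("sndcnt = %llu, new cwnd = %llu\n",
	       (unsigned long long)sndcnt,
	       (unsigned long long)(in_flight + sndcnt));
	return 0;
}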
2953/* Process an event, which can update packets-in-flight not trivially. 2994/* Process an event, which can update packets-in-flight not trivially.
2954 * Main goal of this function is to calculate new estimate for left_out, 2995 * Main goal of this function is to calculate new estimate for left_out,
2955 * taking into account both packets sitting in receiver's buffer and 2996 * taking into account both packets sitting in receiver's buffer and
@@ -2961,7 +3002,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit);
2961 * It does _not_ decide what to send, it is made in function 3002 * It does _not_ decide what to send, it is made in function
2962 * tcp_xmit_retransmit_queue(). 3003 * tcp_xmit_retransmit_queue().
2963 */ 3004 */
2964static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) 3005static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3006 int newly_acked_sacked, int flag)
2965{ 3007{
2966 struct inet_connection_sock *icsk = inet_csk(sk); 3008 struct inet_connection_sock *icsk = inet_csk(sk);
2967 struct tcp_sock *tp = tcp_sk(sk); 3009 struct tcp_sock *tp = tcp_sk(sk);
@@ -3111,13 +3153,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
3111 3153
3112 tp->bytes_acked = 0; 3154 tp->bytes_acked = 0;
3113 tp->snd_cwnd_cnt = 0; 3155 tp->snd_cwnd_cnt = 0;
3156 tp->prior_cwnd = tp->snd_cwnd;
3157 tp->prr_delivered = 0;
3158 tp->prr_out = 0;
3114 tcp_set_ca_state(sk, TCP_CA_Recovery); 3159 tcp_set_ca_state(sk, TCP_CA_Recovery);
3115 fast_rexmit = 1; 3160 fast_rexmit = 1;
3116 } 3161 }
3117 3162
3118 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) 3163 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
3119 tcp_update_scoreboard(sk, fast_rexmit); 3164 tcp_update_scoreboard(sk, fast_rexmit);
3120 tcp_cwnd_down(sk, flag); 3165 tp->prr_delivered += newly_acked_sacked;
3166 tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
3121 tcp_xmit_retransmit_queue(sk); 3167 tcp_xmit_retransmit_queue(sk);
3122} 3168}
3123 3169
@@ -3298,7 +3344,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3298 * connection startup slow start one packet too 3344 * connection startup slow start one packet too
3299 * quickly. This is severely frowned upon behavior. 3345 * quickly. This is severely frowned upon behavior.
3300 */ 3346 */
3301 if (!(scb->flags & TCPHDR_SYN)) { 3347 if (!(scb->tcp_flags & TCPHDR_SYN)) {
3302 flag |= FLAG_DATA_ACKED; 3348 flag |= FLAG_DATA_ACKED;
3303 } else { 3349 } else {
3304 flag |= FLAG_SYN_ACKED; 3350 flag |= FLAG_SYN_ACKED;
@@ -3632,6 +3678,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3632 u32 prior_in_flight; 3678 u32 prior_in_flight;
3633 u32 prior_fackets; 3679 u32 prior_fackets;
3634 int prior_packets; 3680 int prior_packets;
3681 int prior_sacked = tp->sacked_out;
3682 int newly_acked_sacked = 0;
3635 int frto_cwnd = 0; 3683 int frto_cwnd = 0;
3636 3684
3637 /* If the ack is older than previous acks 3685 /* If the ack is older than previous acks
@@ -3703,6 +3751,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3703 /* See if we can take anything off of the retransmit queue. */ 3751 /* See if we can take anything off of the retransmit queue. */
3704 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); 3752 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3705 3753
3754 newly_acked_sacked = (prior_packets - prior_sacked) -
3755 (tp->packets_out - tp->sacked_out);
3756
3706 if (tp->frto_counter) 3757 if (tp->frto_counter)
3707 frto_cwnd = tcp_process_frto(sk, flag); 3758 frto_cwnd = tcp_process_frto(sk, flag);
3708 /* Guarantee sacktag reordering detection against wrap-arounds */ 3759 /* Guarantee sacktag reordering detection against wrap-arounds */
@@ -3715,7 +3766,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3715 tcp_may_raise_cwnd(sk, flag)) 3766 tcp_may_raise_cwnd(sk, flag))
3716 tcp_cong_avoid(sk, ack, prior_in_flight); 3767 tcp_cong_avoid(sk, ack, prior_in_flight);
3717 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, 3768 tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
3718 flag); 3769 newly_acked_sacked, flag);
3719 } else { 3770 } else {
3720 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) 3771 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3721 tcp_cong_avoid(sk, ack, prior_in_flight); 3772 tcp_cong_avoid(sk, ack, prior_in_flight);
@@ -4897,8 +4948,10 @@ static void tcp_new_space(struct sock *sk)
4897 struct tcp_sock *tp = tcp_sk(sk); 4948 struct tcp_sock *tp = tcp_sk(sk);
4898 4949
4899 if (tcp_should_expand_sndbuf(sk)) { 4950 if (tcp_should_expand_sndbuf(sk)) {
4900 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + 4951 int sndmem = SKB_TRUESIZE(max_t(u32,
4901 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); 4952 tp->rx_opt.mss_clamp,
4953 tp->mss_cache) +
4954 MAX_TCP_HEADER);
4902 int demanded = max_t(unsigned int, tp->snd_cwnd, 4955 int demanded = max_t(unsigned int, tp->snd_cwnd,
4903 tp->reordering + 1); 4956 tp->reordering + 1);
4904 sndmem *= 2 * demanded; 4957 sndmem *= 2 * demanded;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 955b8e65b69e..48da7cc41e23 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -72,6 +72,7 @@
72#include <net/timewait_sock.h> 72#include <net/timewait_sock.h>
73#include <net/xfrm.h> 73#include <net/xfrm.h>
74#include <net/netdma.h> 74#include <net/netdma.h>
75#include <net/secure_seq.h>
75 76
76#include <linux/inet.h> 77#include <linux/inet.h>
77#include <linux/ipv6.h> 78#include <linux/ipv6.h>
@@ -807,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
807 kfree(inet_rsk(req)->opt); 808 kfree(inet_rsk(req)->opt);
808} 809}
809 810
810static void syn_flood_warning(const struct sk_buff *skb) 811/*
812 * Return 1 if a syncookie should be sent
813 */
814int tcp_syn_flood_action(struct sock *sk,
815 const struct sk_buff *skb,
816 const char *proto)
811{ 817{
812 const char *msg; 818 const char *msg = "Dropping request";
819 int want_cookie = 0;
820 struct listen_sock *lopt;
821
822
813 823
814#ifdef CONFIG_SYN_COOKIES 824#ifdef CONFIG_SYN_COOKIES
815 if (sysctl_tcp_syncookies) 825 if (sysctl_tcp_syncookies) {
816 msg = "Sending cookies"; 826 msg = "Sending cookies";
817 else 827 want_cookie = 1;
828 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
829 } else
818#endif 830#endif
819 msg = "Dropping request"; 831 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
820 832
821 pr_info("TCP: Possible SYN flooding on port %d. %s.\n", 833 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
822 ntohs(tcp_hdr(skb)->dest), msg); 834 if (!lopt->synflood_warned) {
835 lopt->synflood_warned = 1;
836 pr_info("%s: Possible SYN flooding on port %d. %s. "
837 " Check SNMP counters.\n",
838 proto, ntohs(tcp_hdr(skb)->dest), msg);
839 }
840 return want_cookie;
823} 841}
842EXPORT_SYMBOL(tcp_syn_flood_action);
824 843
825/* 844/*
826 * Save and compile IPv4 options into the request_sock if needed. 845 * Save and compile IPv4 options into the request_sock if needed.
@@ -908,18 +927,21 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
908 } 927 }
909 sk_nocaps_add(sk, NETIF_F_GSO_MASK); 928 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
910 } 929 }
911 if (tcp_alloc_md5sig_pool(sk) == NULL) { 930
931 md5sig = tp->md5sig_info;
932 if (md5sig->entries4 == 0 &&
933 tcp_alloc_md5sig_pool(sk) == NULL) {
912 kfree(newkey); 934 kfree(newkey);
913 return -ENOMEM; 935 return -ENOMEM;
914 } 936 }
915 md5sig = tp->md5sig_info;
916 937
917 if (md5sig->alloced4 == md5sig->entries4) { 938 if (md5sig->alloced4 == md5sig->entries4) {
918 keys = kmalloc((sizeof(*keys) * 939 keys = kmalloc((sizeof(*keys) *
919 (md5sig->entries4 + 1)), GFP_ATOMIC); 940 (md5sig->entries4 + 1)), GFP_ATOMIC);
920 if (!keys) { 941 if (!keys) {
921 kfree(newkey); 942 kfree(newkey);
922 tcp_free_md5sig_pool(); 943 if (md5sig->entries4 == 0)
944 tcp_free_md5sig_pool();
923 return -ENOMEM; 945 return -ENOMEM;
924 } 946 }
925 947
@@ -963,6 +985,7 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
963 kfree(tp->md5sig_info->keys4); 985 kfree(tp->md5sig_info->keys4);
964 tp->md5sig_info->keys4 = NULL; 986 tp->md5sig_info->keys4 = NULL;
965 tp->md5sig_info->alloced4 = 0; 987 tp->md5sig_info->alloced4 = 0;
988 tcp_free_md5sig_pool();
966 } else if (tp->md5sig_info->entries4 != i) { 989 } else if (tp->md5sig_info->entries4 != i) {
967 /* Need to do some manipulation */ 990 /* Need to do some manipulation */
968 memmove(&tp->md5sig_info->keys4[i], 991 memmove(&tp->md5sig_info->keys4[i],
@@ -970,7 +993,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
970 (tp->md5sig_info->entries4 - i) * 993 (tp->md5sig_info->entries4 - i) *
971 sizeof(struct tcp4_md5sig_key)); 994 sizeof(struct tcp4_md5sig_key));
972 } 995 }
973 tcp_free_md5sig_pool();
974 return 0; 996 return 0;
975 } 997 }
976 } 998 }
@@ -1234,11 +1256,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1234 __be32 saddr = ip_hdr(skb)->saddr; 1256 __be32 saddr = ip_hdr(skb)->saddr;
1235 __be32 daddr = ip_hdr(skb)->daddr; 1257 __be32 daddr = ip_hdr(skb)->daddr;
1236 __u32 isn = TCP_SKB_CB(skb)->when; 1258 __u32 isn = TCP_SKB_CB(skb)->when;
1237#ifdef CONFIG_SYN_COOKIES
1238 int want_cookie = 0; 1259 int want_cookie = 0;
1239#else
1240#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1241#endif
1242 1260
1243 /* Never answer to SYNs send to broadcast or multicast */ 1261 /* Never answer to SYNs send to broadcast or multicast */
1244 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 1262 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1249,14 +1267,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1249 * evidently real one. 1267 * evidently real one.
1250 */ 1268 */
1251 if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 1269 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1252 if (net_ratelimit()) 1270 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1253 syn_flood_warning(skb); 1271 if (!want_cookie)
1254#ifdef CONFIG_SYN_COOKIES 1272 goto drop;
1255 if (sysctl_tcp_syncookies) {
1256 want_cookie = 1;
1257 } else
1258#endif
1259 goto drop;
1260 } 1273 }
1261 1274
1262 /* Accept backlog is full. If we have already queued enough 1275 /* Accept backlog is full. If we have already queued enough
@@ -1302,9 +1315,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1302 while (l-- > 0) 1315 while (l-- > 0)
1303 *c++ ^= *hash_location++; 1316 *c++ ^= *hash_location++;
1304 1317
1305#ifdef CONFIG_SYN_COOKIES
1306 want_cookie = 0; /* not our kind of cookie */ 1318 want_cookie = 0; /* not our kind of cookie */
1307#endif
1308 tmp_ext.cookie_out_never = 0; /* false */ 1319 tmp_ext.cookie_out_never = 0; /* false */
1309 tmp_ext.cookie_plus = tmp_opt.cookie_plus; 1320 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1310 } else if (!tp->rx_opt.cookie_in_always) { 1321 } else if (!tp->rx_opt.cookie_in_always) {
@@ -1577,7 +1588,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1577#endif 1588#endif
1578 1589
1579 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1590 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1580 sock_rps_save_rxhash(sk, skb->rxhash); 1591 sock_rps_save_rxhash(sk, skb);
1581 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { 1592 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1582 rsk = sk; 1593 rsk = sk;
1583 goto reset; 1594 goto reset;
@@ -1594,7 +1605,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1594 goto discard; 1605 goto discard;
1595 1606
1596 if (nsk != sk) { 1607 if (nsk != sk) {
1597 sock_rps_save_rxhash(nsk, skb->rxhash); 1608 sock_rps_save_rxhash(nsk, skb);
1598 if (tcp_child_process(sk, nsk, skb)) { 1609 if (tcp_child_process(sk, nsk, skb)) {
1599 rsk = nsk; 1610 rsk = nsk;
1600 goto reset; 1611 goto reset;
@@ -1602,7 +1613,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1602 return 0; 1613 return 0;
1603 } 1614 }
1604 } else 1615 } else
1605 sock_rps_save_rxhash(sk, skb->rxhash); 1616 sock_rps_save_rxhash(sk, skb);
1606 1617
1607 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { 1618 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1608 rsk = sk; 1619 rsk = sk;
@@ -1669,7 +1680,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
1669 skb->len - th->doff * 4); 1680 skb->len - th->doff * 4);
1670 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1681 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1671 TCP_SKB_CB(skb)->when = 0; 1682 TCP_SKB_CB(skb)->when = 0;
1672 TCP_SKB_CB(skb)->flags = iph->tos; 1683 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1673 TCP_SKB_CB(skb)->sacked = 0; 1684 TCP_SKB_CB(skb)->sacked = 0;
1674 1685
1675 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 1686 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 882e0b0964d0..dde6b5768316 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -297,9 +297,9 @@ static u16 tcp_select_window(struct sock *sk)
297/* Packet ECN state for a SYN-ACK */ 297/* Packet ECN state for a SYN-ACK */
298static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) 298static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
299{ 299{
300 TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR; 300 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
301 if (!(tp->ecn_flags & TCP_ECN_OK)) 301 if (!(tp->ecn_flags & TCP_ECN_OK))
302 TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE; 302 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
303} 303}
304 304
305/* Packet ECN state for a SYN. */ 305/* Packet ECN state for a SYN. */
@@ -309,7 +309,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
309 309
310 tp->ecn_flags = 0; 310 tp->ecn_flags = 0;
311 if (sysctl_tcp_ecn == 1) { 311 if (sysctl_tcp_ecn == 1) {
312 TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR; 312 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
313 tp->ecn_flags = TCP_ECN_OK; 313 tp->ecn_flags = TCP_ECN_OK;
314 } 314 }
315} 315}
@@ -356,7 +356,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
356 skb->ip_summed = CHECKSUM_PARTIAL; 356 skb->ip_summed = CHECKSUM_PARTIAL;
357 skb->csum = 0; 357 skb->csum = 0;
358 358
359 TCP_SKB_CB(skb)->flags = flags; 359 TCP_SKB_CB(skb)->tcp_flags = flags;
360 TCP_SKB_CB(skb)->sacked = 0; 360 TCP_SKB_CB(skb)->sacked = 0;
361 361
362 skb_shinfo(skb)->gso_segs = 1; 362 skb_shinfo(skb)->gso_segs = 1;
@@ -826,7 +826,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
826 tcb = TCP_SKB_CB(skb); 826 tcb = TCP_SKB_CB(skb);
827 memset(&opts, 0, sizeof(opts)); 827 memset(&opts, 0, sizeof(opts));
828 828
829 if (unlikely(tcb->flags & TCPHDR_SYN)) 829 if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
830 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); 830 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
831 else 831 else
832 tcp_options_size = tcp_established_options(sk, skb, &opts, 832 tcp_options_size = tcp_established_options(sk, skb, &opts,
@@ -850,9 +850,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
850 th->seq = htonl(tcb->seq); 850 th->seq = htonl(tcb->seq);
851 th->ack_seq = htonl(tp->rcv_nxt); 851 th->ack_seq = htonl(tp->rcv_nxt);
852 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | 852 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
853 tcb->flags); 853 tcb->tcp_flags);
854 854
855 if (unlikely(tcb->flags & TCPHDR_SYN)) { 855 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
856 /* RFC1323: The window in SYN & SYN/ACK segments 856 /* RFC1323: The window in SYN & SYN/ACK segments
857 * is never scaled. 857 * is never scaled.
858 */ 858 */
@@ -875,7 +875,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
875 } 875 }
876 876
877 tcp_options_write((__be32 *)(th + 1), tp, &opts); 877 tcp_options_write((__be32 *)(th + 1), tp, &opts);
878 if (likely((tcb->flags & TCPHDR_SYN) == 0)) 878 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
879 TCP_ECN_send(sk, skb, tcp_header_size); 879 TCP_ECN_send(sk, skb, tcp_header_size);
880 880
881#ifdef CONFIG_TCP_MD5SIG 881#ifdef CONFIG_TCP_MD5SIG
@@ -889,7 +889,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
889 889
890 icsk->icsk_af_ops->send_check(sk, skb); 890 icsk->icsk_af_ops->send_check(sk, skb);
891 891
892 if (likely(tcb->flags & TCPHDR_ACK)) 892 if (likely(tcb->tcp_flags & TCPHDR_ACK))
893 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 893 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
894 894
895 if (skb->len != tcp_header_size) 895 if (skb->len != tcp_header_size)
@@ -1032,9 +1032,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1032 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; 1032 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1033 1033
1034 /* PSH and FIN should only be set in the second packet. */ 1034 /* PSH and FIN should only be set in the second packet. */
1035 flags = TCP_SKB_CB(skb)->flags; 1035 flags = TCP_SKB_CB(skb)->tcp_flags;
1036 TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); 1036 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1037 TCP_SKB_CB(buff)->flags = flags; 1037 TCP_SKB_CB(buff)->tcp_flags = flags;
1038 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; 1038 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1039 1039
1040 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { 1040 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -1095,7 +1095,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
1095 k = 0; 1095 k = 0;
1096 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1096 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1097 if (skb_shinfo(skb)->frags[i].size <= eat) { 1097 if (skb_shinfo(skb)->frags[i].size <= eat) {
1098 put_page(skb_shinfo(skb)->frags[i].page); 1098 skb_frag_unref(skb, i);
1099 eat -= skb_shinfo(skb)->frags[i].size; 1099 eat -= skb_shinfo(skb)->frags[i].size;
1100 } else { 1100 } else {
1101 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; 1101 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -1340,7 +1340,8 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
1340 u32 in_flight, cwnd; 1340 u32 in_flight, cwnd;
1341 1341
1342 /* Don't be strict about the congestion window for the final FIN. */ 1342 /* Don't be strict about the congestion window for the final FIN. */
1343 if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1) 1343 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1344 tcp_skb_pcount(skb) == 1)
1344 return 1; 1345 return 1;
1345 1346
1346 in_flight = tcp_packets_in_flight(tp); 1347 in_flight = tcp_packets_in_flight(tp);
@@ -1409,7 +1410,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
1409 * Nagle can be ignored during F-RTO too (see RFC4138). 1410 * Nagle can be ignored during F-RTO too (see RFC4138).
1410 */ 1411 */
1411 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || 1412 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
1412 (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)) 1413 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1413 return 1; 1414 return 1;
1414 1415
1415 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) 1416 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1497,9 +1498,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1497 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; 1498 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1498 1499
1499 /* PSH and FIN should only be set in the second packet. */ 1500 /* PSH and FIN should only be set in the second packet. */
1500 flags = TCP_SKB_CB(skb)->flags; 1501 flags = TCP_SKB_CB(skb)->tcp_flags;
1501 TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); 1502 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1502 TCP_SKB_CB(buff)->flags = flags; 1503 TCP_SKB_CB(buff)->tcp_flags = flags;
1503 1504
1504 /* This packet was never sent out yet, so no SACK bits. */ 1505 /* This packet was never sent out yet, so no SACK bits. */
1505 TCP_SKB_CB(buff)->sacked = 0; 1506 TCP_SKB_CB(buff)->sacked = 0;
@@ -1530,7 +1531,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1530 u32 send_win, cong_win, limit, in_flight; 1531 u32 send_win, cong_win, limit, in_flight;
1531 int win_divisor; 1532 int win_divisor;
1532 1533
1533 if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) 1534 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1534 goto send_now; 1535 goto send_now;
1535 1536
1536 if (icsk->icsk_ca_state != TCP_CA_Open) 1537 if (icsk->icsk_ca_state != TCP_CA_Open)
@@ -1657,7 +1658,7 @@ static int tcp_mtu_probe(struct sock *sk)
1657 1658
1658 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; 1659 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1659 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; 1660 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1660 TCP_SKB_CB(nskb)->flags = TCPHDR_ACK; 1661 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
1661 TCP_SKB_CB(nskb)->sacked = 0; 1662 TCP_SKB_CB(nskb)->sacked = 0;
1662 nskb->csum = 0; 1663 nskb->csum = 0;
1663 nskb->ip_summed = skb->ip_summed; 1664 nskb->ip_summed = skb->ip_summed;
@@ -1677,11 +1678,11 @@ static int tcp_mtu_probe(struct sock *sk)
1677 if (skb->len <= copy) { 1678 if (skb->len <= copy) {
1678 /* We've eaten all the data from this skb. 1679 /* We've eaten all the data from this skb.
1679 * Throw it away. */ 1680 * Throw it away. */
1680 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; 1681 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
1681 tcp_unlink_write_queue(skb, sk); 1682 tcp_unlink_write_queue(skb, sk);
1682 sk_wmem_free_skb(sk, skb); 1683 sk_wmem_free_skb(sk, skb);
1683 } else { 1684 } else {
1684 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & 1685 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
1685 ~(TCPHDR_FIN|TCPHDR_PSH); 1686 ~(TCPHDR_FIN|TCPHDR_PSH);
1686 if (!skb_shinfo(skb)->nr_frags) { 1687 if (!skb_shinfo(skb)->nr_frags) {
1687 skb_pull(skb, copy); 1688 skb_pull(skb, copy);
@@ -1796,11 +1797,13 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1796 tcp_event_new_data_sent(sk, skb); 1797 tcp_event_new_data_sent(sk, skb);
1797 1798
1798 tcp_minshall_update(tp, mss_now, skb); 1799 tcp_minshall_update(tp, mss_now, skb);
1799 sent_pkts++; 1800 sent_pkts += tcp_skb_pcount(skb);
1800 1801
1801 if (push_one) 1802 if (push_one)
1802 break; 1803 break;
1803 } 1804 }
1805 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
1806 tp->prr_out += sent_pkts;
1804 1807
1805 if (likely(sent_pkts)) { 1808 if (likely(sent_pkts)) {
1806 tcp_cwnd_validate(sk); 1809 tcp_cwnd_validate(sk);
@@ -1985,7 +1988,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1985 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; 1988 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
1986 1989
1987 /* Merge over control information. This moves PSH/FIN etc. over */ 1990 /* Merge over control information. This moves PSH/FIN etc. over */
1988 TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags; 1991 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
1989 1992
1990 /* All done, get rid of second SKB and account for it so 1993 /* All done, get rid of second SKB and account for it so
1991 * packet counting does not break. 1994 * packet counting does not break.
@@ -2033,7 +2036,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2033 2036
2034 if (!sysctl_tcp_retrans_collapse) 2037 if (!sysctl_tcp_retrans_collapse)
2035 return; 2038 return;
2036 if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN) 2039 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
2037 return; 2040 return;
2038 2041
2039 tcp_for_write_queue_from_safe(skb, tmp, sk) { 2042 tcp_for_write_queue_from_safe(skb, tmp, sk) {
@@ -2125,12 +2128,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2125 * since it is cheap to do so and saves bytes on the network. 2128 * since it is cheap to do so and saves bytes on the network.
2126 */ 2129 */
2127 if (skb->len > 0 && 2130 if (skb->len > 0 &&
2128 (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && 2131 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
2129 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { 2132 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
2130 if (!pskb_trim(skb, 0)) { 2133 if (!pskb_trim(skb, 0)) {
2131 /* Reuse, even though it does some unnecessary work */ 2134 /* Reuse, even though it does some unnecessary work */
2132 tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1, 2135 tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
2133 TCP_SKB_CB(skb)->flags); 2136 TCP_SKB_CB(skb)->tcp_flags);
2134 skb->ip_summed = CHECKSUM_NONE; 2137 skb->ip_summed = CHECKSUM_NONE;
2135 } 2138 }
2136 } 2139 }
@@ -2294,6 +2297,9 @@ begin_fwd:
2294 return; 2297 return;
2295 NET_INC_STATS_BH(sock_net(sk), mib_idx); 2298 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2296 2299
2300 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
2301 tp->prr_out += tcp_skb_pcount(skb);
2302
2297 if (skb == tcp_write_queue_head(sk)) 2303 if (skb == tcp_write_queue_head(sk))
2298 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 2304 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2299 inet_csk(sk)->icsk_rto, 2305 inet_csk(sk)->icsk_rto,
@@ -2317,7 +2323,7 @@ void tcp_send_fin(struct sock *sk)
2317 mss_now = tcp_current_mss(sk); 2323 mss_now = tcp_current_mss(sk);
2318 2324
2319 if (tcp_send_head(sk) != NULL) { 2325 if (tcp_send_head(sk) != NULL) {
2320 TCP_SKB_CB(skb)->flags |= TCPHDR_FIN; 2326 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
2321 TCP_SKB_CB(skb)->end_seq++; 2327 TCP_SKB_CB(skb)->end_seq++;
2322 tp->write_seq++; 2328 tp->write_seq++;
2323 } else { 2329 } else {
@@ -2379,11 +2385,11 @@ int tcp_send_synack(struct sock *sk)
2379 struct sk_buff *skb; 2385 struct sk_buff *skb;
2380 2386
2381 skb = tcp_write_queue_head(sk); 2387 skb = tcp_write_queue_head(sk);
2382 if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) { 2388 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
2383 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); 2389 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2384 return -EFAULT; 2390 return -EFAULT;
2385 } 2391 }
2386 if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) { 2392 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
2387 if (skb_cloned(skb)) { 2393 if (skb_cloned(skb)) {
2388 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 2394 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2389 if (nskb == NULL) 2395 if (nskb == NULL)
@@ -2397,7 +2403,7 @@ int tcp_send_synack(struct sock *sk)
2397 skb = nskb; 2403 skb = nskb;
2398 } 2404 }
2399 2405
2400 TCP_SKB_CB(skb)->flags |= TCPHDR_ACK; 2406 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
2401 TCP_ECN_send_synack(tcp_sk(sk), skb); 2407 TCP_ECN_send_synack(tcp_sk(sk), skb);
2402 } 2408 }
2403 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2409 TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2794,13 +2800,13 @@ int tcp_write_wakeup(struct sock *sk)
2794 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || 2800 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
2795 skb->len > mss) { 2801 skb->len > mss) {
2796 seg_size = min(seg_size, mss); 2802 seg_size = min(seg_size, mss);
2797 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; 2803 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
2798 if (tcp_fragment(sk, skb, seg_size, mss)) 2804 if (tcp_fragment(sk, skb, seg_size, mss))
2799 return -1; 2805 return -1;
2800 } else if (!tcp_skb_pcount(skb)) 2806 } else if (!tcp_skb_pcount(skb))
2801 tcp_set_skb_tso_segs(sk, skb, mss); 2807 tcp_set_skb_tso_segs(sk, skb, mss);
2802 2808
2803 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; 2809 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
2804 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2810 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2805 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2811 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2806 if (!err) 2812 if (!err)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1b5a19340a95..ebaa96bd3464 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1267,7 +1267,7 @@ int udp_disconnect(struct sock *sk, int flags)
1267 sk->sk_state = TCP_CLOSE; 1267 sk->sk_state = TCP_CLOSE;
1268 inet->inet_daddr = 0; 1268 inet->inet_daddr = 0;
1269 inet->inet_dport = 0; 1269 inet->inet_dport = 0;
1270 sock_rps_save_rxhash(sk, 0); 1270 sock_rps_reset_rxhash(sk);
1271 sk->sk_bound_dev_if = 0; 1271 sk->sk_bound_dev_if = 0;
1272 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 1272 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
1273 inet_reset_saddr(sk); 1273 inet_reset_saddr(sk);
@@ -1355,7 +1355,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1355 int rc; 1355 int rc;
1356 1356
1357 if (inet_sk(sk)->inet_daddr) 1357 if (inet_sk(sk)->inet_daddr)
1358 sock_rps_save_rxhash(sk, skb->rxhash); 1358 sock_rps_save_rxhash(sk, skb);
1359 1359
1360 rc = ip_queue_rcv_skb(sk, skb); 1360 rc = ip_queue_rcv_skb(sk, skb);
1361 if (rc < 0) { 1361 if (rc < 0) {
@@ -1461,10 +1461,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1461 } 1461 }
1462 } 1462 }
1463 1463
1464 if (rcu_dereference_raw(sk->sk_filter)) { 1464 if (rcu_access_pointer(sk->sk_filter) &&
1465 if (udp_lib_checksum_complete(skb)) 1465 udp_lib_checksum_complete(skb))
1466 goto drop; 1466 goto drop;
1467 }
1468 1467
1469 1468
1470 if (sk_rcvqueues_full(sk, skb)) 1469 if (sk_rcvqueues_full(sk, skb))
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a55500cc0b29..e39239e6426e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -374,8 +374,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
374 "%s(): cannot allocate memory for statistics; dev=%s.\n", 374 "%s(): cannot allocate memory for statistics; dev=%s.\n",
375 __func__, dev->name)); 375 __func__, dev->name));
376 neigh_parms_release(&nd_tbl, ndev->nd_parms); 376 neigh_parms_release(&nd_tbl, ndev->nd_parms);
377 ndev->dead = 1; 377 dev_put(dev);
378 in6_dev_finish_destroy(ndev); 378 kfree(ndev);
379 return NULL; 379 return NULL;
380 } 380 }
381 381
@@ -428,7 +428,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
428 ndev->tstamp = jiffies; 428 ndev->tstamp = jiffies;
429 addrconf_sysctl_register(ndev); 429 addrconf_sysctl_register(ndev);
430 /* protected by rtnl_lock */ 430 /* protected by rtnl_lock */
431 rcu_assign_pointer(dev->ip6_ptr, ndev); 431 RCU_INIT_POINTER(dev->ip6_ptr, ndev);
432 432
433 /* Join all-node multicast group */ 433 /* Join all-node multicast group */
434 ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); 434 ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
@@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 656 * layer address of our nexthop router 656 * layer address of our nexthop router
657 */ 657 */
658 658
659 if (dst_get_neighbour(&rt->dst) == NULL) 659 if (dst_get_neighbour_raw(&rt->dst) == NULL)
660 ifa->flags &= ~IFA_F_OPTIMISTIC; 660 ifa->flags &= ~IFA_F_OPTIMISTIC;
661 661
662 ifa->idev = idev; 662 ifa->idev = idev;
@@ -824,12 +824,13 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
824{ 824{
825 struct inet6_dev *idev = ifp->idev; 825 struct inet6_dev *idev = ifp->idev;
826 struct in6_addr addr, *tmpaddr; 826 struct in6_addr addr, *tmpaddr;
827 unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp, age; 827 unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age;
828 unsigned long regen_advance; 828 unsigned long regen_advance;
829 int tmp_plen; 829 int tmp_plen;
830 int ret = 0; 830 int ret = 0;
831 int max_addresses; 831 int max_addresses;
832 u32 addr_flags; 832 u32 addr_flags;
833 unsigned long now = jiffies;
833 834
834 write_lock(&idev->lock); 835 write_lock(&idev->lock);
835 if (ift) { 836 if (ift) {
@@ -874,7 +875,7 @@ retry:
874 goto out; 875 goto out;
875 } 876 }
876 memcpy(&addr.s6_addr[8], idev->rndid, 8); 877 memcpy(&addr.s6_addr[8], idev->rndid, 8);
877 age = (jiffies - ifp->tstamp) / HZ; 878 age = (now - ifp->tstamp) / HZ;
878 tmp_valid_lft = min_t(__u32, 879 tmp_valid_lft = min_t(__u32,
879 ifp->valid_lft, 880 ifp->valid_lft,
880 idev->cnf.temp_valid_lft + age); 881 idev->cnf.temp_valid_lft + age);
@@ -884,7 +885,6 @@ retry:
884 idev->cnf.max_desync_factor); 885 idev->cnf.max_desync_factor);
885 tmp_plen = ifp->prefix_len; 886 tmp_plen = ifp->prefix_len;
886 max_addresses = idev->cnf.max_addresses; 887 max_addresses = idev->cnf.max_addresses;
887 tmp_cstamp = ifp->cstamp;
888 tmp_tstamp = ifp->tstamp; 888 tmp_tstamp = ifp->tstamp;
889 spin_unlock_bh(&ifp->lock); 889 spin_unlock_bh(&ifp->lock);
890 890
@@ -929,7 +929,7 @@ retry:
929 ift->ifpub = ifp; 929 ift->ifpub = ifp;
930 ift->valid_lft = tmp_valid_lft; 930 ift->valid_lft = tmp_valid_lft;
931 ift->prefered_lft = tmp_prefered_lft; 931 ift->prefered_lft = tmp_prefered_lft;
932 ift->cstamp = tmp_cstamp; 932 ift->cstamp = now;
933 ift->tstamp = tmp_tstamp; 933 ift->tstamp = tmp_tstamp;
934 spin_unlock_bh(&ift->lock); 934 spin_unlock_bh(&ift->lock);
935 935
@@ -1999,25 +1999,50 @@ ok:
1999#ifdef CONFIG_IPV6_PRIVACY 1999#ifdef CONFIG_IPV6_PRIVACY
2000 read_lock_bh(&in6_dev->lock); 2000 read_lock_bh(&in6_dev->lock);
2001 /* update all temporary addresses in the list */ 2001 /* update all temporary addresses in the list */
2002 list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) { 2002 list_for_each_entry(ift, &in6_dev->tempaddr_list,
2003 /* 2003 tmp_list) {
2004 * When adjusting the lifetimes of an existing 2004 int age, max_valid, max_prefered;
2005 * temporary address, only lower the lifetimes. 2005
2006 * Implementations must not increase the
2007 * lifetimes of an existing temporary address
2008 * when processing a Prefix Information Option.
2009 */
2010 if (ifp != ift->ifpub) 2006 if (ifp != ift->ifpub)
2011 continue; 2007 continue;
2012 2008
2009 /*
2010 * RFC 4941 section 3.3:
2011 * If a received option will extend the lifetime
2012 * of a public address, the lifetimes of
2013 * temporary addresses should be extended,
2014 * subject to the overall constraint that no
2015 * temporary addresses should ever remain
2016 * "valid" or "preferred" for a time longer than
2017 * (TEMP_VALID_LIFETIME) or
2018 * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR),
2019 * respectively.
2020 */
2021 age = (now - ift->cstamp) / HZ;
2022 max_valid = in6_dev->cnf.temp_valid_lft - age;
2023 if (max_valid < 0)
2024 max_valid = 0;
2025
2026 max_prefered = in6_dev->cnf.temp_prefered_lft -
2027 in6_dev->cnf.max_desync_factor -
2028 age;
2029 if (max_prefered < 0)
2030 max_prefered = 0;
2031
2032 if (valid_lft > max_valid)
2033 valid_lft = max_valid;
2034
2035 if (prefered_lft > max_prefered)
2036 prefered_lft = max_prefered;
2037
2013 spin_lock(&ift->lock); 2038 spin_lock(&ift->lock);
2014 flags = ift->flags; 2039 flags = ift->flags;
2015 if (ift->valid_lft > valid_lft && 2040 ift->valid_lft = valid_lft;
2016 ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ) 2041 ift->prefered_lft = prefered_lft;
2017 ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ; 2042 ift->tstamp = now;
2018 if (ift->prefered_lft > prefered_lft && 2043 if (prefered_lft > 0)
2019 ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ) 2044 ift->flags &= ~IFA_F_DEPRECATED;
2020 ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ; 2045
2021 spin_unlock(&ift->lock); 2046 spin_unlock(&ift->lock);
2022 if (!(flags&IFA_F_TENTATIVE)) 2047 if (!(flags&IFA_F_TENTATIVE))
2023 ipv6_ifa_notify(0, ift); 2048 ipv6_ifa_notify(0, ift);
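A quick numeric illustration of the clamping above (all values invented): with temp_valid_lft = 604800 s, temp_prefered_lft = 86400 s, max_desync_factor = 600 s and a temporary address created 3600 s ago, the ceilings come out as 601200 s and 82200 s, so a router advertisement offering anything larger is capped to those values. A stand-alone sketch:

#include <stdio.h>

int main(void)
{
	/* invented example values; the kernel defaults may differ */
	long temp_valid_lft = 604800, temp_prefered_lft = 86400;
	long max_desync_factor = 600, age = 3600;
	long valid_lft = 999999, prefered_lft = 999999;	/* from the RA */

	long max_valid = temp_valid_lft - age;
	long max_prefered = temp_prefered_lft - max_desync_factor - age;

	if (max_valid < 0)
		max_valid = 0;
	if (max_prefered < 0)
		max_prefered = 0;
	if (valid_lft > max_valid)
		valid_lft = max_valid;
	if (prefered_lft > max_prefered)
		prefered_lft = max_prefered;

	printf("valid=%ld preferred=%ld\n", valid_lft, prefered_lft);
	return 0;
}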
@@ -2025,9 +2050,11 @@ ok:
2025 2050
2026 if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) { 2051 if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) {
2027 /* 2052 /*
2028 * When a new public address is created as described in [ADDRCONF], 2053 * When a new public address is created as
2029 * also create a new temporary address. Also create a temporary 2054 * described in [ADDRCONF], also create a new
2030 * address if it's enabled but no temporary address currently exists. 2055 * temporary address. Also create a temporary
2056 * address if it's enabled but no temporary
2057 * address currently exists.
2031 */ 2058 */
2032 read_unlock_bh(&in6_dev->lock); 2059 read_unlock_bh(&in6_dev->lock);
2033 ipv6_create_tempaddr(ifp, NULL); 2060 ipv6_create_tempaddr(ifp, NULL);
@@ -2706,7 +2733,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2706 idev->dead = 1; 2733 idev->dead = 1;
2707 2734
2708 /* protected by rtnl_lock */ 2735 /* protected by rtnl_lock */
2709 rcu_assign_pointer(dev->ip6_ptr, NULL); 2736 RCU_INIT_POINTER(dev->ip6_ptr, NULL);
2710 2737
2711 /* Step 1.5: remove snmp6 entry */ 2738 /* Step 1.5: remove snmp6 entry */
2712 snmp6_unregister_dev(idev); 2739 snmp6_unregister_dev(idev);
@@ -2969,12 +2996,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
2969 2996
2970 ipv6_ifa_notify(RTM_NEWADDR, ifp); 2997 ipv6_ifa_notify(RTM_NEWADDR, ifp);
2971 2998
2972 /* If added prefix is link local and forwarding is off, 2999 /* If added prefix is link local and we are prepared to process
2973 start sending router solicitations. 3000 router advertisements, start sending router solicitations.
2974 */ 3001 */
2975 3002
2976 if ((ifp->idev->cnf.forwarding == 0 || 3003 if (((ifp->idev->cnf.accept_ra == 1 && !ifp->idev->cnf.forwarding) ||
2977 ifp->idev->cnf.forwarding == 2) && 3004 ifp->idev->cnf.accept_ra == 2) &&
2978 ifp->idev->cnf.rtr_solicits > 0 && 3005 ifp->idev->cnf.rtr_solicits > 0 &&
2979 (dev->flags&IFF_LOOPBACK) == 0 && 3006 (dev->flags&IFF_LOOPBACK) == 0 &&
2980 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { 3007 (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 16560336eb72..b46e9f88ce37 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -33,6 +33,11 @@
33#include <linux/errqueue.h> 33#include <linux/errqueue.h>
34#include <asm/uaccess.h> 34#include <asm/uaccess.h>
35 35
36static inline int ipv6_mapped_addr_any(const struct in6_addr *a)
37{
38 return (ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0));
39}
40
36int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 41int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
37{ 42{
38 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 43 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
@@ -102,10 +107,12 @@ ipv4_connected:
102 107
103 ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); 108 ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr);
104 109
105 if (ipv6_addr_any(&np->saddr)) 110 if (ipv6_addr_any(&np->saddr) ||
111 ipv6_mapped_addr_any(&np->saddr))
106 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); 112 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
107 113
108 if (ipv6_addr_any(&np->rcv_saddr)) { 114 if (ipv6_addr_any(&np->rcv_saddr) ||
115 ipv6_mapped_addr_any(&np->rcv_saddr)) {
109 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, 116 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
110 &np->rcv_saddr); 117 &np->rcv_saddr);
111 if (sk->sk_prot->rehash) 118 if (sk->sk_prot->rehash)
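The new ipv6_mapped_addr_any() helper catches the IPv4-mapped unspecified address ::ffff:0.0.0.0, so a partially bound dual-stack socket also gets its mapped source rewritten on connect. A small user-space check of the same predicate (plain C, not kernel code; the helper name below is invented):

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

static int mapped_addr_any(const struct in6_addr *a)
{
	/* ::ffff:0.0.0.0 - v4-mapped prefix followed by an all-zero IPv4 part */
	static const unsigned char pfx[12] =
		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
	static const unsigned char zero4[4] = { 0, 0, 0, 0 };

	return !memcmp(a->s6_addr, pfx, 12) && !memcmp(a->s6_addr + 12, zero4, 4);
}

int main(void)
{
	struct in6_addr a;

	inet_pton(AF_INET6, "::ffff:0.0.0.0", &a);
	printf("%d\n", mapped_addr_any(&a));	/* 1 */
	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a);
	printf("%d\n", mapped_addr_any(&a));	/* 0 */
	return 0;
}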
@@ -592,7 +599,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
592 return 0; 599 return 0;
593} 600}
594 601
595int datagram_send_ctl(struct net *net, 602int datagram_send_ctl(struct net *net, struct sock *sk,
596 struct msghdr *msg, struct flowi6 *fl6, 603 struct msghdr *msg, struct flowi6 *fl6,
597 struct ipv6_txoptions *opt, 604 struct ipv6_txoptions *opt,
598 int *hlimit, int *tclass, int *dontfrag) 605 int *hlimit, int *tclass, int *dontfrag)
@@ -651,7 +658,8 @@ int datagram_send_ctl(struct net *net,
651 658
652 if (addr_type != IPV6_ADDR_ANY) { 659 if (addr_type != IPV6_ADDR_ANY) {
653 int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; 660 int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
654 if (!ipv6_chk_addr(net, &src_info->ipi6_addr, 661 if (!inet_sk(sk)->transparent &&
662 !ipv6_chk_addr(net, &src_info->ipi6_addr,
655 strict ? dev : NULL, 0)) 663 strict ? dev : NULL, 0))
656 err = -EINVAL; 664 err = -EINVAL;
657 else 665 else
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 79a485e8a700..1318de4c3e8d 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -273,12 +273,12 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
273#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 273#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
274 __u16 dstbuf; 274 __u16 dstbuf;
275#endif 275#endif
276 struct dst_entry *dst; 276 struct dst_entry *dst = skb_dst(skb);
277 277
278 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || 278 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
279 !pskb_may_pull(skb, (skb_transport_offset(skb) + 279 !pskb_may_pull(skb, (skb_transport_offset(skb) +
280 ((skb_transport_header(skb)[1] + 1) << 3)))) { 280 ((skb_transport_header(skb)[1] + 1) << 3)))) {
281 IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), 281 IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst),
282 IPSTATS_MIB_INHDRERRORS); 282 IPSTATS_MIB_INHDRERRORS);
283 kfree_skb(skb); 283 kfree_skb(skb);
284 return -1; 284 return -1;
@@ -289,9 +289,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
289 dstbuf = opt->dst1; 289 dstbuf = opt->dst1;
290#endif 290#endif
291 291
292 dst = dst_clone(skb_dst(skb));
293 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { 292 if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
294 dst_release(dst);
295 skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; 293 skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
296 opt = IP6CB(skb); 294 opt = IP6CB(skb);
297#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) 295#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
@@ -304,7 +302,6 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
304 302
305 IP6_INC_STATS_BH(dev_net(dst->dev), 303 IP6_INC_STATS_BH(dev_net(dst->dev),
306 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); 304 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
307 dst_release(dst);
308 return -1; 305 return -1;
309} 306}
310 307
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 11900417b1cc..90868fb42757 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -490,7 +490,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
490 goto out_dst_release; 490 goto out_dst_release;
491 } 491 }
492 492
493 idev = in6_dev_get(skb->dev); 493 rcu_read_lock();
494 idev = __in6_dev_get(skb->dev);
494 495
495 err = ip6_append_data(sk, icmpv6_getfrag, &msg, 496 err = ip6_append_data(sk, icmpv6_getfrag, &msg,
496 len + sizeof(struct icmp6hdr), 497 len + sizeof(struct icmp6hdr),
@@ -500,19 +501,16 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
500 if (err) { 501 if (err) {
501 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); 502 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
502 ip6_flush_pending_frames(sk); 503 ip6_flush_pending_frames(sk);
503 goto out_put; 504 } else {
505 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
506 len + sizeof(struct icmp6hdr));
504 } 507 }
505 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); 508 rcu_read_unlock();
506
507out_put:
508 if (likely(idev != NULL))
509 in6_dev_put(idev);
510out_dst_release: 509out_dst_release:
511 dst_release(dst); 510 dst_release(dst);
512out: 511out:
513 icmpv6_xmit_unlock(sk); 512 icmpv6_xmit_unlock(sk);
514} 513}
515
516EXPORT_SYMBOL(icmpv6_send); 514EXPORT_SYMBOL(icmpv6_send);
517 515
518static void icmpv6_echo_reply(struct sk_buff *skb) 516static void icmpv6_echo_reply(struct sk_buff *skb)
@@ -569,7 +567,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
569 if (hlimit < 0) 567 if (hlimit < 0)
570 hlimit = ip6_dst_hoplimit(dst); 568 hlimit = ip6_dst_hoplimit(dst);
571 569
572 idev = in6_dev_get(skb->dev); 570 idev = __in6_dev_get(skb->dev);
573 571
574 msg.skb = skb; 572 msg.skb = skb;
575 msg.offset = 0; 573 msg.offset = 0;
@@ -583,13 +581,10 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
583 if (err) { 581 if (err) {
584 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); 582 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
585 ip6_flush_pending_frames(sk); 583 ip6_flush_pending_frames(sk);
586 goto out_put; 584 } else {
585 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
586 skb->len + sizeof(struct icmp6hdr));
587 } 587 }
588 err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
589
590out_put:
591 if (likely(idev != NULL))
592 in6_dev_put(idev);
593 dst_release(dst); 588 dst_release(dst);
594out: 589out:
595 icmpv6_xmit_unlock(sk); 590 icmpv6_xmit_unlock(sk);
@@ -840,8 +835,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
840 /* Enough space for 2 64K ICMP packets, including 835 /* Enough space for 2 64K ICMP packets, including
841 * sk_buff struct overhead. 836 * sk_buff struct overhead.
842 */ 837 */
843 sk->sk_sndbuf = 838 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
844 (2 * ((64 * 1024) + sizeof(struct sk_buff)));
845 } 839 }
846 return 0; 840 return 0;
847 841
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 8a58e8cf6646..2916200f90c1 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -211,6 +211,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
211 struct flowi6 fl6; 211 struct flowi6 fl6;
212 struct dst_entry *dst; 212 struct dst_entry *dst;
213 struct in6_addr *final_p, final; 213 struct in6_addr *final_p, final;
214 int res;
214 215
215 memset(&fl6, 0, sizeof(fl6)); 216 memset(&fl6, 0, sizeof(fl6));
216 fl6.flowi6_proto = sk->sk_protocol; 217 fl6.flowi6_proto = sk->sk_protocol;
@@ -241,12 +242,14 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
241 __inet6_csk_dst_store(sk, dst, NULL, NULL); 242 __inet6_csk_dst_store(sk, dst, NULL, NULL);
242 } 243 }
243 244
244 skb_dst_set(skb, dst_clone(dst)); 245 rcu_read_lock();
246 skb_dst_set_noref(skb, dst);
245 247
246 /* Restore final destination back after routing done */ 248 /* Restore final destination back after routing done */
247 ipv6_addr_copy(&fl6.daddr, &np->daddr); 249 ipv6_addr_copy(&fl6.daddr, &np->daddr);
248 250
249 return ip6_xmit(sk, skb, &fl6, np->opt); 251 res = ip6_xmit(sk, skb, &fl6, np->opt);
252 rcu_read_unlock();
253 return res;
250} 254}
251
252EXPORT_SYMBOL_GPL(inet6_csk_xmit); 255EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b53197233709..73f1a00a96af 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -20,6 +20,7 @@
20#include <net/inet_connection_sock.h> 20#include <net/inet_connection_sock.h>
21#include <net/inet_hashtables.h> 21#include <net/inet_hashtables.h>
22#include <net/inet6_hashtables.h> 22#include <net/inet6_hashtables.h>
23#include <net/secure_seq.h>
23#include <net/ip.h> 24#include <net/ip.h>
24 25
25int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) 26int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 54a4678955bf..320d91d20ad7 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1455 RT6_TRACE("aging clone %p\n", rt); 1455 RT6_TRACE("aging clone %p\n", rt);
1456 return -1; 1456 return -1;
1457 } else if ((rt->rt6i_flags & RTF_GATEWAY) && 1457 } else if ((rt->rt6i_flags & RTF_GATEWAY) &&
1458 (!(dst_get_neighbour(&rt->dst)->flags & NTF_ROUTER))) { 1458 (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) {
1459 RT6_TRACE("purging route %p via non-router but gateway\n", 1459 RT6_TRACE("purging route %p via non-router but gateway\n",
1460 rt); 1460 rt);
1461 return -1; 1461 return -1;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index f3caf1b8d572..543039450193 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -322,8 +322,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
322} 322}
323 323
324static struct ip6_flowlabel * 324static struct ip6_flowlabel *
325fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, 325fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
326 int optlen, int *err_p) 326 char __user *optval, int optlen, int *err_p)
327{ 327{
328 struct ip6_flowlabel *fl = NULL; 328 struct ip6_flowlabel *fl = NULL;
329 int olen; 329 int olen;
@@ -360,7 +360,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
360 msg.msg_control = (void*)(fl->opt+1); 360 msg.msg_control = (void*)(fl->opt+1);
361 memset(&flowi6, 0, sizeof(flowi6)); 361 memset(&flowi6, 0, sizeof(flowi6));
362 362
363 err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk, 363 err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk,
364 &junk, &junk); 364 &junk, &junk);
365 if (err) 365 if (err)
366 goto done; 366 goto done;
@@ -528,7 +528,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
528 if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) 528 if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
529 return -EINVAL; 529 return -EINVAL;
530 530
531 fl = fl_create(net, &freq, optval, optlen, &err); 531 fl = fl_create(net, sk, &freq, optval, optlen, &err);
532 if (fl == NULL) 532 if (fl == NULL)
533 return err; 533 return err;
534 sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); 534 sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 32e5339db0c8..835c04b5239f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -135,10 +135,15 @@ static int ip6_finish_output2(struct sk_buff *skb)
135 skb->len); 135 skb->len);
136 } 136 }
137 137
138 rcu_read_lock();
138 neigh = dst_get_neighbour(dst); 139 neigh = dst_get_neighbour(dst);
139 if (neigh) 140 if (neigh) {
140 return neigh_output(neigh, skb); 141 int res = neigh_output(neigh, skb);
141 142
143 rcu_read_unlock();
144 return res;
145 }
146 rcu_read_unlock();
142 IP6_INC_STATS_BH(dev_net(dst->dev), 147 IP6_INC_STATS_BH(dev_net(dst->dev),
143 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 148 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
144 kfree_skb(skb); 149 kfree_skb(skb);
@@ -975,12 +980,14 @@ static int ip6_dst_lookup_tail(struct sock *sk,
975 * dst entry and replace it instead with the 980 * dst entry and replace it instead with the
976 * dst entry of the nexthop router 981 * dst entry of the nexthop router
977 */ 982 */
983 rcu_read_lock();
978 n = dst_get_neighbour(*dst); 984 n = dst_get_neighbour(*dst);
979 if (n && !(n->nud_state & NUD_VALID)) { 985 if (n && !(n->nud_state & NUD_VALID)) {
980 struct inet6_ifaddr *ifp; 986 struct inet6_ifaddr *ifp;
981 struct flowi6 fl_gw6; 987 struct flowi6 fl_gw6;
982 int redirect; 988 int redirect;
983 989
990 rcu_read_unlock();
984 ifp = ipv6_get_ifaddr(net, &fl6->saddr, 991 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
985 (*dst)->dev, 1); 992 (*dst)->dev, 1);
986 993
@@ -1000,6 +1007,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
1000 if ((err = (*dst)->error)) 1007 if ((err = (*dst)->error))
1001 goto out_err_release; 1008 goto out_err_release;
1002 } 1009 }
1010 } else {
1011 rcu_read_unlock();
1003 } 1012 }
1004#endif 1013#endif
1005 1014
@@ -1471,13 +1480,13 @@ alloc_new_skb:
1471 if (page && (left = PAGE_SIZE - off) > 0) { 1480 if (page && (left = PAGE_SIZE - off) > 0) {
1472 if (copy >= left) 1481 if (copy >= left)
1473 copy = left; 1482 copy = left;
1474 if (page != frag->page) { 1483 if (page != skb_frag_page(frag)) {
1475 if (i == MAX_SKB_FRAGS) { 1484 if (i == MAX_SKB_FRAGS) {
1476 err = -EMSGSIZE; 1485 err = -EMSGSIZE;
1477 goto error; 1486 goto error;
1478 } 1487 }
1479 get_page(page);
1480 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); 1488 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1489 skb_frag_ref(skb, i);
1481 frag = &skb_shinfo(skb)->frags[i]; 1490 frag = &skb_shinfo(skb)->frags[i];
1482 } 1491 }
1483 } else if(i < MAX_SKB_FRAGS) { 1492 } else if(i < MAX_SKB_FRAGS) {
@@ -1497,7 +1506,8 @@ alloc_new_skb:
1497 err = -EMSGSIZE; 1506 err = -EMSGSIZE;
1498 goto error; 1507 goto error;
1499 } 1508 }
1500 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { 1509 if (getfrag(from, skb_frag_address(frag)+frag->size,
1510 offset, copy, skb->len, skb) < 0) {
1501 err = -EFAULT; 1511 err = -EFAULT;
1502 goto error; 1512 goto error;
1503 } 1513 }
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 0bc98886c383..bdc15c9003d7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -218,8 +218,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
218{ 218{
219 struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); 219 struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
220 220
221 rcu_assign_pointer(t->next , rtnl_dereference(*tp)); 221 RCU_INIT_POINTER(t->next , rtnl_dereference(*tp));
222 rcu_assign_pointer(*tp, t); 222 RCU_INIT_POINTER(*tp, t);
223} 223}
224 224
225/** 225/**
@@ -237,7 +237,7 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
237 (iter = rtnl_dereference(*tp)) != NULL; 237 (iter = rtnl_dereference(*tp)) != NULL;
238 tp = &iter->next) { 238 tp = &iter->next) {
239 if (t == iter) { 239 if (t == iter) {
240 rcu_assign_pointer(*tp, t->next); 240 RCU_INIT_POINTER(*tp, t->next);
241 break; 241 break;
242 } 242 }
243 } 243 }
@@ -350,7 +350,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
350 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 350 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
351 351
352 if (dev == ip6n->fb_tnl_dev) 352 if (dev == ip6n->fb_tnl_dev)
353 rcu_assign_pointer(ip6n->tnls_wc[0], NULL); 353 RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
354 else 354 else
355 ip6_tnl_unlink(ip6n, t); 355 ip6_tnl_unlink(ip6n, t);
356 ip6_tnl_dst_reset(t); 356 ip6_tnl_dst_reset(t);
@@ -889,7 +889,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
889 struct net_device_stats *stats = &t->dev->stats; 889 struct net_device_stats *stats = &t->dev->stats;
890 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 890 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
891 struct ipv6_tel_txoption opt; 891 struct ipv6_tel_txoption opt;
892 struct dst_entry *dst; 892 struct dst_entry *dst = NULL, *ndst = NULL;
893 struct net_device *tdev; 893 struct net_device *tdev;
894 int mtu; 894 int mtu;
895 unsigned int max_headroom = sizeof(struct ipv6hdr); 895 unsigned int max_headroom = sizeof(struct ipv6hdr);
@@ -897,19 +897,20 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
897 int err = -1; 897 int err = -1;
898 int pkt_len; 898 int pkt_len;
899 899
900 if ((dst = ip6_tnl_dst_check(t)) != NULL) 900 if (!fl6->flowi6_mark)
901 dst_hold(dst); 901 dst = ip6_tnl_dst_check(t);
902 else { 902 if (!dst) {
903 dst = ip6_route_output(net, NULL, fl6); 903 ndst = ip6_route_output(net, NULL, fl6);
904 904
905 if (dst->error) 905 if (ndst->error)
906 goto tx_err_link_failure; 906 goto tx_err_link_failure;
907 dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); 907 ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
908 if (IS_ERR(dst)) { 908 if (IS_ERR(ndst)) {
909 err = PTR_ERR(dst); 909 err = PTR_ERR(ndst);
910 dst = NULL; 910 ndst = NULL;
911 goto tx_err_link_failure; 911 goto tx_err_link_failure;
912 } 912 }
913 dst = ndst;
913 } 914 }
914 915
915 tdev = dst->dev; 916 tdev = dst->dev;
@@ -955,8 +956,12 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
955 skb = new_skb; 956 skb = new_skb;
956 } 957 }
957 skb_dst_drop(skb); 958 skb_dst_drop(skb);
958 skb_dst_set(skb, dst_clone(dst)); 959 if (fl6->flowi6_mark) {
959 960 skb_dst_set(skb, dst);
961 ndst = NULL;
962 } else {
963 skb_dst_set_noref(skb, dst);
964 }
960 skb->transport_header = skb->network_header; 965 skb->transport_header = skb->network_header;
961 966
962 proto = fl6->flowi6_proto; 967 proto = fl6->flowi6_proto;
@@ -987,13 +992,14 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
987 stats->tx_errors++; 992 stats->tx_errors++;
988 stats->tx_aborted_errors++; 993 stats->tx_aborted_errors++;
989 } 994 }
990 ip6_tnl_dst_store(t, dst); 995 if (ndst)
996 ip6_tnl_dst_store(t, ndst);
991 return 0; 997 return 0;
992tx_err_link_failure: 998tx_err_link_failure:
993 stats->tx_carrier_errors++; 999 stats->tx_carrier_errors++;
994 dst_link_failure(skb); 1000 dst_link_failure(skb);
995tx_err_dst_release: 1001tx_err_dst_release:
996 dst_release(dst); 1002 dst_release(ndst);
997 return err; 1003 return err;
998} 1004}
999 1005
@@ -1020,9 +1026,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1020 1026
1021 dsfield = ipv4_get_dsfield(iph); 1027 dsfield = ipv4_get_dsfield(iph);
1022 1028
1023 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) 1029 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1024 fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) 1030 fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
1025 & IPV6_TCLASS_MASK; 1031 & IPV6_TCLASS_MASK;
1032 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1033 fl6.flowi6_mark = skb->mark;
1026 1034
1027 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); 1035 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
1028 if (err != 0) { 1036 if (err != 0) {
@@ -1069,10 +1077,12 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1069 fl6.flowi6_proto = IPPROTO_IPV6; 1077 fl6.flowi6_proto = IPPROTO_IPV6;
1070 1078
1071 dsfield = ipv6_get_dsfield(ipv6h); 1079 dsfield = ipv6_get_dsfield(ipv6h);
1072 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) 1080 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1073 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); 1081 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
1074 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) 1082 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
1075 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); 1083 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
1084 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1085 fl6.flowi6_mark = skb->mark;
1076 1086
1077 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); 1087 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
1078 if (err != 0) { 1088 if (err != 0) {
@@ -1439,7 +1449,7 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1439 1449
1440 t->parms.proto = IPPROTO_IPV6; 1450 t->parms.proto = IPPROTO_IPV6;
1441 dev_hold(dev); 1451 dev_hold(dev);
1442 rcu_assign_pointer(ip6n->tnls_wc[0], t); 1452 RCU_INIT_POINTER(ip6n->tnls_wc[0], t);
1443 return 0; 1453 return 0;
1444} 1454}
1445 1455
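Two independent changes run through the ip6_tunnel.c hunks: rcu_assign_pointer() becomes RCU_INIT_POINTER() on the assumption that those stores do not need a publication barrier (NULL stores, or pointers readers cannot yet reach), and the per-tunnel cached dst is bypassed whenever fl6->flowi6_mark is set, since a mark can steer the flow onto a different route than the cached one. A sketch of the usual rule of thumb for the pointer initialisers, with made-up names:

#include <linux/rcupdate.h>

struct item {
	struct item __rcu *next;
};

/* Tearing down: NULL carries no data, so no ordering is required. */
static void my_unpublish(struct item __rcu **slot)
{
	RCU_INIT_POINTER(*slot, NULL);
	synchronize_rcu();		/* wait out readers before freeing */
}

/* Publishing a live object: readers may dereference it immediately,
 * so keep the write barrier that rcu_assign_pointer() provides. */
static void my_publish(struct item __rcu **slot, struct item *obj)
{
	rcu_assign_pointer(*slot, obj);
}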
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 705c82886281..def0538e2413 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -696,8 +696,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
696 int err; 696 int err;
697 697
698 err = ip6mr_fib_lookup(net, &fl6, &mrt); 698 err = ip6mr_fib_lookup(net, &fl6, &mrt);
699 if (err < 0) 699 if (err < 0) {
700 kfree_skb(skb);
700 return err; 701 return err;
702 }
701 703
702 read_lock(&mrt_lock); 704 read_lock(&mrt_lock);
703 dev->stats.tx_bytes += skb->len; 705 dev->stats.tx_bytes += skb->len;
@@ -2052,8 +2054,10 @@ int ip6_mr_input(struct sk_buff *skb)
2052 int err; 2054 int err;
2053 2055
2054 err = ip6mr_fib_lookup(net, &fl6, &mrt); 2056 err = ip6mr_fib_lookup(net, &fl6, &mrt);
2055 if (err < 0) 2057 if (err < 0) {
2058 kfree_skb(skb);
2056 return err; 2059 return err;
2060 }
2057 2061
2058 read_lock(&mrt_lock); 2062 read_lock(&mrt_lock);
2059 cache = ip6mr_cache_find(mrt, 2063 cache = ip6mr_cache_find(mrt,
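Both ip6mr.c hunks fix the same leak: when ip6mr_fib_lookup() fails, the handler still owns the skb and must free it before returning the error. A minimal sketch of that ownership rule, using hypothetical names:

#include <linux/skbuff.h>

/* Placeholder for any setup step that can fail before the skb is
 * handed to the next layer. */
static int my_lookup(struct sk_buff *skb)
{
	return 0;
}

static int my_input(struct sk_buff *skb)
{
	int err = my_lookup(skb);

	if (err < 0) {
		kfree_skb(skb);		/* still ours on failure */
		return err;
	}
	/* ... normal path passes the skb onward and consumes it ... */
	return 0;
}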
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 9cb191ecaba8..2fbda5fc4cc4 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -475,7 +475,7 @@ sticky_done:
475 msg.msg_controllen = optlen; 475 msg.msg_controllen = optlen;
476 msg.msg_control = (void*)(opt+1); 476 msg.msg_control = (void*)(opt+1);
477 477
478 retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk, 478 retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk,
479 &junk); 479 &junk);
480 if (retv) 480 if (retv)
481 goto done; 481 goto done;
@@ -913,7 +913,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
913} 913}
914 914
915static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, 915static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
916 char __user *optval, int __user *optlen) 916 char __user *optval, int __user *optlen, unsigned flags)
917{ 917{
918 struct ipv6_pinfo *np = inet6_sk(sk); 918 struct ipv6_pinfo *np = inet6_sk(sk);
919 int len; 919 int len;
@@ -962,7 +962,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
962 962
963 msg.msg_control = optval; 963 msg.msg_control = optval;
964 msg.msg_controllen = len; 964 msg.msg_controllen = len;
965 msg.msg_flags = 0; 965 msg.msg_flags = flags;
966 966
967 lock_sock(sk); 967 lock_sock(sk);
968 skb = np->pktoptions; 968 skb = np->pktoptions;
@@ -1222,7 +1222,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
1222 if(level != SOL_IPV6) 1222 if(level != SOL_IPV6)
1223 return -ENOPROTOOPT; 1223 return -ENOPROTOOPT;
1224 1224
1225 err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); 1225 err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0);
1226#ifdef CONFIG_NETFILTER 1226#ifdef CONFIG_NETFILTER
1227 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1227 /* we need to exclude all possible ENOPROTOOPTs except default case */
1228 if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { 1228 if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
@@ -1264,7 +1264,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
1264 return compat_mc_getsockopt(sk, level, optname, optval, optlen, 1264 return compat_mc_getsockopt(sk, level, optname, optval, optlen,
1265 ipv6_getsockopt); 1265 ipv6_getsockopt);
1266 1266
1267 err = do_ipv6_getsockopt(sk, level, optname, optval, optlen); 1267 err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
1268 MSG_CMSG_COMPAT);
1268#ifdef CONFIG_NETFILTER 1269#ifdef CONFIG_NETFILTER
1269 /* we need to exclude all possible ENOPROTOOPTs except default case */ 1270 /* we need to exclude all possible ENOPROTOOPTs except default case */
1270 if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) { 1271 if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 3e6ebcdb4779..ee7839f4d6e3 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1059,7 +1059,7 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
1059 break; 1059 break;
1060 for (i=0; i<nsrcs; i++) { 1060 for (i=0; i<nsrcs; i++) {
1061 /* skip inactive filters */ 1061 /* skip inactive filters */
1062 if (pmc->mca_sfcount[MCAST_INCLUDE] || 1062 if (psf->sf_count[MCAST_INCLUDE] ||
1063 pmc->mca_sfcount[MCAST_EXCLUDE] != 1063 pmc->mca_sfcount[MCAST_EXCLUDE] !=
1064 psf->sf_count[MCAST_EXCLUDE]) 1064 psf->sf_count[MCAST_EXCLUDE])
1065 continue; 1065 continue;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 9da6e02eaaeb..1f52dd257631 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -533,7 +533,8 @@ void ndisc_send_skb(struct sk_buff *skb,
533 533
534 skb_dst_set(skb, dst); 534 skb_dst_set(skb, dst);
535 535
536 idev = in6_dev_get(dst->dev); 536 rcu_read_lock();
537 idev = __in6_dev_get(dst->dev);
537 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 538 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
538 539
539 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, 540 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
@@ -543,8 +544,7 @@ void ndisc_send_skb(struct sk_buff *skb,
543 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 544 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
544 } 545 }
545 546
546 if (likely(idev != NULL)) 547 rcu_read_unlock();
547 in6_dev_put(idev);
548} 548}
549 549
550EXPORT_SYMBOL(ndisc_send_skb); 550EXPORT_SYMBOL(ndisc_send_skb);
@@ -1039,7 +1039,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
1039 if (skb->len < sizeof(*rs_msg)) 1039 if (skb->len < sizeof(*rs_msg))
1040 return; 1040 return;
1041 1041
1042 idev = in6_dev_get(skb->dev); 1042 idev = __in6_dev_get(skb->dev);
1043 if (!idev) { 1043 if (!idev) {
1044 if (net_ratelimit()) 1044 if (net_ratelimit())
1045 ND_PRINTK1("ICMP6 RS: can't find in6 device\n"); 1045 ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
@@ -1080,7 +1080,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
1080 neigh_release(neigh); 1080 neigh_release(neigh);
1081 } 1081 }
1082out: 1082out:
1083 in6_dev_put(idev); 1083 return;
1084} 1084}
1085 1085
1086static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) 1086static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
@@ -1179,7 +1179,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1179 * set the RA_RECV flag in the interface 1179 * set the RA_RECV flag in the interface
1180 */ 1180 */
1181 1181
1182 in6_dev = in6_dev_get(skb->dev); 1182 in6_dev = __in6_dev_get(skb->dev);
1183 if (in6_dev == NULL) { 1183 if (in6_dev == NULL) {
1184 ND_PRINTK0(KERN_ERR 1184 ND_PRINTK0(KERN_ERR
1185 "ICMPv6 RA: can't find inet6 device for %s.\n", 1185 "ICMPv6 RA: can't find inet6 device for %s.\n",
@@ -1188,7 +1188,6 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1188 } 1188 }
1189 1189
1190 if (!ndisc_parse_options(opt, optlen, &ndopts)) { 1190 if (!ndisc_parse_options(opt, optlen, &ndopts)) {
1191 in6_dev_put(in6_dev);
1192 ND_PRINTK2(KERN_WARNING 1191 ND_PRINTK2(KERN_WARNING
1193 "ICMP6 RA: invalid ND options\n"); 1192 "ICMP6 RA: invalid ND options\n");
1194 return; 1193 return;
@@ -1255,7 +1254,6 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1255 ND_PRINTK0(KERN_ERR 1254 ND_PRINTK0(KERN_ERR
1256 "ICMPv6 RA: %s() failed to add default route.\n", 1255 "ICMPv6 RA: %s() failed to add default route.\n",
1257 __func__); 1256 __func__);
1258 in6_dev_put(in6_dev);
1259 return; 1257 return;
1260 } 1258 }
1261 1259
@@ -1265,7 +1263,6 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1265 "ICMPv6 RA: %s() got default router without neighbour.\n", 1263 "ICMPv6 RA: %s() got default router without neighbour.\n",
1266 __func__); 1264 __func__);
1267 dst_release(&rt->dst); 1265 dst_release(&rt->dst);
1268 in6_dev_put(in6_dev);
1269 return; 1266 return;
1270 } 1267 }
1271 neigh->flags |= NTF_ROUTER; 1268 neigh->flags |= NTF_ROUTER;
@@ -1422,7 +1419,6 @@ out:
1422 dst_release(&rt->dst); 1419 dst_release(&rt->dst);
1423 else if (neigh) 1420 else if (neigh)
1424 neigh_release(neigh); 1421 neigh_release(neigh);
1425 in6_dev_put(in6_dev);
1426} 1422}
1427 1423
1428static void ndisc_redirect_rcv(struct sk_buff *skb) 1424static void ndisc_redirect_rcv(struct sk_buff *skb)
@@ -1481,13 +1477,11 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1481 return; 1477 return;
1482 } 1478 }
1483 1479
1484 in6_dev = in6_dev_get(skb->dev); 1480 in6_dev = __in6_dev_get(skb->dev);
1485 if (!in6_dev) 1481 if (!in6_dev)
1486 return; 1482 return;
1487 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) { 1483 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1488 in6_dev_put(in6_dev);
1489 return; 1484 return;
1490 }
1491 1485
1492 /* RFC2461 8.1: 1486 /* RFC2461 8.1:
1493 * The IP source address of the Redirect MUST be the same as the current 1487 * The IP source address of the Redirect MUST be the same as the current
@@ -1497,7 +1491,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1497 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { 1491 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1498 ND_PRINTK2(KERN_WARNING 1492 ND_PRINTK2(KERN_WARNING
1499 "ICMPv6 Redirect: invalid ND options\n"); 1493 "ICMPv6 Redirect: invalid ND options\n");
1500 in6_dev_put(in6_dev);
1501 return; 1494 return;
1502 } 1495 }
1503 if (ndopts.nd_opts_tgt_lladdr) { 1496 if (ndopts.nd_opts_tgt_lladdr) {
@@ -1506,7 +1499,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1506 if (!lladdr) { 1499 if (!lladdr) {
1507 ND_PRINTK2(KERN_WARNING 1500 ND_PRINTK2(KERN_WARNING
1508 "ICMPv6 Redirect: invalid link-layer address length\n"); 1501 "ICMPv6 Redirect: invalid link-layer address length\n");
1509 in6_dev_put(in6_dev);
1510 return; 1502 return;
1511 } 1503 }
1512 } 1504 }
@@ -1518,7 +1510,6 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
1518 on_link); 1510 on_link);
1519 neigh_release(neigh); 1511 neigh_release(neigh);
1520 } 1512 }
1521 in6_dev_put(in6_dev);
1522} 1513}
1523 1514
1524void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, 1515void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
@@ -1651,7 +1642,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1651 csum_partial(icmph, len, 0)); 1642 csum_partial(icmph, len, 0));
1652 1643
1653 skb_dst_set(buff, dst); 1644 skb_dst_set(buff, dst);
1654 idev = in6_dev_get(dst->dev); 1645 rcu_read_lock();
1646 idev = __in6_dev_get(dst->dev);
1655 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 1647 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
1656 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, 1648 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1657 dst_output); 1649 dst_output);
@@ -1660,8 +1652,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1660 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); 1652 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1661 } 1653 }
1662 1654
1663 if (likely(idev != NULL)) 1655 rcu_read_unlock();
1664 in6_dev_put(idev);
1665 return; 1656 return;
1666 1657
1667release: 1658release:
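The ndisc.c hunks replace the in6_dev_get()/in6_dev_put() reference pairs with __in6_dev_get(), which takes no reference and therefore must be called under rcu_read_lock() (or RTNL). Transmit paths that had no such section gain an explicit rcu_read_lock()/rcu_read_unlock(); the receive handlers already run inside the RCU section of the packet input path, so they simply drop the puts. A small sketch of the lock-instead-of-refcount form, with a hypothetical sender:

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/addrconf.h>
#include <net/dst.h>
#include <net/ipv6.h>

static void my_count_output(struct sk_buff *skb, struct dst_entry *dst)
{
	struct inet6_dev *idev;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);		/* no refcount taken */
	IP6_UPD_PO_STATS(dev_net(dst->dev), idev, IPSTATS_MIB_OUT, skb->len);
	rcu_read_unlock();			/* and none to drop */
}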
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 249394863284..e63c3972a739 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
218 return skb; 218 return skb;
219 219
220nlmsg_failure: 220nlmsg_failure:
221 kfree_skb(skb);
221 *errp = -EINVAL; 222 *errp = -EINVAL;
222 printk(KERN_ERR "ip6_queue: error creating packet message\n"); 223 printk(KERN_ERR "ip6_queue: error creating packet message\n");
223 return NULL; 224 return NULL;
@@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
313{ 314{
314 struct nf_queue_entry *entry; 315 struct nf_queue_entry *entry;
315 316
316 if (vmsg->value > NF_MAX_VERDICT) 317 if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
317 return -EINVAL; 318 return -EINVAL;
318 319
319 entry = ipq_find_dequeue_entry(vmsg->id); 320 entry = ipq_find_dequeue_entry(vmsg->id);
@@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
358 break; 359 break;
359 360
360 case IPQM_VERDICT: 361 case IPQM_VERDICT:
361 if (pmsg->msg.verdict.value > NF_MAX_VERDICT) 362 status = ipq_set_verdict(&pmsg->msg.verdict,
362 status = -EINVAL; 363 len - sizeof(*pmsg));
363 else 364 break;
364 status = ipq_set_verdict(&pmsg->msg.verdict,
365 len - sizeof(*pmsg));
366 break;
367 default: 365 default:
368 status = -EINVAL; 366 status = -EINVAL;
369 } 367 }
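Two small correctness fixes in ip6_queue.c: the nlmsg_failure path now frees the skb it allocated, and a verdict coming from userspace is rejected not only when it exceeds NF_MAX_VERDICT but also when it is NF_STOLEN, which would leave the queued entry with no owner. The validity check, sketched as a hypothetical helper:

#include <linux/netfilter.h>
#include <linux/types.h>

/* Accept only verdicts the reinjection path can actually act on. */
static bool my_verdict_is_valid(unsigned int value)
{
	return value <= NF_MAX_VERDICT && value != NF_STOLEN;
}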
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 6a79f3081bdb..3486f62befa3 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -130,14 +130,14 @@ static mh_filter_t __rcu *mh_filter __read_mostly;
130 130
131int rawv6_mh_filter_register(mh_filter_t filter) 131int rawv6_mh_filter_register(mh_filter_t filter)
132{ 132{
133 rcu_assign_pointer(mh_filter, filter); 133 RCU_INIT_POINTER(mh_filter, filter);
134 return 0; 134 return 0;
135} 135}
136EXPORT_SYMBOL(rawv6_mh_filter_register); 136EXPORT_SYMBOL(rawv6_mh_filter_register);
137 137
138int rawv6_mh_filter_unregister(mh_filter_t filter) 138int rawv6_mh_filter_unregister(mh_filter_t filter)
139{ 139{
140 rcu_assign_pointer(mh_filter, NULL); 140 RCU_INIT_POINTER(mh_filter, NULL);
141 synchronize_rcu(); 141 synchronize_rcu();
142 return 0; 142 return 0;
143} 143}
@@ -372,9 +372,9 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
372 read_unlock(&raw_v6_hashinfo.lock); 372 read_unlock(&raw_v6_hashinfo.lock);
373} 373}
374 374
375static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) 375static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
376{ 376{
377 if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) && 377 if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
378 skb_checksum_complete(skb)) { 378 skb_checksum_complete(skb)) {
379 atomic_inc(&sk->sk_drops); 379 atomic_inc(&sk->sk_drops);
380 kfree_skb(skb); 380 kfree_skb(skb);
@@ -817,8 +817,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
817 memset(opt, 0, sizeof(struct ipv6_txoptions)); 817 memset(opt, 0, sizeof(struct ipv6_txoptions));
818 opt->tot_len = sizeof(struct ipv6_txoptions); 818 opt->tot_len = sizeof(struct ipv6_txoptions);
819 819
820 err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, 820 err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
821 &tclass, &dontfrag); 821 &hlimit, &tclass, &dontfrag);
822 if (err < 0) { 822 if (err < 0) {
823 fl6_sock_release(flowlabel); 823 fl6_sock_release(flowlabel);
824 return err; 824 return err;
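In raw.c the checksum test only asks whether a socket filter is attached; it never dereferences the pointer, so rcu_dereference_raw() is replaced by rcu_access_pointer(), which documents exactly that and needs no RCU read-side section. Sketched with a hypothetical predicate:

#include <linux/rcupdate.h>
#include <net/sock.h>

static bool my_filter_attached(struct sock *sk)
{
	/* Pointer value is only compared with NULL, never followed. */
	return rcu_access_pointer(sk->sk_filter) != NULL;
}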
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e8987da06667..fb545edef6ea 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -104,6 +104,9 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
104 struct inet_peer *peer; 104 struct inet_peer *peer;
105 u32 *p = NULL; 105 u32 *p = NULL;
106 106
107 if (!(rt->dst.flags & DST_HOST))
108 return NULL;
109
107 if (!rt->rt6i_peer) 110 if (!rt->rt6i_peer)
108 rt6_bind_peer(rt, 1); 111 rt6_bind_peer(rt, 1);
109 112
@@ -241,7 +244,9 @@ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
241{ 244{
242 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); 245 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
243 246
244 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); 247 if (rt != NULL)
248 memset(&rt->rt6i_table, 0,
249 sizeof(*rt) - sizeof(struct dst_entry));
245 250
246 return rt; 251 return rt;
247} 252}
@@ -252,6 +257,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
252 struct inet6_dev *idev = rt->rt6i_idev; 257 struct inet6_dev *idev = rt->rt6i_idev;
253 struct inet_peer *peer = rt->rt6i_peer; 258 struct inet_peer *peer = rt->rt6i_peer;
254 259
260 if (!(rt->dst.flags & DST_HOST))
261 dst_destroy_metrics_generic(dst);
262
255 if (idev != NULL) { 263 if (idev != NULL) {
256 rt->rt6i_idev = NULL; 264 rt->rt6i_idev = NULL;
257 in6_dev_put(idev); 265 in6_dev_put(idev);
@@ -364,7 +372,7 @@ out:
364#ifdef CONFIG_IPV6_ROUTER_PREF 372#ifdef CONFIG_IPV6_ROUTER_PREF
365static void rt6_probe(struct rt6_info *rt) 373static void rt6_probe(struct rt6_info *rt)
366{ 374{
367 struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; 375 struct neighbour *neigh;
368 /* 376 /*
369 * Okay, this does not seem to be appropriate 377 * Okay, this does not seem to be appropriate
370 * for now, however, we need to check if it 378 * for now, however, we need to check if it
@@ -373,8 +381,10 @@ static void rt6_probe(struct rt6_info *rt)
373 * Router Reachability Probe MUST be rate-limited 381 * Router Reachability Probe MUST be rate-limited
374 * to no more than one per minute. 382 * to no more than one per minute.
375 */ 383 */
384 rcu_read_lock();
385 neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
376 if (!neigh || (neigh->nud_state & NUD_VALID)) 386 if (!neigh || (neigh->nud_state & NUD_VALID))
377 return; 387 goto out;
378 read_lock_bh(&neigh->lock); 388 read_lock_bh(&neigh->lock);
379 if (!(neigh->nud_state & NUD_VALID) && 389 if (!(neigh->nud_state & NUD_VALID) &&
380 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 390 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
@@ -387,8 +397,11 @@ static void rt6_probe(struct rt6_info *rt)
387 target = (struct in6_addr *)&neigh->primary_key; 397 target = (struct in6_addr *)&neigh->primary_key;
388 addrconf_addr_solict_mult(target, &mcaddr); 398 addrconf_addr_solict_mult(target, &mcaddr);
389 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); 399 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
390 } else 400 } else {
391 read_unlock_bh(&neigh->lock); 401 read_unlock_bh(&neigh->lock);
402 }
403out:
404 rcu_read_unlock();
392} 405}
393#else 406#else
394static inline void rt6_probe(struct rt6_info *rt) 407static inline void rt6_probe(struct rt6_info *rt)
@@ -412,8 +425,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
412 425
413static inline int rt6_check_neigh(struct rt6_info *rt) 426static inline int rt6_check_neigh(struct rt6_info *rt)
414{ 427{
415 struct neighbour *neigh = dst_get_neighbour(&rt->dst); 428 struct neighbour *neigh;
416 int m; 429 int m;
430
431 rcu_read_lock();
432 neigh = dst_get_neighbour(&rt->dst);
417 if (rt->rt6i_flags & RTF_NONEXTHOP || 433 if (rt->rt6i_flags & RTF_NONEXTHOP ||
418 !(rt->rt6i_flags & RTF_GATEWAY)) 434 !(rt->rt6i_flags & RTF_GATEWAY))
419 m = 1; 435 m = 1;
@@ -430,6 +446,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
430 read_unlock_bh(&neigh->lock); 446 read_unlock_bh(&neigh->lock);
431 } else 447 } else
432 m = 0; 448 m = 0;
449 rcu_read_unlock();
433 return m; 450 return m;
434} 451}
435 452
@@ -714,9 +731,7 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
714 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 731 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
715 } 732 }
716 733
717 rt->rt6i_dst.plen = 128;
718 rt->rt6i_flags |= RTF_CACHE; 734 rt->rt6i_flags |= RTF_CACHE;
719 rt->dst.flags |= DST_HOST;
720 735
721#ifdef CONFIG_IPV6_SUBTREES 736#ifdef CONFIG_IPV6_SUBTREES
722 if (rt->rt6i_src.plen && saddr) { 737 if (rt->rt6i_src.plen && saddr) {
@@ -766,10 +781,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
766 struct rt6_info *rt = ip6_rt_copy(ort, daddr); 781 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
767 782
768 if (rt) { 783 if (rt) {
769 rt->rt6i_dst.plen = 128;
770 rt->rt6i_flags |= RTF_CACHE; 784 rt->rt6i_flags |= RTF_CACHE;
771 rt->dst.flags |= DST_HOST; 785 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
772 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst)));
773 } 786 }
774 return rt; 787 return rt;
775} 788}
@@ -803,7 +816,7 @@ restart:
803 dst_hold(&rt->dst); 816 dst_hold(&rt->dst);
804 read_unlock_bh(&table->tb6_lock); 817 read_unlock_bh(&table->tb6_lock);
805 818
806 if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) 819 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
807 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); 820 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
808 else if (!(rt->dst.flags & DST_HOST)) 821 else if (!(rt->dst.flags & DST_HOST))
809 nrt = rt6_alloc_clone(rt, &fl6->daddr); 822 nrt = rt6_alloc_clone(rt, &fl6->daddr);
@@ -1069,12 +1082,15 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1069 neigh = NULL; 1082 neigh = NULL;
1070 } 1083 }
1071 1084
1072 rt->rt6i_idev = idev; 1085 rt->dst.flags |= DST_HOST;
1086 rt->dst.output = ip6_output;
1073 dst_set_neighbour(&rt->dst, neigh); 1087 dst_set_neighbour(&rt->dst, neigh);
1074 atomic_set(&rt->dst.__refcnt, 1); 1088 atomic_set(&rt->dst.__refcnt, 1);
1075 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1076 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); 1089 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1077 rt->dst.output = ip6_output; 1090
1091 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1092 rt->rt6i_dst.plen = 128;
1093 rt->rt6i_idev = idev;
1078 1094
1079 spin_lock_bh(&icmp6_dst_lock); 1095 spin_lock_bh(&icmp6_dst_lock);
1080 rt->dst.next = icmp6_dst_gc_list; 1096 rt->dst.next = icmp6_dst_gc_list;
@@ -1252,6 +1268,14 @@ int ip6_route_add(struct fib6_config *cfg)
1252 if (rt->rt6i_dst.plen == 128) 1268 if (rt->rt6i_dst.plen == 128)
1253 rt->dst.flags |= DST_HOST; 1269 rt->dst.flags |= DST_HOST;
1254 1270
1271 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1272 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1273 if (!metrics) {
1274 err = -ENOMEM;
1275 goto out;
1276 }
1277 dst_init_metrics(&rt->dst, metrics, 0);
1278 }
1255#ifdef CONFIG_IPV6_SUBTREES 1279#ifdef CONFIG_IPV6_SUBTREES
1256 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1280 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1257 rt->rt6i_src.plen = cfg->fc_src_len; 1281 rt->rt6i_src.plen = cfg->fc_src_len;
@@ -1587,7 +1611,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1587 dst_confirm(&rt->dst); 1611 dst_confirm(&rt->dst);
1588 1612
1589 /* Duplicate redirect: silently ignore. */ 1613 /* Duplicate redirect: silently ignore. */
1590 if (neigh == dst_get_neighbour(&rt->dst)) 1614 if (neigh == dst_get_neighbour_raw(&rt->dst))
1591 goto out; 1615 goto out;
1592 1616
1593 nrt = ip6_rt_copy(rt, dest); 1617 nrt = ip6_rt_copy(rt, dest);
@@ -1598,9 +1622,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1598 if (on_link) 1622 if (on_link)
1599 nrt->rt6i_flags &= ~RTF_GATEWAY; 1623 nrt->rt6i_flags &= ~RTF_GATEWAY;
1600 1624
1601 nrt->rt6i_dst.plen = 128;
1602 nrt->dst.flags |= DST_HOST;
1603
1604 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); 1625 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1605 dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); 1626 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1606 1627
@@ -1682,7 +1703,7 @@ again:
1682 1. It is connected route. Action: COW 1703 1. It is connected route. Action: COW
1683 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 1704 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1684 */ 1705 */
1685 if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) 1706 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1686 nrt = rt6_alloc_cow(rt, daddr, saddr); 1707 nrt = rt6_alloc_cow(rt, daddr, saddr);
1687 else 1708 else
1688 nrt = rt6_alloc_clone(rt, daddr); 1709 nrt = rt6_alloc_clone(rt, daddr);
@@ -1745,9 +1766,10 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1745 if (rt) { 1766 if (rt) {
1746 rt->dst.input = ort->dst.input; 1767 rt->dst.input = ort->dst.input;
1747 rt->dst.output = ort->dst.output; 1768 rt->dst.output = ort->dst.output;
1769 rt->dst.flags |= DST_HOST;
1748 1770
1749 ipv6_addr_copy(&rt->rt6i_dst.addr, dest); 1771 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1750 rt->rt6i_dst.plen = ort->rt6i_dst.plen; 1772 rt->rt6i_dst.plen = 128;
1751 dst_copy_metrics(&rt->dst, &ort->dst); 1773 dst_copy_metrics(&rt->dst, &ort->dst);
1752 rt->dst.error = ort->dst.error; 1774 rt->dst.error = ort->dst.error;
1753 rt->rt6i_idev = ort->rt6i_idev; 1775 rt->rt6i_idev = ort->rt6i_idev;
@@ -2326,6 +2348,7 @@ static int rt6_fill_node(struct net *net,
2326 struct nlmsghdr *nlh; 2348 struct nlmsghdr *nlh;
2327 long expires; 2349 long expires;
2328 u32 table; 2350 u32 table;
2351 struct neighbour *n;
2329 2352
2330 if (prefix) { /* user wants prefix routes only */ 2353 if (prefix) { /* user wants prefix routes only */
2331 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2354 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
@@ -2414,8 +2437,11 @@ static int rt6_fill_node(struct net *net,
2414 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2437 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2415 goto nla_put_failure; 2438 goto nla_put_failure;
2416 2439
2417 if (dst_get_neighbour(&rt->dst)) 2440 rcu_read_lock();
2418 NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key); 2441 n = dst_get_neighbour(&rt->dst);
2442 if (n)
2443 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2444 rcu_read_unlock();
2419 2445
2420 if (rt->dst.dev) 2446 if (rt->dst.dev)
2421 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); 2447 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
@@ -2608,12 +2634,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2608#else 2634#else
2609 seq_puts(m, "00000000000000000000000000000000 00 "); 2635 seq_puts(m, "00000000000000000000000000000000 00 ");
2610#endif 2636#endif
2637 rcu_read_lock();
2611 n = dst_get_neighbour(&rt->dst); 2638 n = dst_get_neighbour(&rt->dst);
2612 if (n) { 2639 if (n) {
2613 seq_printf(m, "%pi6", n->primary_key); 2640 seq_printf(m, "%pi6", n->primary_key);
2614 } else { 2641 } else {
2615 seq_puts(m, "00000000000000000000000000000000"); 2642 seq_puts(m, "00000000000000000000000000000000");
2616 } 2643 }
2644 rcu_read_unlock();
2617 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2645 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2618 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), 2646 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2619 rt->dst.__use, rt->rt6i_flags, 2647 rt->dst.__use, rt->rt6i_flags,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 07bf1085458f..a7a18602a046 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -182,7 +182,7 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
182 (iter = rtnl_dereference(*tp)) != NULL; 182 (iter = rtnl_dereference(*tp)) != NULL;
183 tp = &iter->next) { 183 tp = &iter->next) {
184 if (t == iter) { 184 if (t == iter) {
185 rcu_assign_pointer(*tp, t->next); 185 RCU_INIT_POINTER(*tp, t->next);
186 break; 186 break;
187 } 187 }
188 } 188 }
@@ -192,8 +192,8 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
192{ 192{
193 struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t); 193 struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
194 194
195 rcu_assign_pointer(t->next, rtnl_dereference(*tp)); 195 RCU_INIT_POINTER(t->next, rtnl_dereference(*tp));
196 rcu_assign_pointer(*tp, t); 196 RCU_INIT_POINTER(*tp, t);
197} 197}
198 198
199static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) 199static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
@@ -391,7 +391,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
391 p->addr = a->addr; 391 p->addr = a->addr;
392 p->flags = a->flags; 392 p->flags = a->flags;
393 t->prl_count++; 393 t->prl_count++;
394 rcu_assign_pointer(t->prl, p); 394 RCU_INIT_POINTER(t->prl, p);
395out: 395out:
396 return err; 396 return err;
397} 397}
@@ -474,7 +474,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
474 struct sit_net *sitn = net_generic(net, sit_net_id); 474 struct sit_net *sitn = net_generic(net, sit_net_id);
475 475
476 if (dev == sitn->fb_tunnel_dev) { 476 if (dev == sitn->fb_tunnel_dev) {
477 rcu_assign_pointer(sitn->tunnels_wc[0], NULL); 477 RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL);
478 } else { 478 } else {
479 ipip6_tunnel_unlink(sitn, netdev_priv(dev)); 479 ipip6_tunnel_unlink(sitn, netdev_priv(dev));
480 ipip6_tunnel_del_prl(netdev_priv(dev), NULL); 480 ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
@@ -672,6 +672,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
672 if (skb->protocol != htons(ETH_P_IPV6)) 672 if (skb->protocol != htons(ETH_P_IPV6))
673 goto tx_error; 673 goto tx_error;
674 674
675 if (tos == 1)
676 tos = ipv6_get_dsfield(iph6);
677
675 /* ISATAP (RFC4214) - must come before 6to4 */ 678 /* ISATAP (RFC4214) - must come before 6to4 */
676 if (dev->priv_flags & IFF_ISATAP) { 679 if (dev->priv_flags & IFF_ISATAP) {
677 struct neighbour *neigh = NULL; 680 struct neighbour *neigh = NULL;
@@ -1173,7 +1176,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1173 if (!dev->tstats) 1176 if (!dev->tstats)
1174 return -ENOMEM; 1177 return -ENOMEM;
1175 dev_hold(dev); 1178 dev_hold(dev);
1176 rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); 1179 RCU_INIT_POINTER(sitn->tunnels_wc[0], tunnel);
1177 return 0; 1180 return 0;
1178} 1181}
1179 1182
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 89d5bf806222..ac838965ff34 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -165,7 +165,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
165 int mss; 165 int mss;
166 struct dst_entry *dst; 166 struct dst_entry *dst;
167 __u8 rcv_wscale; 167 __u8 rcv_wscale;
168 bool ecn_ok; 168 bool ecn_ok = false;
169 169
170 if (!sysctl_tcp_syncookies || !th->ack || th->rst) 170 if (!sysctl_tcp_syncookies || !th->ack || th->rst)
171 goto out; 171 goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 78aa53492b3e..5357902c7978 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -61,6 +61,7 @@
61#include <net/timewait_sock.h> 61#include <net/timewait_sock.h>
62#include <net/netdma.h> 62#include <net/netdma.h>
63#include <net/inet_common.h> 63#include <net/inet_common.h>
64#include <net/secure_seq.h>
64 65
65#include <asm/uaccess.h> 66#include <asm/uaccess.h>
66 67
@@ -530,20 +531,6 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
530 return tcp_v6_send_synack(sk, req, rvp); 531 return tcp_v6_send_synack(sk, req, rvp);
531} 532}
532 533
533static inline void syn_flood_warning(struct sk_buff *skb)
534{
535#ifdef CONFIG_SYN_COOKIES
536 if (sysctl_tcp_syncookies)
537 printk(KERN_INFO
538 "TCPv6: Possible SYN flooding on port %d. "
539 "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
540 else
541#endif
542 printk(KERN_INFO
543 "TCPv6: Possible SYN flooding on port %d. "
544 "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
545}
546
547static void tcp_v6_reqsk_destructor(struct request_sock *req) 534static void tcp_v6_reqsk_destructor(struct request_sock *req)
548{ 535{
549 kfree_skb(inet6_rsk(req)->pktopts); 536 kfree_skb(inet6_rsk(req)->pktopts);
@@ -604,7 +591,8 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
604 } 591 }
605 sk_nocaps_add(sk, NETIF_F_GSO_MASK); 592 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
606 } 593 }
607 if (tcp_alloc_md5sig_pool(sk) == NULL) { 594 if (tp->md5sig_info->entries6 == 0 &&
595 tcp_alloc_md5sig_pool(sk) == NULL) {
608 kfree(newkey); 596 kfree(newkey);
609 return -ENOMEM; 597 return -ENOMEM;
610 } 598 }
@@ -613,8 +601,9 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
613 (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC); 601 (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);
614 602
615 if (!keys) { 603 if (!keys) {
616 tcp_free_md5sig_pool();
617 kfree(newkey); 604 kfree(newkey);
605 if (tp->md5sig_info->entries6 == 0)
606 tcp_free_md5sig_pool();
618 return -ENOMEM; 607 return -ENOMEM;
619 } 608 }
620 609
@@ -660,6 +649,7 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
660 kfree(tp->md5sig_info->keys6); 649 kfree(tp->md5sig_info->keys6);
661 tp->md5sig_info->keys6 = NULL; 650 tp->md5sig_info->keys6 = NULL;
662 tp->md5sig_info->alloced6 = 0; 651 tp->md5sig_info->alloced6 = 0;
652 tcp_free_md5sig_pool();
663 } else { 653 } else {
664 /* shrink the database */ 654 /* shrink the database */
665 if (tp->md5sig_info->entries6 != i) 655 if (tp->md5sig_info->entries6 != i)
@@ -668,7 +658,6 @@ static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
668 (tp->md5sig_info->entries6 - i) 658 (tp->md5sig_info->entries6 - i)
669 * sizeof (tp->md5sig_info->keys6[0])); 659 * sizeof (tp->md5sig_info->keys6[0]));
670 } 660 }
671 tcp_free_md5sig_pool();
672 return 0; 661 return 0;
673 } 662 }
674 } 663 }
@@ -1178,11 +1167,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1178 struct tcp_sock *tp = tcp_sk(sk); 1167 struct tcp_sock *tp = tcp_sk(sk);
1179 __u32 isn = TCP_SKB_CB(skb)->when; 1168 __u32 isn = TCP_SKB_CB(skb)->when;
1180 struct dst_entry *dst = NULL; 1169 struct dst_entry *dst = NULL;
1181#ifdef CONFIG_SYN_COOKIES
1182 int want_cookie = 0; 1170 int want_cookie = 0;
1183#else
1184#define want_cookie 0
1185#endif
1186 1171
1187 if (skb->protocol == htons(ETH_P_IP)) 1172 if (skb->protocol == htons(ETH_P_IP))
1188 return tcp_v4_conn_request(sk, skb); 1173 return tcp_v4_conn_request(sk, skb);
@@ -1191,14 +1176,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1191 goto drop; 1176 goto drop;
1192 1177
1193 if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 1178 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1194 if (net_ratelimit()) 1179 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
1195 syn_flood_warning(skb); 1180 if (!want_cookie)
1196#ifdef CONFIG_SYN_COOKIES 1181 goto drop;
1197 if (sysctl_tcp_syncookies)
1198 want_cookie = 1;
1199 else
1200#endif
1201 goto drop;
1202 } 1182 }
1203 1183
1204 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 1184 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
@@ -1248,9 +1228,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1248 while (l-- > 0) 1228 while (l-- > 0)
1249 *c++ ^= *hash_location++; 1229 *c++ ^= *hash_location++;
1250 1230
1251#ifdef CONFIG_SYN_COOKIES
1252 want_cookie = 0; /* not our kind of cookie */ 1231 want_cookie = 0; /* not our kind of cookie */
1253#endif
1254 tmp_ext.cookie_out_never = 0; /* false */ 1232 tmp_ext.cookie_out_never = 0; /* false */
1255 tmp_ext.cookie_plus = tmp_opt.cookie_plus; 1233 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1256 } else if (!tp->rx_opt.cookie_in_always) { 1234 } else if (!tp->rx_opt.cookie_in_always) {
@@ -1407,6 +1385,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1407 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1385 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1408#endif 1386#endif
1409 1387
1388 newnp->ipv6_ac_list = NULL;
1389 newnp->ipv6_fl_list = NULL;
1410 newnp->pktoptions = NULL; 1390 newnp->pktoptions = NULL;
1411 newnp->opt = NULL; 1391 newnp->opt = NULL;
1412 newnp->mcast_oif = inet6_iif(skb); 1392 newnp->mcast_oif = inet6_iif(skb);
@@ -1471,6 +1451,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1471 First: no IPv4 options. 1451 First: no IPv4 options.
1472 */ 1452 */
1473 newinet->inet_opt = NULL; 1453 newinet->inet_opt = NULL;
1454 newnp->ipv6_ac_list = NULL;
1474 newnp->ipv6_fl_list = NULL; 1455 newnp->ipv6_fl_list = NULL;
1475 1456
1476 /* Clone RX bits */ 1457 /* Clone RX bits */
@@ -1627,7 +1608,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1627 opt_skb = skb_clone(skb, GFP_ATOMIC); 1608 opt_skb = skb_clone(skb, GFP_ATOMIC);
1628 1609
1629 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1610 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1630 sock_rps_save_rxhash(sk, skb->rxhash); 1611 sock_rps_save_rxhash(sk, skb);
1631 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) 1612 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1632 goto reset; 1613 goto reset;
1633 if (opt_skb) 1614 if (opt_skb)
@@ -1649,7 +1630,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1649 * the new socket.. 1630 * the new socket..
1650 */ 1631 */
1651 if(nsk != sk) { 1632 if(nsk != sk) {
1652 sock_rps_save_rxhash(nsk, skb->rxhash); 1633 sock_rps_save_rxhash(nsk, skb);
1653 if (tcp_child_process(sk, nsk, skb)) 1634 if (tcp_child_process(sk, nsk, skb))
1654 goto reset; 1635 goto reset;
1655 if (opt_skb) 1636 if (opt_skb)
@@ -1657,7 +1638,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1657 return 0; 1638 return 0;
1658 } 1639 }
1659 } else 1640 } else
1660 sock_rps_save_rxhash(sk, skb->rxhash); 1641 sock_rps_save_rxhash(sk, skb);
1661 1642
1662 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) 1643 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1663 goto reset; 1644 goto reset;
@@ -1741,7 +1722,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
1741 skb->len - th->doff*4); 1722 skb->len - th->doff*4);
1742 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1723 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1743 TCP_SKB_CB(skb)->when = 0; 1724 TCP_SKB_CB(skb)->when = 0;
1744 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr); 1725 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1745 TCP_SKB_CB(skb)->sacked = 0; 1726 TCP_SKB_CB(skb)->sacked = 0;
1746 1727
1747 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 1728 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
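The tcp_ipv6.c changes fold the private syn_flood_warning() into the shared tcp_syn_flood_action() helper, which both rate-limits the warning and reports whether a cookie should be sent, and they make the MD5 code take the shared signature pool only when the first IPv6 key is installed and release it only when the last one is removed. A sketch of that take-on-first/release-on-last rule, with entirely hypothetical names standing in for tcp_alloc_md5sig_pool()/tcp_free_md5sig_pool():

#include <linux/errno.h>

static int shared_pool_get(void) { return 1; }	/* placeholder: 1 = success */
static void shared_pool_put(void) { }		/* placeholder */

struct key_db {
	unsigned int nkeys;
};

static int my_add_key(struct key_db *db)
{
	if (db->nkeys == 0 && !shared_pool_get())
		return -ENOMEM;			/* first key takes the pool */
	db->nkeys++;
	return 0;
}

static void my_del_key(struct key_db *db)
{
	if (--db->nkeys == 0)
		shared_pool_put();		/* last key releases it */
}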
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 29213b51c499..f4ca0a5b3457 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -509,7 +509,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
509 int is_udplite = IS_UDPLITE(sk); 509 int is_udplite = IS_UDPLITE(sk);
510 510
511 if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) 511 if (!ipv6_addr_any(&inet6_sk(sk)->daddr))
512 sock_rps_save_rxhash(sk, skb->rxhash); 512 sock_rps_save_rxhash(sk, skb);
513 513
514 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 514 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
515 goto drop; 515 goto drop;
@@ -533,7 +533,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
533 } 533 }
534 } 534 }
535 535
536 if (rcu_dereference_raw(sk->sk_filter)) { 536 if (rcu_access_pointer(sk->sk_filter)) {
537 if (udp_lib_checksum_complete(skb)) 537 if (udp_lib_checksum_complete(skb))
538 goto drop; 538 goto drop;
539 } 539 }
@@ -1090,8 +1090,8 @@ do_udp_sendmsg:
1090 memset(opt, 0, sizeof(struct ipv6_txoptions)); 1090 memset(opt, 0, sizeof(struct ipv6_txoptions));
1091 opt->tot_len = sizeof(*opt); 1091 opt->tot_len = sizeof(*opt);
1092 1092
1093 err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, 1093 err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
1094 &tclass, &dontfrag); 1094 &hlimit, &tclass, &dontfrag);
1095 if (err < 0) { 1095 if (err < 0) {
1096 fl6_sock_release(flowlabel); 1096 fl6_sock_release(flowlabel);
1097 return err; 1097 return err;
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index e8d5f4405d68..d14152e866d9 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -50,7 +50,7 @@ static const struct net_device_ops irlan_eth_netdev_ops = {
50 .ndo_open = irlan_eth_open, 50 .ndo_open = irlan_eth_open,
51 .ndo_stop = irlan_eth_close, 51 .ndo_stop = irlan_eth_close,
52 .ndo_start_xmit = irlan_eth_xmit, 52 .ndo_start_xmit = irlan_eth_xmit,
53 .ndo_set_multicast_list = irlan_eth_set_multicast_list, 53 .ndo_set_rx_mode = irlan_eth_set_multicast_list,
54 .ndo_change_mtu = eth_change_mtu, 54 .ndo_change_mtu = eth_change_mtu,
55 .ndo_validate_addr = eth_validate_addr, 55 .ndo_validate_addr = eth_validate_addr,
56}; 56};
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c
index d0b70dadf73b..2615ffc8e785 100644
--- a/net/irda/irsysctl.c
+++ b/net/irda/irsysctl.c
@@ -40,9 +40,9 @@ extern int sysctl_slot_timeout;
40extern int sysctl_fast_poll_increase; 40extern int sysctl_fast_poll_increase;
41extern char sysctl_devname[]; 41extern char sysctl_devname[];
42extern int sysctl_max_baud_rate; 42extern int sysctl_max_baud_rate;
43extern int sysctl_min_tx_turn_time; 43extern unsigned int sysctl_min_tx_turn_time;
44extern int sysctl_max_tx_data_size; 44extern unsigned int sysctl_max_tx_data_size;
45extern int sysctl_max_tx_window; 45extern unsigned int sysctl_max_tx_window;
46extern int sysctl_max_noreply_time; 46extern int sysctl_max_noreply_time;
47extern int sysctl_warn_noreply_time; 47extern int sysctl_warn_noreply_time;
48extern int sysctl_lap_keepalive_time; 48extern int sysctl_lap_keepalive_time;
diff --git a/net/irda/qos.c b/net/irda/qos.c
index 1b51bcf42394..4369f7f41bcb 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -60,7 +60,7 @@ int sysctl_max_noreply_time = 12;
60 * Default is 10us which means using the unmodified value given by the 60 * Default is 10us which means using the unmodified value given by the
61 * peer except if it's 0 (0 is likely a bug in the other stack). 61 * peer except if it's 0 (0 is likely a bug in the other stack).
62 */ 62 */
63unsigned sysctl_min_tx_turn_time = 10; 63unsigned int sysctl_min_tx_turn_time = 10;
64/* 64/*
65 * Maximum data size to be used in transmission in payload of LAP frame. 65 * Maximum data size to be used in transmission in payload of LAP frame.
66 * There is a bit of confusion in the IrDA spec : 66 * There is a bit of confusion in the IrDA spec :
@@ -75,13 +75,13 @@ unsigned sysctl_min_tx_turn_time = 10;
75 * bytes frames or all negotiated frame sizes, but you can use the sysctl 75 * bytes frames or all negotiated frame sizes, but you can use the sysctl
76 * to play with this value anyway. 76 * to play with this value anyway.
77 * Jean II */ 77 * Jean II */
78unsigned sysctl_max_tx_data_size = 2042; 78unsigned int sysctl_max_tx_data_size = 2042;
79/* 79/*
80 * Maximum transmit window, i.e. number of LAP frames between turn-around. 80 * Maximum transmit window, i.e. number of LAP frames between turn-around.
81 * This allow to override what the peer told us. Some peers are buggy and 81 * This allow to override what the peer told us. Some peers are buggy and
82 * don't always support what they tell us. 82 * don't always support what they tell us.
83 * Jean II */ 83 * Jean II */
84unsigned sysctl_max_tx_window = 7; 84unsigned int sysctl_max_tx_window = 7;
85 85
86static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get); 86static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get);
87static int irlap_param_link_disconnect(void *instance, irda_param_t *parm, 87static int irlap_param_link_disconnect(void *instance, irda_param_t *parm,
diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig
index 16ce9cd4f39e..497fbe732def 100644
--- a/net/iucv/Kconfig
+++ b/net/iucv/Kconfig
@@ -1,15 +1,17 @@
1config IUCV 1config IUCV
2 tristate "IUCV support (S390 - z/VM only)"
3 depends on S390 2 depends on S390
3 def_tristate y if S390
4 prompt "IUCV support (S390 - z/VM only)"
4 help 5 help
5 Select this option if you want to use inter-user communication 6 Select this option if you want to use inter-user communication
6 under VM or VIF. If you run on z/VM, say "Y" to enable a fast 7 under VM or VIF. If you run on z/VM, say "Y" to enable a fast
7 communication link between VM guests. 8 communication link between VM guests.
8 9
9config AFIUCV 10config AFIUCV
10 tristate "AF_IUCV support (S390 - z/VM only)" 11 depends on S390
11 depends on IUCV 12 def_tristate m if QETH_L3 || IUCV
13 prompt "AF_IUCV Socket support (S390 - z/VM and HiperSockets transport)"
12 help 14 help
13 Select this option if you want to use inter-user communication under 15 Select this option if you want to use AF_IUCV socket applications
14 VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast 16 based on z/VM inter-user communication vehicle or based on
15 communication link between VM guests. 17 HiperSockets.
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index e2013e434d03..274d150320c0 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -27,10 +27,9 @@
27#include <asm/cpcmd.h> 27#include <asm/cpcmd.h>
28#include <linux/kmod.h> 28#include <linux/kmod.h>
29 29
30#include <net/iucv/iucv.h>
31#include <net/iucv/af_iucv.h> 30#include <net/iucv/af_iucv.h>
32 31
33#define VERSION "1.1" 32#define VERSION "1.2"
34 33
35static char iucv_userid[80]; 34static char iucv_userid[80];
36 35
@@ -42,6 +41,8 @@ static struct proto iucv_proto = {
42 .obj_size = sizeof(struct iucv_sock), 41 .obj_size = sizeof(struct iucv_sock),
43}; 42};
44 43
44static struct iucv_interface *pr_iucv;
45
45/* special AF_IUCV IPRM messages */ 46/* special AF_IUCV IPRM messages */
46static const u8 iprm_shutdown[8] = 47static const u8 iprm_shutdown[8] =
47 {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}; 48 {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
@@ -90,6 +91,12 @@ do { \
90static void iucv_sock_kill(struct sock *sk); 91static void iucv_sock_kill(struct sock *sk);
91static void iucv_sock_close(struct sock *sk); 92static void iucv_sock_close(struct sock *sk);
92 93
94static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
95 struct packet_type *pt, struct net_device *orig_dev);
96static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
97 struct sk_buff *skb, u8 flags);
98static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify);
99
93/* Call Back functions */ 100/* Call Back functions */
94static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); 101static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
95static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); 102static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *);
@@ -165,7 +172,7 @@ static int afiucv_pm_freeze(struct device *dev)
165 case IUCV_CLOSING: 172 case IUCV_CLOSING:
166 case IUCV_CONNECTED: 173 case IUCV_CONNECTED:
167 if (iucv->path) { 174 if (iucv->path) {
168 err = iucv_path_sever(iucv->path, NULL); 175 err = pr_iucv->path_sever(iucv->path, NULL);
169 iucv_path_free(iucv->path); 176 iucv_path_free(iucv->path);
170 iucv->path = NULL; 177 iucv->path = NULL;
171 } 178 }
@@ -229,7 +236,7 @@ static const struct dev_pm_ops afiucv_pm_ops = {
229static struct device_driver af_iucv_driver = { 236static struct device_driver af_iucv_driver = {
230 .owner = THIS_MODULE, 237 .owner = THIS_MODULE,
231 .name = "afiucv", 238 .name = "afiucv",
232 .bus = &iucv_bus, 239 .bus = NULL,
233 .pm = &afiucv_pm_ops, 240 .pm = &afiucv_pm_ops,
234}; 241};
235 242
@@ -294,7 +301,11 @@ static inline int iucv_below_msglim(struct sock *sk)
294 301
295 if (sk->sk_state != IUCV_CONNECTED) 302 if (sk->sk_state != IUCV_CONNECTED)
296 return 1; 303 return 1;
297 return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); 304 if (iucv->transport == AF_IUCV_TRANS_IUCV)
305 return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim);
306 else
307 return ((atomic_read(&iucv->msg_sent) < iucv->msglimit_peer) &&
308 (atomic_read(&iucv->pendings) <= 0));
298} 309}
299 310
300/** 311/**
@@ -312,6 +323,79 @@ static void iucv_sock_wake_msglim(struct sock *sk)
312 rcu_read_unlock(); 323 rcu_read_unlock();
313} 324}
314 325
326/**
327 * afiucv_hs_send() - send a message through HiperSockets transport
328 */
329static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
330 struct sk_buff *skb, u8 flags)
331{
332 struct net *net = sock_net(sock);
333 struct iucv_sock *iucv = iucv_sk(sock);
334 struct af_iucv_trans_hdr *phs_hdr;
335 struct sk_buff *nskb;
336 int err, confirm_recv = 0;
337
338 memset(skb->head, 0, ETH_HLEN);
339 phs_hdr = (struct af_iucv_trans_hdr *)skb_push(skb,
340 sizeof(struct af_iucv_trans_hdr));
341 skb_reset_mac_header(skb);
342 skb_reset_network_header(skb);
343 skb_push(skb, ETH_HLEN);
344 skb_reset_mac_header(skb);
345 memset(phs_hdr, 0, sizeof(struct af_iucv_trans_hdr));
346
347 phs_hdr->magic = ETH_P_AF_IUCV;
348 phs_hdr->version = 1;
349 phs_hdr->flags = flags;
350 if (flags == AF_IUCV_FLAG_SYN)
351 phs_hdr->window = iucv->msglimit;
352 else if ((flags == AF_IUCV_FLAG_WIN) || !flags) {
353 confirm_recv = atomic_read(&iucv->msg_recv);
354 phs_hdr->window = confirm_recv;
355 if (confirm_recv)
356 phs_hdr->flags = phs_hdr->flags | AF_IUCV_FLAG_WIN;
357 }
358 memcpy(phs_hdr->destUserID, iucv->dst_user_id, 8);
359 memcpy(phs_hdr->destAppName, iucv->dst_name, 8);
360 memcpy(phs_hdr->srcUserID, iucv->src_user_id, 8);
361 memcpy(phs_hdr->srcAppName, iucv->src_name, 8);
362 ASCEBC(phs_hdr->destUserID, sizeof(phs_hdr->destUserID));
363 ASCEBC(phs_hdr->destAppName, sizeof(phs_hdr->destAppName));
364 ASCEBC(phs_hdr->srcUserID, sizeof(phs_hdr->srcUserID));
365 ASCEBC(phs_hdr->srcAppName, sizeof(phs_hdr->srcAppName));
366 if (imsg)
367 memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message));
368
369 rcu_read_lock();
370 skb->dev = dev_get_by_index_rcu(net, sock->sk_bound_dev_if);
371 rcu_read_unlock();
372 if (!skb->dev)
373 return -ENODEV;
374 if (!(skb->dev->flags & IFF_UP))
375 return -ENETDOWN;
376 if (skb->len > skb->dev->mtu) {
377 if (sock->sk_type == SOCK_SEQPACKET)
378 return -EMSGSIZE;
379 else
380 skb_trim(skb, skb->dev->mtu);
381 }
382 skb->protocol = ETH_P_AF_IUCV;
383 skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
384 nskb = skb_clone(skb, GFP_ATOMIC);
385 if (!nskb)
386 return -ENOMEM;
387 skb_queue_tail(&iucv->send_skb_q, nskb);
388 err = dev_queue_xmit(skb);
389 if (err) {
390 skb_unlink(nskb, &iucv->send_skb_q);
391 kfree_skb(nskb);
392 } else {
393 atomic_sub(confirm_recv, &iucv->msg_recv);
394 WARN_ON(atomic_read(&iucv->msg_recv) < 0);
395 }
396 return err;
397}
398
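afiucv_hs_send() above folds the receive-confirmation counter into the header whenever a data or window-update frame goes out. A sketch of just that rule; the FLAG_* values and the two-field header here are assumptions, not the kernel's definitions:

    #include <stdint.h>

    #define FLAG_SYN 0x01   /* illustrative values, not AF_IUCV_FLAG_* */
    #define FLAG_WIN 0x08

    struct hs_hdr {
        uint8_t  flags;
        uint16_t window;
    };

    /* returns the number of received messages being confirmed */
    static int fill_window(struct hs_hdr *h, uint8_t flags,
                           int my_msglimit, int msgs_received)
    {
        int confirmed = 0;

        h->flags = flags;
        if (flags == FLAG_SYN) {
            /* connect request: advertise our own receive limit */
            h->window = my_msglimit;
        } else if (flags == FLAG_WIN || !flags) {
            /* data or window-update frame: piggyback the number of
             * messages consumed since the last confirmation */
            confirmed = msgs_received;
            h->window = confirmed;
            if (confirmed)
                h->flags |= FLAG_WIN;
        }
        return confirmed;   /* caller subtracts this from msg_recv on success */
    }

On a successful dev_queue_xmit() the kernel function subtracts exactly that confirmed amount from msg_recv, which keeps the peer's view of the window consistent.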
315/* Timers */ 399/* Timers */
316static void iucv_sock_timeout(unsigned long arg) 400static void iucv_sock_timeout(unsigned long arg)
317{ 401{
@@ -380,6 +464,8 @@ static void iucv_sock_close(struct sock *sk)
380 unsigned char user_data[16]; 464 unsigned char user_data[16];
381 struct iucv_sock *iucv = iucv_sk(sk); 465 struct iucv_sock *iucv = iucv_sk(sk);
382 unsigned long timeo; 466 unsigned long timeo;
467 int err, blen;
468 struct sk_buff *skb;
383 469
384 iucv_sock_clear_timer(sk); 470 iucv_sock_clear_timer(sk);
385 lock_sock(sk); 471 lock_sock(sk);
@@ -390,6 +476,20 @@ static void iucv_sock_close(struct sock *sk)
390 break; 476 break;
391 477
392 case IUCV_CONNECTED: 478 case IUCV_CONNECTED:
479 if (iucv->transport == AF_IUCV_TRANS_HIPER) {
480 /* send fin */
481 blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
482 skb = sock_alloc_send_skb(sk, blen, 1, &err);
483 if (skb) {
484 skb_reserve(skb,
485 sizeof(struct af_iucv_trans_hdr) +
486 ETH_HLEN);
487 err = afiucv_hs_send(NULL, sk, skb,
488 AF_IUCV_FLAG_FIN);
489 }
490 sk->sk_state = IUCV_DISCONN;
491 sk->sk_state_change(sk);
492 }
393 case IUCV_DISCONN: 493 case IUCV_DISCONN:
394 sk->sk_state = IUCV_CLOSING; 494 sk->sk_state = IUCV_CLOSING;
395 sk->sk_state_change(sk); 495 sk->sk_state_change(sk);
@@ -412,7 +512,7 @@ static void iucv_sock_close(struct sock *sk)
412 low_nmcpy(user_data, iucv->src_name); 512 low_nmcpy(user_data, iucv->src_name);
413 high_nmcpy(user_data, iucv->dst_name); 513 high_nmcpy(user_data, iucv->dst_name);
414 ASCEBC(user_data, sizeof(user_data)); 514 ASCEBC(user_data, sizeof(user_data));
415 iucv_path_sever(iucv->path, user_data); 515 pr_iucv->path_sever(iucv->path, user_data);
416 iucv_path_free(iucv->path); 516 iucv_path_free(iucv->path);
417 iucv->path = NULL; 517 iucv->path = NULL;
418 } 518 }
@@ -444,23 +544,33 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent)
444static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) 544static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
445{ 545{
446 struct sock *sk; 546 struct sock *sk;
547 struct iucv_sock *iucv;
447 548
448 sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto); 549 sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto);
449 if (!sk) 550 if (!sk)
450 return NULL; 551 return NULL;
552 iucv = iucv_sk(sk);
451 553
452 sock_init_data(sock, sk); 554 sock_init_data(sock, sk);
453 INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); 555 INIT_LIST_HEAD(&iucv->accept_q);
454 spin_lock_init(&iucv_sk(sk)->accept_q_lock); 556 spin_lock_init(&iucv->accept_q_lock);
455 skb_queue_head_init(&iucv_sk(sk)->send_skb_q); 557 skb_queue_head_init(&iucv->send_skb_q);
456 INIT_LIST_HEAD(&iucv_sk(sk)->message_q.list); 558 INIT_LIST_HEAD(&iucv->message_q.list);
457 spin_lock_init(&iucv_sk(sk)->message_q.lock); 559 spin_lock_init(&iucv->message_q.lock);
458 skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); 560 skb_queue_head_init(&iucv->backlog_skb_q);
459 iucv_sk(sk)->send_tag = 0; 561 iucv->send_tag = 0;
460 iucv_sk(sk)->flags = 0; 562 atomic_set(&iucv->pendings, 0);
461 iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT; 563 iucv->flags = 0;
462 iucv_sk(sk)->path = NULL; 564 iucv->msglimit = 0;
463 memset(&iucv_sk(sk)->src_user_id , 0, 32); 565 atomic_set(&iucv->msg_sent, 0);
566 atomic_set(&iucv->msg_recv, 0);
567 iucv->path = NULL;
568 iucv->sk_txnotify = afiucv_hs_callback_txnotify;
569 memset(&iucv->src_user_id , 0, 32);
570 if (pr_iucv)
571 iucv->transport = AF_IUCV_TRANS_IUCV;
572 else
573 iucv->transport = AF_IUCV_TRANS_HIPER;
464 574
465 sk->sk_destruct = iucv_sock_destruct; 575 sk->sk_destruct = iucv_sock_destruct;
466 sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; 576 sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
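One behavioural detail worth noting in iucv_sock_alloc() above: msglimit now starts at 0 and only receives its transport-specific default at bind or autobind time, presumably so that a limit set by the application (SO_MSGLIMIT) before bind is preserved. A hedged sketch of that rule, using the constants from this patch; the helper itself is not part of the patch:

    #include <net/iucv/af_iucv.h>

    static void apply_msglimit_default(struct iucv_sock *iucv, bool hiper)
    {
        if (!iucv->msglimit)
            iucv->msglimit = hiper ? IUCV_HIPER_MSGLIM_DEFAULT
                                   : IUCV_QUEUELEN_DEFAULT;
    }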
@@ -591,7 +701,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
591 struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr; 701 struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
592 struct sock *sk = sock->sk; 702 struct sock *sk = sock->sk;
593 struct iucv_sock *iucv; 703 struct iucv_sock *iucv;
594 int err; 704 int err = 0;
705 struct net_device *dev;
706 char uid[9];
595 707
596 /* Verify the input sockaddr */ 708 /* Verify the input sockaddr */
597 if (!addr || addr->sa_family != AF_IUCV) 709 if (!addr || addr->sa_family != AF_IUCV)
@@ -610,19 +722,46 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
610 err = -EADDRINUSE; 722 err = -EADDRINUSE;
611 goto done_unlock; 723 goto done_unlock;
612 } 724 }
613 if (iucv->path) { 725 if (iucv->path)
614 err = 0;
615 goto done_unlock; 726 goto done_unlock;
616 }
617 727
618 /* Bind the socket */ 728 /* Bind the socket */
619 memcpy(iucv->src_name, sa->siucv_name, 8);
620 729
621 /* Copy the user id */ 730 if (pr_iucv)
622 memcpy(iucv->src_user_id, iucv_userid, 8); 731 if (!memcmp(sa->siucv_user_id, iucv_userid, 8))
623 sk->sk_state = IUCV_BOUND; 732 goto vm_bind; /* VM IUCV transport */
624 err = 0;
625 733
734 /* try hiper transport */
735 memcpy(uid, sa->siucv_user_id, sizeof(uid));
736 ASCEBC(uid, 8);
737 rcu_read_lock();
738 for_each_netdev_rcu(&init_net, dev) {
739 if (!memcmp(dev->perm_addr, uid, 8)) {
740 memcpy(iucv->src_name, sa->siucv_name, 8);
741 memcpy(iucv->src_user_id, sa->siucv_user_id, 8);
742 sock->sk->sk_bound_dev_if = dev->ifindex;
743 sk->sk_state = IUCV_BOUND;
744 iucv->transport = AF_IUCV_TRANS_HIPER;
745 if (!iucv->msglimit)
746 iucv->msglimit = IUCV_HIPER_MSGLIM_DEFAULT;
747 rcu_read_unlock();
748 goto done_unlock;
749 }
750 }
751 rcu_read_unlock();
752vm_bind:
753 if (pr_iucv) {
754 /* use local userid for backward compat */
755 memcpy(iucv->src_name, sa->siucv_name, 8);
756 memcpy(iucv->src_user_id, iucv_userid, 8);
757 sk->sk_state = IUCV_BOUND;
758 iucv->transport = AF_IUCV_TRANS_IUCV;
759 if (!iucv->msglimit)
760 iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
761 goto done_unlock;
762 }
763 /* found no dev to bind */
764 err = -ENODEV;
626done_unlock: 765done_unlock:
627 /* Release the socket list lock */ 766 /* Release the socket list lock */
628 write_unlock_bh(&iucv_sk_list.lock); 767 write_unlock_bh(&iucv_sk_list.lock);
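The new bind logic above decides the transport by looking for a HiperSockets device whose permanent hardware address equals the EBCDIC form of the requested z/VM user id; only if no such device exists (and the IUCV base module is present) does it fall back to the classic VM binding. A condensed sketch of the device match for an s390 build; the helper itself is illustrative:

    #include <linux/netdevice.h>
    #include <linux/string.h>
    #include <asm/ebcdic.h>

    /* caller holds rcu_read_lock(); returns the matching device or NULL */
    static struct net_device *find_hs_device(struct net *net,
                                             const char *siucv_user_id)
    {
        struct net_device *dev;
        char uid[9];

        memcpy(uid, siucv_user_id, 8);
        ASCEBC(uid, 8);             /* device addresses are kept in EBCDIC */
        for_each_netdev_rcu(net, dev)
            if (!memcmp(dev->perm_addr, uid, 8))
                return dev;
        return NULL;
    }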
@@ -658,45 +797,44 @@ static int iucv_sock_autobind(struct sock *sk)
658 797
659 memcpy(&iucv->src_name, name, 8); 798 memcpy(&iucv->src_name, name, 8);
660 799
800 if (!iucv->msglimit)
801 iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
802
661 return err; 803 return err;
662} 804}
663 805
664/* Connect an unconnected socket */ 806static int afiucv_hs_connect(struct socket *sock)
665static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
666 int alen, int flags)
667{ 807{
668 struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
669 struct sock *sk = sock->sk; 808 struct sock *sk = sock->sk;
670 struct iucv_sock *iucv; 809 struct sk_buff *skb;
671 unsigned char user_data[16]; 810 int blen = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN;
672 int err; 811 int err = 0;
673
674 if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv))
675 return -EINVAL;
676
677 if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND)
678 return -EBADFD;
679
680 if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET)
681 return -EINVAL;
682 812
683 if (sk->sk_state == IUCV_OPEN) { 813 /* send syn */
684 err = iucv_sock_autobind(sk); 814 skb = sock_alloc_send_skb(sk, blen, 1, &err);
685 if (unlikely(err)) 815 if (!skb) {
686 return err; 816 err = -ENOMEM;
817 goto done;
687 } 818 }
819 skb->dev = NULL;
820 skb_reserve(skb, blen);
821 err = afiucv_hs_send(NULL, sk, skb, AF_IUCV_FLAG_SYN);
822done:
823 return err;
824}
688 825
689 lock_sock(sk); 826static int afiucv_path_connect(struct socket *sock, struct sockaddr *addr)
690 827{
691 /* Set the destination information */ 828 struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
692 memcpy(iucv_sk(sk)->dst_user_id, sa->siucv_user_id, 8); 829 struct sock *sk = sock->sk;
693 memcpy(iucv_sk(sk)->dst_name, sa->siucv_name, 8); 830 struct iucv_sock *iucv = iucv_sk(sk);
831 unsigned char user_data[16];
832 int err;
694 833
695 high_nmcpy(user_data, sa->siucv_name); 834 high_nmcpy(user_data, sa->siucv_name);
696 low_nmcpy(user_data, iucv_sk(sk)->src_name); 835 low_nmcpy(user_data, iucv->src_name);
697 ASCEBC(user_data, sizeof(user_data)); 836 ASCEBC(user_data, sizeof(user_data));
698 837
699 iucv = iucv_sk(sk);
700 /* Create path. */ 838 /* Create path. */
701 iucv->path = iucv_path_alloc(iucv->msglimit, 839 iucv->path = iucv_path_alloc(iucv->msglimit,
702 IUCV_IPRMDATA, GFP_KERNEL); 840 IUCV_IPRMDATA, GFP_KERNEL);
@@ -704,8 +842,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
704 err = -ENOMEM; 842 err = -ENOMEM;
705 goto done; 843 goto done;
706 } 844 }
707 err = iucv_path_connect(iucv->path, &af_iucv_handler, 845 err = pr_iucv->path_connect(iucv->path, &af_iucv_handler,
708 sa->siucv_user_id, NULL, user_data, sk); 846 sa->siucv_user_id, NULL, user_data,
847 sk);
709 if (err) { 848 if (err) {
710 iucv_path_free(iucv->path); 849 iucv_path_free(iucv->path);
711 iucv->path = NULL; 850 iucv->path = NULL;
@@ -724,21 +863,62 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
724 err = -ECONNREFUSED; 863 err = -ECONNREFUSED;
725 break; 864 break;
726 } 865 }
727 goto done;
728 } 866 }
867done:
868 return err;
869}
729 870
730 if (sk->sk_state != IUCV_CONNECTED) { 871/* Connect an unconnected socket */
872static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
873 int alen, int flags)
874{
875 struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
876 struct sock *sk = sock->sk;
877 struct iucv_sock *iucv = iucv_sk(sk);
878 int err;
879
880 if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv))
881 return -EINVAL;
882
883 if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND)
884 return -EBADFD;
885
886 if (sk->sk_state == IUCV_OPEN &&
887 iucv->transport == AF_IUCV_TRANS_HIPER)
888 return -EBADFD; /* explicit bind required */
889
890 if (sk->sk_type != SOCK_STREAM && sk->sk_type != SOCK_SEQPACKET)
891 return -EINVAL;
892
893 if (sk->sk_state == IUCV_OPEN) {
894 err = iucv_sock_autobind(sk);
895 if (unlikely(err))
896 return err;
897 }
898
899 lock_sock(sk);
900
901 /* Set the destination information */
902 memcpy(iucv->dst_user_id, sa->siucv_user_id, 8);
903 memcpy(iucv->dst_name, sa->siucv_name, 8);
904
905 if (iucv->transport == AF_IUCV_TRANS_HIPER)
906 err = afiucv_hs_connect(sock);
907 else
908 err = afiucv_path_connect(sock, addr);
909 if (err)
910 goto done;
911
912 if (sk->sk_state != IUCV_CONNECTED)
731 err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED, 913 err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED,
732 IUCV_DISCONN), 914 IUCV_DISCONN),
733 sock_sndtimeo(sk, flags & O_NONBLOCK)); 915 sock_sndtimeo(sk, flags & O_NONBLOCK));
734 }
735 916
736 if (sk->sk_state == IUCV_DISCONN) { 917 if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_CLOSED)
737 err = -ECONNREFUSED; 918 err = -ECONNREFUSED;
738 }
739 919
740 if (err) { 920 if (err && iucv->transport == AF_IUCV_TRANS_IUCV) {
741 iucv_path_sever(iucv->path, NULL); 921 pr_iucv->path_sever(iucv->path, NULL);
742 iucv_path_free(iucv->path); 922 iucv_path_free(iucv->path);
743 iucv->path = NULL; 923 iucv->path = NULL;
744 } 924 }
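From user space the practical consequence of the split connect path is the new -EBADFD branch: a socket that ends up on the HiperSockets transport must be bound explicitly before connect(), since autobind only produces a z/VM IUCV identity. A hedged usage sketch; the address layout mirrors sockaddr_iucv and all ids are example values:

    #include <string.h>
    #include <sys/socket.h>

    #ifndef AF_IUCV
    #define AF_IUCV 32
    #endif

    struct sockaddr_iucv_example {          /* mirrors struct sockaddr_iucv */
        unsigned short siucv_family;
        unsigned short siucv_port;          /* reserved, 0 */
        unsigned int   siucv_addr;          /* reserved, 0 */
        char           siucv_nodeid[8];     /* reserved */
        char           siucv_user_id[8];
        char           siucv_name[8];
    };

    static int hs_connect_example(int fd)
    {
        struct sockaddr_iucv_example local = { .siucv_family = AF_IUCV };
        struct sockaddr_iucv_example peer  = { .siucv_family = AF_IUCV };

        memcpy(local.siucv_user_id, "LPARHSI1", 8); /* matches a HiperSockets dev */
        memcpy(local.siucv_name,    "MYAPP   ", 8);
        memcpy(peer.siucv_user_id,  "LPARHSI2", 8);
        memcpy(peer.siucv_name,     "SRVAPP  ", 8);

        /* explicit bind is mandatory before connect() on this transport */
        if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0)
            return -1;
        return connect(fd, (struct sockaddr *)&peer, sizeof(peer));
    }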
@@ -833,20 +1013,21 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr,
833{ 1013{
834 struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr; 1014 struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
835 struct sock *sk = sock->sk; 1015 struct sock *sk = sock->sk;
1016 struct iucv_sock *iucv = iucv_sk(sk);
836 1017
837 addr->sa_family = AF_IUCV; 1018 addr->sa_family = AF_IUCV;
838 *len = sizeof(struct sockaddr_iucv); 1019 *len = sizeof(struct sockaddr_iucv);
839 1020
840 if (peer) { 1021 if (peer) {
841 memcpy(siucv->siucv_user_id, iucv_sk(sk)->dst_user_id, 8); 1022 memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
842 memcpy(siucv->siucv_name, &iucv_sk(sk)->dst_name, 8); 1023 memcpy(siucv->siucv_name, iucv->dst_name, 8);
843 } else { 1024 } else {
844 memcpy(siucv->siucv_user_id, iucv_sk(sk)->src_user_id, 8); 1025 memcpy(siucv->siucv_user_id, iucv->src_user_id, 8);
845 memcpy(siucv->siucv_name, iucv_sk(sk)->src_name, 8); 1026 memcpy(siucv->siucv_name, iucv->src_name, 8);
846 } 1027 }
847 memset(&siucv->siucv_port, 0, sizeof(siucv->siucv_port)); 1028 memset(&siucv->siucv_port, 0, sizeof(siucv->siucv_port));
848 memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr)); 1029 memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
849 memset(siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid)); 1030 memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
850 1031
851 return 0; 1032 return 0;
852} 1033}
@@ -871,7 +1052,7 @@ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg,
871 1052
872 memcpy(prmdata, (void *) skb->data, skb->len); 1053 memcpy(prmdata, (void *) skb->data, skb->len);
873 prmdata[7] = 0xff - (u8) skb->len; 1054 prmdata[7] = 0xff - (u8) skb->len;
874 return iucv_message_send(path, msg, IUCV_IPRMDATA, 0, 1055 return pr_iucv->message_send(path, msg, IUCV_IPRMDATA, 0,
875 (void *) prmdata, 8); 1056 (void *) prmdata, 8);
876} 1057}
877 1058
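The IPRM fast path above carries messages of at most seven bytes directly in the IUCV parameter list; the eighth byte stores 0xff minus the payload length so the receiver can recover the real size (iucv_msg_length() performs the inverse). A minimal round trip:

    #include <stdint.h>
    #include <string.h>

    static void iprm_pack(uint8_t prm[8], const void *data, uint8_t len)
    {
        memcpy(prm, data, len);     /* len <= 7 */
        prm[7] = 0xff - len;
    }

    static uint8_t iprm_len(const uint8_t prm[8])
    {
        return 0xff - prm[7];       /* original payload length */
    }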
@@ -960,9 +1141,16 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
960 * this is fine for SOCK_SEQPACKET (unless we want to support 1141 * this is fine for SOCK_SEQPACKET (unless we want to support
961 * segmented records using the MSG_EOR flag), but 1142 * segmented records using the MSG_EOR flag), but
962 * for SOCK_STREAM we might want to improve it in future */ 1143 * for SOCK_STREAM we might want to improve it in future */
963 skb = sock_alloc_send_skb(sk, len, noblock, &err); 1144 if (iucv->transport == AF_IUCV_TRANS_HIPER)
1145 skb = sock_alloc_send_skb(sk,
1146 len + sizeof(struct af_iucv_trans_hdr) + ETH_HLEN,
1147 noblock, &err);
1148 else
1149 skb = sock_alloc_send_skb(sk, len, noblock, &err);
964 if (!skb) 1150 if (!skb)
965 goto out; 1151 goto out;
1152 if (iucv->transport == AF_IUCV_TRANS_HIPER)
1153 skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN);
966 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { 1154 if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
967 err = -EFAULT; 1155 err = -EFAULT;
968 goto fail; 1156 goto fail;
@@ -983,6 +1171,15 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
983 /* increment and save iucv message tag for msg_completion cbk */ 1171 /* increment and save iucv message tag for msg_completion cbk */
984 txmsg.tag = iucv->send_tag++; 1172 txmsg.tag = iucv->send_tag++;
985 memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); 1173 memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN);
1174 if (iucv->transport == AF_IUCV_TRANS_HIPER) {
1175 atomic_inc(&iucv->msg_sent);
1176 err = afiucv_hs_send(&txmsg, sk, skb, 0);
1177 if (err) {
1178 atomic_dec(&iucv->msg_sent);
1179 goto fail;
1180 }
1181 goto release;
1182 }
986 skb_queue_tail(&iucv->send_skb_q, skb); 1183 skb_queue_tail(&iucv->send_skb_q, skb);
987 1184
988 if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) 1185 if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags)
@@ -999,13 +1196,13 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
999 /* this error should never happen since the 1196 /* this error should never happen since the
1000 * IUCV_IPRMDATA path flag is set... sever path */ 1197 * IUCV_IPRMDATA path flag is set... sever path */
1001 if (err == 0x15) { 1198 if (err == 0x15) {
1002 iucv_path_sever(iucv->path, NULL); 1199 pr_iucv->path_sever(iucv->path, NULL);
1003 skb_unlink(skb, &iucv->send_skb_q); 1200 skb_unlink(skb, &iucv->send_skb_q);
1004 err = -EPIPE; 1201 err = -EPIPE;
1005 goto fail; 1202 goto fail;
1006 } 1203 }
1007 } else 1204 } else
1008 err = iucv_message_send(iucv->path, &txmsg, 0, 0, 1205 err = pr_iucv->message_send(iucv->path, &txmsg, 0, 0,
1009 (void *) skb->data, skb->len); 1206 (void *) skb->data, skb->len);
1010 if (err) { 1207 if (err) {
1011 if (err == 3) { 1208 if (err == 3) {
@@ -1023,6 +1220,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1023 goto fail; 1220 goto fail;
1024 } 1221 }
1025 1222
1223release:
1026 release_sock(sk); 1224 release_sock(sk);
1027 return len; 1225 return len;
1028 1226
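The sendmsg() changes above, the WIN handling in afiucv_hs_callback_win() further down and the recvmsg() hunk together form a simple credit scheme for the HiperSockets transport. A non-kernel model of the send side:

    /* illustrative model, not kernel code */
    struct send_credits {
        int msg_sent;       /* messages in flight towards the peer */
        int msglimit_peer;  /* peer's advertised receive window */
    };

    static int try_send(struct send_credits *c)
    {
        if (c->msg_sent >= c->msglimit_peer)
            return -1;      /* caller must wait, see iucv_below_msglim() */
        c->msg_sent++;      /* charged before dev_queue_xmit(), refunded on error */
        return 0;
    }

    static void peer_confirmed(struct send_credits *c, int window)
    {
        c->msg_sent -= window;  /* WIN frame: peer consumed `window` messages */
    }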
@@ -1095,8 +1293,9 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
1095 skb->len = 0; 1293 skb->len = 0;
1096 } 1294 }
1097 } else { 1295 } else {
1098 rc = iucv_message_receive(path, msg, msg->flags & IUCV_IPRMDATA, 1296 rc = pr_iucv->message_receive(path, msg,
1099 skb->data, len, NULL); 1297 msg->flags & IUCV_IPRMDATA,
1298 skb->data, len, NULL);
1100 if (rc) { 1299 if (rc) {
1101 kfree_skb(skb); 1300 kfree_skb(skb);
1102 return; 1301 return;
@@ -1110,7 +1309,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
1110 kfree_skb(skb); 1309 kfree_skb(skb);
1111 skb = NULL; 1310 skb = NULL;
1112 if (rc) { 1311 if (rc) {
1113 iucv_path_sever(path, NULL); 1312 pr_iucv->path_sever(path, NULL);
1114 return; 1313 return;
1115 } 1314 }
1116 skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); 1315 skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q);
@@ -1154,7 +1353,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1154 struct sock *sk = sock->sk; 1353 struct sock *sk = sock->sk;
1155 struct iucv_sock *iucv = iucv_sk(sk); 1354 struct iucv_sock *iucv = iucv_sk(sk);
1156 unsigned int copied, rlen; 1355 unsigned int copied, rlen;
1157 struct sk_buff *skb, *rskb, *cskb; 1356 struct sk_buff *skb, *rskb, *cskb, *sskb;
1357 int blen;
1158 int err = 0; 1358 int err = 0;
1159 1359
1160 if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) && 1360 if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) &&
@@ -1179,7 +1379,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1179 copied = min_t(unsigned int, rlen, len); 1379 copied = min_t(unsigned int, rlen, len);
1180 1380
1181 cskb = skb; 1381 cskb = skb;
1182 if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { 1382 if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) {
1183 if (!(flags & MSG_PEEK)) 1383 if (!(flags & MSG_PEEK))
1184 skb_queue_head(&sk->sk_receive_queue, skb); 1384 skb_queue_head(&sk->sk_receive_queue, skb);
1185 return -EFAULT; 1385 return -EFAULT;
@@ -1217,6 +1417,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1217 } 1417 }
1218 1418
1219 kfree_skb(skb); 1419 kfree_skb(skb);
1420 atomic_inc(&iucv->msg_recv);
1220 1421
1221 /* Queue backlog skbs */ 1422 /* Queue backlog skbs */
1222 spin_lock_bh(&iucv->message_q.lock); 1423 spin_lock_bh(&iucv->message_q.lock);
@@ -1233,6 +1434,24 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1233 if (skb_queue_empty(&iucv->backlog_skb_q)) { 1434 if (skb_queue_empty(&iucv->backlog_skb_q)) {
1234 if (!list_empty(&iucv->message_q.list)) 1435 if (!list_empty(&iucv->message_q.list))
1235 iucv_process_message_q(sk); 1436 iucv_process_message_q(sk);
1437 if (atomic_read(&iucv->msg_recv) >=
1438 iucv->msglimit / 2) {
1439 /* send WIN to peer */
1440 blen = sizeof(struct af_iucv_trans_hdr) +
1441 ETH_HLEN;
1442 sskb = sock_alloc_send_skb(sk, blen, 1, &err);
1443 if (sskb) {
1444 skb_reserve(sskb,
1445 sizeof(struct af_iucv_trans_hdr)
1446 + ETH_HLEN);
1447 err = afiucv_hs_send(NULL, sk, sskb,
1448 AF_IUCV_FLAG_WIN);
1449 }
1450 if (err) {
1451 sk->sk_state = IUCV_DISCONN;
1452 sk->sk_state_change(sk);
1453 }
1454 }
1236 } 1455 }
1237 spin_unlock_bh(&iucv->message_q.lock); 1456 spin_unlock_bh(&iucv->message_q.lock);
1238 } 1457 }
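And the receive side of the same scheme, as added to recvmsg() above: every message handed to user space bumps msg_recv, and once half of the local msglimit has been consumed a flag-only WIN frame is emitted so the peer can replenish its credits (failure to send it disconnects the socket). Illustrative model:

    /* illustrative model, not kernel code */
    struct recv_window {
        int msg_recv;   /* messages consumed since the last WIN frame */
        int msglimit;   /* local limit advertised at SYN time */
    };

    /* returns the number of credits to advertise, 0 if no WIN frame is due */
    static int consume_and_maybe_ack(struct recv_window *w)
    {
        w->msg_recv++;
        if (w->msg_recv < w->msglimit / 2)
            return 0;
        /* afiucv_hs_send() places this value into the header's window field
         * and subtracts it from msg_recv once the frame is on the wire */
        return w->msg_recv;
    }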
@@ -1327,8 +1546,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
1327 if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) { 1546 if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) {
1328 txmsg.class = 0; 1547 txmsg.class = 0;
1329 txmsg.tag = 0; 1548 txmsg.tag = 0;
1330 err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0, 1549 err = pr_iucv->message_send(iucv->path, &txmsg, IUCV_IPRMDATA,
1331 (void *) iprm_shutdown, 8); 1550 0, (void *) iprm_shutdown, 8);
1332 if (err) { 1551 if (err) {
1333 switch (err) { 1552 switch (err) {
1334 case 1: 1553 case 1:
@@ -1345,7 +1564,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
1345 } 1564 }
1346 1565
1347 if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) { 1566 if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) {
1348 err = iucv_path_quiesce(iucv_sk(sk)->path, NULL); 1567 err = pr_iucv->path_quiesce(iucv->path, NULL);
1349 if (err) 1568 if (err)
1350 err = -ENOTCONN; 1569 err = -ENOTCONN;
1351 1570
@@ -1372,7 +1591,7 @@ static int iucv_sock_release(struct socket *sock)
1372 1591
1373 /* Unregister with IUCV base support */ 1592 /* Unregister with IUCV base support */
1374 if (iucv_sk(sk)->path) { 1593 if (iucv_sk(sk)->path) {
1375 iucv_path_sever(iucv_sk(sk)->path, NULL); 1594 pr_iucv->path_sever(iucv_sk(sk)->path, NULL);
1376 iucv_path_free(iucv_sk(sk)->path); 1595 iucv_path_free(iucv_sk(sk)->path);
1377 iucv_sk(sk)->path = NULL; 1596 iucv_sk(sk)->path = NULL;
1378 } 1597 }
@@ -1514,14 +1733,14 @@ static int iucv_callback_connreq(struct iucv_path *path,
1514 high_nmcpy(user_data, iucv->dst_name); 1733 high_nmcpy(user_data, iucv->dst_name);
1515 ASCEBC(user_data, sizeof(user_data)); 1734 ASCEBC(user_data, sizeof(user_data));
1516 if (sk->sk_state != IUCV_LISTEN) { 1735 if (sk->sk_state != IUCV_LISTEN) {
1517 err = iucv_path_sever(path, user_data); 1736 err = pr_iucv->path_sever(path, user_data);
1518 iucv_path_free(path); 1737 iucv_path_free(path);
1519 goto fail; 1738 goto fail;
1520 } 1739 }
1521 1740
1522 /* Check for backlog size */ 1741 /* Check for backlog size */
1523 if (sk_acceptq_is_full(sk)) { 1742 if (sk_acceptq_is_full(sk)) {
1524 err = iucv_path_sever(path, user_data); 1743 err = pr_iucv->path_sever(path, user_data);
1525 iucv_path_free(path); 1744 iucv_path_free(path);
1526 goto fail; 1745 goto fail;
1527 } 1746 }
@@ -1529,7 +1748,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
1529 /* Create the new socket */ 1748 /* Create the new socket */
1530 nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC); 1749 nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
1531 if (!nsk) { 1750 if (!nsk) {
1532 err = iucv_path_sever(path, user_data); 1751 err = pr_iucv->path_sever(path, user_data);
1533 iucv_path_free(path); 1752 iucv_path_free(path);
1534 goto fail; 1753 goto fail;
1535 } 1754 }
@@ -1553,9 +1772,9 @@ static int iucv_callback_connreq(struct iucv_path *path,
1553 /* set message limit for path based on msglimit of accepting socket */ 1772 /* set message limit for path based on msglimit of accepting socket */
1554 niucv->msglimit = iucv->msglimit; 1773 niucv->msglimit = iucv->msglimit;
1555 path->msglim = iucv->msglimit; 1774 path->msglim = iucv->msglimit;
1556 err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); 1775 err = pr_iucv->path_accept(path, &af_iucv_handler, nuser_data, nsk);
1557 if (err) { 1776 if (err) {
1558 err = iucv_path_sever(path, user_data); 1777 err = pr_iucv->path_sever(path, user_data);
1559 iucv_path_free(path); 1778 iucv_path_free(path);
1560 iucv_sock_kill(nsk); 1779 iucv_sock_kill(nsk);
1561 goto fail; 1780 goto fail;
@@ -1589,7 +1808,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
1589 int len; 1808 int len;
1590 1809
1591 if (sk->sk_shutdown & RCV_SHUTDOWN) { 1810 if (sk->sk_shutdown & RCV_SHUTDOWN) {
1592 iucv_message_reject(path, msg); 1811 pr_iucv->message_reject(path, msg);
1593 return; 1812 return;
1594 } 1813 }
1595 1814
@@ -1600,7 +1819,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
1600 goto save_message; 1819 goto save_message;
1601 1820
1602 len = atomic_read(&sk->sk_rmem_alloc); 1821 len = atomic_read(&sk->sk_rmem_alloc);
1603 len += iucv_msg_length(msg) + sizeof(struct sk_buff); 1822 len += SKB_TRUESIZE(iucv_msg_length(msg));
1604 if (len > sk->sk_rcvbuf) 1823 if (len > sk->sk_rcvbuf)
1605 goto save_message; 1824 goto save_message;
1606 1825
@@ -1692,6 +1911,389 @@ static void iucv_callback_shutdown(struct iucv_path *path, u8 ipuser[16])
1692 bh_unlock_sock(sk); 1911 bh_unlock_sock(sk);
1693} 1912}
1694 1913
1914/***************** HiperSockets transport callbacks ********************/
1915static void afiucv_swap_src_dest(struct sk_buff *skb)
1916{
1917 struct af_iucv_trans_hdr *trans_hdr =
1918 (struct af_iucv_trans_hdr *)skb->data;
1919 char tmpID[8];
1920 char tmpName[8];
1921
1922 ASCEBC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID));
1923 ASCEBC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName));
1924 ASCEBC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID));
1925 ASCEBC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName));
1926 memcpy(tmpID, trans_hdr->srcUserID, 8);
1927 memcpy(tmpName, trans_hdr->srcAppName, 8);
1928 memcpy(trans_hdr->srcUserID, trans_hdr->destUserID, 8);
1929 memcpy(trans_hdr->srcAppName, trans_hdr->destAppName, 8);
1930 memcpy(trans_hdr->destUserID, tmpID, 8);
1931 memcpy(trans_hdr->destAppName, tmpName, 8);
1932 skb_push(skb, ETH_HLEN);
1933 memset(skb->data, 0, ETH_HLEN);
1934}
1935
1936/**
1937 * afiucv_hs_callback_syn - react on received SYN
1938 **/
1939static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
1940{
1941 struct sock *nsk;
1942 struct iucv_sock *iucv, *niucv;
1943 struct af_iucv_trans_hdr *trans_hdr;
1944 int err;
1945
1946 iucv = iucv_sk(sk);
1947 trans_hdr = (struct af_iucv_trans_hdr *)skb->data;
1948 if (!iucv) {
1949 /* no sock - connection refused */
1950 afiucv_swap_src_dest(skb);
1951 trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN;
1952 err = dev_queue_xmit(skb);
1953 goto out;
1954 }
1955
1956 nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
1957 bh_lock_sock(sk);
1958 if ((sk->sk_state != IUCV_LISTEN) ||
1959 sk_acceptq_is_full(sk) ||
1960 !nsk) {
1961 /* error on server socket - connection refused */
1962 if (nsk)
1963 sk_free(nsk);
1964 afiucv_swap_src_dest(skb);
1965 trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN;
1966 err = dev_queue_xmit(skb);
1967 bh_unlock_sock(sk);
1968 goto out;
1969 }
1970
1971 niucv = iucv_sk(nsk);
1972 iucv_sock_init(nsk, sk);
1973 niucv->transport = AF_IUCV_TRANS_HIPER;
1974 niucv->msglimit = iucv->msglimit;
1975 if (!trans_hdr->window)
1976 niucv->msglimit_peer = IUCV_HIPER_MSGLIM_DEFAULT;
1977 else
1978 niucv->msglimit_peer = trans_hdr->window;
1979 memcpy(niucv->dst_name, trans_hdr->srcAppName, 8);
1980 memcpy(niucv->dst_user_id, trans_hdr->srcUserID, 8);
1981 memcpy(niucv->src_name, iucv->src_name, 8);
1982 memcpy(niucv->src_user_id, iucv->src_user_id, 8);
1983 nsk->sk_bound_dev_if = sk->sk_bound_dev_if;
1984 afiucv_swap_src_dest(skb);
1985 trans_hdr->flags = AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK;
1986 trans_hdr->window = niucv->msglimit;
 1987	/* if the receiver acks the xmit, the connection is established */
1988 err = dev_queue_xmit(skb);
1989 if (!err) {
1990 iucv_accept_enqueue(sk, nsk);
1991 nsk->sk_state = IUCV_CONNECTED;
1992 sk->sk_data_ready(sk, 1);
1993 } else
1994 iucv_sock_kill(nsk);
1995 bh_unlock_sock(sk);
1996
1997out:
1998 return NET_RX_SUCCESS;
1999}
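Taken together, the SYN, SYN|ACK, SYN|FIN and FIN callbacks implement a small handshake over ETH_P_AF_IUCV frames. A hedged summary of the transitions seen by the connecting socket; only the flag and state names come from the patch, the helper itself is illustrative:

    static int hs_next_state(int state, unsigned char flags)
    {
        if (state == IUCV_BOUND && flags == (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK))
            return IUCV_CONNECTED;  /* our SYN was accepted, peer window stored */
        if (state == IUCV_BOUND && flags == (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN))
            return IUCV_DISCONN;    /* refused: no listener or backlog full */
        if (state == IUCV_CONNECTED && flags == AF_IUCV_FLAG_FIN)
            return IUCV_DISCONN;    /* peer closed the connection */
        return state;
    }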
2000
2001/**
2002 * afiucv_hs_callback_synack() - react on received SYN-ACK
2003 **/
2004static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
2005{
2006 struct iucv_sock *iucv = iucv_sk(sk);
2007 struct af_iucv_trans_hdr *trans_hdr =
2008 (struct af_iucv_trans_hdr *)skb->data;
2009
2010 if (!iucv)
2011 goto out;
2012 if (sk->sk_state != IUCV_BOUND)
2013 goto out;
2014 bh_lock_sock(sk);
2015 iucv->msglimit_peer = trans_hdr->window;
2016 sk->sk_state = IUCV_CONNECTED;
2017 sk->sk_state_change(sk);
2018 bh_unlock_sock(sk);
2019out:
2020 kfree_skb(skb);
2021 return NET_RX_SUCCESS;
2022}
2023
2024/**
2025 * afiucv_hs_callback_synfin() - react on received SYN_FIN
2026 **/
2027static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb)
2028{
2029 struct iucv_sock *iucv = iucv_sk(sk);
2030
2031 if (!iucv)
2032 goto out;
2033 if (sk->sk_state != IUCV_BOUND)
2034 goto out;
2035 bh_lock_sock(sk);
2036 sk->sk_state = IUCV_DISCONN;
2037 sk->sk_state_change(sk);
2038 bh_unlock_sock(sk);
2039out:
2040 kfree_skb(skb);
2041 return NET_RX_SUCCESS;
2042}
2043
2044/**
2045 * afiucv_hs_callback_fin() - react on received FIN
2046 **/
2047static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb)
2048{
2049 struct iucv_sock *iucv = iucv_sk(sk);
2050
2051 /* other end of connection closed */
2052 if (iucv) {
2053 bh_lock_sock(sk);
2054 if (!list_empty(&iucv->accept_q))
2055 sk->sk_state = IUCV_SEVERED;
2056 else
2057 sk->sk_state = IUCV_DISCONN;
2058 sk->sk_state_change(sk);
2059 bh_unlock_sock(sk);
2060 }
2061 kfree_skb(skb);
2062 return NET_RX_SUCCESS;
2063}
2064
2065/**
2066 * afiucv_hs_callback_win() - react on received WIN
2067 **/
2068static int afiucv_hs_callback_win(struct sock *sk, struct sk_buff *skb)
2069{
2070 struct iucv_sock *iucv = iucv_sk(sk);
2071 struct af_iucv_trans_hdr *trans_hdr =
2072 (struct af_iucv_trans_hdr *)skb->data;
2073
2074 if (!iucv)
2075 return NET_RX_SUCCESS;
2076
2077 if (sk->sk_state != IUCV_CONNECTED)
2078 return NET_RX_SUCCESS;
2079
2080 atomic_sub(trans_hdr->window, &iucv->msg_sent);
2081 iucv_sock_wake_msglim(sk);
2082 return NET_RX_SUCCESS;
2083}
2084
2085/**
2086 * afiucv_hs_callback_rx() - react on received data
2087 **/
2088static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
2089{
2090 struct iucv_sock *iucv = iucv_sk(sk);
2091
2092 if (!iucv) {
2093 kfree_skb(skb);
2094 return NET_RX_SUCCESS;
2095 }
2096
2097 if (sk->sk_state != IUCV_CONNECTED) {
2098 kfree_skb(skb);
2099 return NET_RX_SUCCESS;
2100 }
2101
2102 /* write stuff from iucv_msg to skb cb */
2103 if (skb->len <= sizeof(struct af_iucv_trans_hdr)) {
2104 kfree_skb(skb);
2105 return NET_RX_SUCCESS;
2106 }
2107 skb_pull(skb, sizeof(struct af_iucv_trans_hdr));
2108 skb_reset_transport_header(skb);
2109 skb_reset_network_header(skb);
2110 spin_lock(&iucv->message_q.lock);
2111 if (skb_queue_empty(&iucv->backlog_skb_q)) {
2112 if (sock_queue_rcv_skb(sk, skb)) {
2113 /* handle rcv queue full */
2114 skb_queue_tail(&iucv->backlog_skb_q, skb);
2115 }
2116 } else
2117 skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb);
2118 spin_unlock(&iucv->message_q.lock);
2119 return NET_RX_SUCCESS;
2120}
2121
2122/**
2123 * afiucv_hs_rcv() - base function for arriving data through HiperSockets
2124 * transport
2125 * called from netif RX softirq
2126 **/
2127static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
2128 struct packet_type *pt, struct net_device *orig_dev)
2129{
2130 struct hlist_node *node;
2131 struct sock *sk;
2132 struct iucv_sock *iucv;
2133 struct af_iucv_trans_hdr *trans_hdr;
2134 char nullstring[8];
2135 int err = 0;
2136
2137 skb_pull(skb, ETH_HLEN);
2138 trans_hdr = (struct af_iucv_trans_hdr *)skb->data;
2139 EBCASC(trans_hdr->destAppName, sizeof(trans_hdr->destAppName));
2140 EBCASC(trans_hdr->destUserID, sizeof(trans_hdr->destUserID));
2141 EBCASC(trans_hdr->srcAppName, sizeof(trans_hdr->srcAppName));
2142 EBCASC(trans_hdr->srcUserID, sizeof(trans_hdr->srcUserID));
2143 memset(nullstring, 0, sizeof(nullstring));
2144 iucv = NULL;
2145 sk = NULL;
2146 read_lock(&iucv_sk_list.lock);
2147 sk_for_each(sk, node, &iucv_sk_list.head) {
2148 if (trans_hdr->flags == AF_IUCV_FLAG_SYN) {
2149 if ((!memcmp(&iucv_sk(sk)->src_name,
2150 trans_hdr->destAppName, 8)) &&
2151 (!memcmp(&iucv_sk(sk)->src_user_id,
2152 trans_hdr->destUserID, 8)) &&
2153 (!memcmp(&iucv_sk(sk)->dst_name, nullstring, 8)) &&
2154 (!memcmp(&iucv_sk(sk)->dst_user_id,
2155 nullstring, 8))) {
2156 iucv = iucv_sk(sk);
2157 break;
2158 }
2159 } else {
2160 if ((!memcmp(&iucv_sk(sk)->src_name,
2161 trans_hdr->destAppName, 8)) &&
2162 (!memcmp(&iucv_sk(sk)->src_user_id,
2163 trans_hdr->destUserID, 8)) &&
2164 (!memcmp(&iucv_sk(sk)->dst_name,
2165 trans_hdr->srcAppName, 8)) &&
2166 (!memcmp(&iucv_sk(sk)->dst_user_id,
2167 trans_hdr->srcUserID, 8))) {
2168 iucv = iucv_sk(sk);
2169 break;
2170 }
2171 }
2172 }
2173 read_unlock(&iucv_sk_list.lock);
2174 if (!iucv)
2175 sk = NULL;
2176
 2177	/* no sock
 2178	   how should we send with no sock?
 2179	   1) send without a sock, with no send rc checking?
 2180	   2) introduce a default sock to handle such cases
 2181
 2182	   SYN -> send SYN|ACK in good case, send SYN|FIN in bad case
 2183	   data -> send FIN
 2184	   SYN|ACK, SYN|FIN, FIN -> no action? */
2185
2186 switch (trans_hdr->flags) {
2187 case AF_IUCV_FLAG_SYN:
2188 /* connect request */
2189 err = afiucv_hs_callback_syn(sk, skb);
2190 break;
2191 case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_ACK):
2192 /* connect request confirmed */
2193 err = afiucv_hs_callback_synack(sk, skb);
2194 break;
2195 case (AF_IUCV_FLAG_SYN | AF_IUCV_FLAG_FIN):
2196 /* connect request refused */
2197 err = afiucv_hs_callback_synfin(sk, skb);
2198 break;
2199 case (AF_IUCV_FLAG_FIN):
2200 /* close request */
2201 err = afiucv_hs_callback_fin(sk, skb);
2202 break;
2203 case (AF_IUCV_FLAG_WIN):
2204 err = afiucv_hs_callback_win(sk, skb);
2205 if (skb->len > sizeof(struct af_iucv_trans_hdr))
2206 err = afiucv_hs_callback_rx(sk, skb);
2207 else
 2208			kfree_skb(skb);
2209 break;
2210 case 0:
2211 /* plain data frame */
2212 err = afiucv_hs_callback_rx(sk, skb);
2213 break;
2214 default:
2215 ;
2216 }
2217
2218 return err;
2219}
2220
2221/**
 2222 * afiucv_hs_callback_txnotify() - handle send notifications from HiperSockets
2223 * transport
2224 **/
2225static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
2226 enum iucv_tx_notify n)
2227{
2228 struct sock *isk = skb->sk;
2229 struct sock *sk = NULL;
2230 struct iucv_sock *iucv = NULL;
2231 struct sk_buff_head *list;
2232 struct sk_buff *list_skb;
2233 struct sk_buff *this = NULL;
2234 unsigned long flags;
2235 struct hlist_node *node;
2236
2237 read_lock(&iucv_sk_list.lock);
2238 sk_for_each(sk, node, &iucv_sk_list.head)
2239 if (sk == isk) {
2240 iucv = iucv_sk(sk);
2241 break;
2242 }
2243 read_unlock(&iucv_sk_list.lock);
2244
2245 if (!iucv)
2246 return;
2247
2248 bh_lock_sock(sk);
2249 list = &iucv->send_skb_q;
2250 list_skb = list->next;
2251 if (skb_queue_empty(list))
2252 goto out_unlock;
2253
2254 spin_lock_irqsave(&list->lock, flags);
2255 while (list_skb != (struct sk_buff *)list) {
2256 if (skb_shinfo(list_skb) == skb_shinfo(skb)) {
2257 this = list_skb;
2258 switch (n) {
2259 case TX_NOTIFY_OK:
2260 __skb_unlink(this, list);
2261 iucv_sock_wake_msglim(sk);
2262 kfree_skb(this);
2263 break;
2264 case TX_NOTIFY_PENDING:
2265 atomic_inc(&iucv->pendings);
2266 break;
2267 case TX_NOTIFY_DELAYED_OK:
2268 __skb_unlink(this, list);
2269 atomic_dec(&iucv->pendings);
2270 if (atomic_read(&iucv->pendings) <= 0)
2271 iucv_sock_wake_msglim(sk);
2272 kfree_skb(this);
2273 break;
2274 case TX_NOTIFY_UNREACHABLE:
2275 case TX_NOTIFY_DELAYED_UNREACHABLE:
2276 case TX_NOTIFY_TPQFULL: /* not yet used */
2277 case TX_NOTIFY_GENERALERROR:
2278 case TX_NOTIFY_DELAYED_GENERALERROR:
2279 __skb_unlink(this, list);
2280 kfree_skb(this);
2281 if (!list_empty(&iucv->accept_q))
2282 sk->sk_state = IUCV_SEVERED;
2283 else
2284 sk->sk_state = IUCV_DISCONN;
2285 sk->sk_state_change(sk);
2286 break;
2287 }
2288 break;
2289 }
2290 list_skb = list_skb->next;
2291 }
2292 spin_unlock_irqrestore(&list->lock, flags);
2293
2294out_unlock:
2295 bh_unlock_sock(sk);
2296}
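A note on the loop above: afiucv_hs_send() queues a clone of each transmitted skb on send_skb_q, and a clone shares the data buffer (and therefore the shared info block) with the original, so comparing skb_shinfo() pointers is enough to map the driver's notification skb back to its queue entry without an extra tag. Illustrative check:

    #include <linux/skbuff.h>

    static bool same_buffer(struct sk_buff *a, struct sk_buff *b)
    {
        return skb_shinfo(a) == skb_shinfo(b); /* true for an skb and its clone */
    }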
1695static const struct proto_ops iucv_sock_ops = { 2297static const struct proto_ops iucv_sock_ops = {
1696 .family = PF_IUCV, 2298 .family = PF_IUCV,
1697 .owner = THIS_MODULE, 2299 .owner = THIS_MODULE,
@@ -1718,71 +2320,104 @@ static const struct net_proto_family iucv_sock_family_ops = {
1718 .create = iucv_sock_create, 2320 .create = iucv_sock_create,
1719}; 2321};
1720 2322
1721static int __init afiucv_init(void) 2323static struct packet_type iucv_packet_type = {
2324 .type = cpu_to_be16(ETH_P_AF_IUCV),
2325 .func = afiucv_hs_rcv,
2326};
2327
2328static int afiucv_iucv_init(void)
1722{ 2329{
1723 int err; 2330 int err;
1724 2331
1725 if (!MACHINE_IS_VM) { 2332 err = pr_iucv->iucv_register(&af_iucv_handler, 0);
1726 pr_err("The af_iucv module cannot be loaded"
1727 " without z/VM\n");
1728 err = -EPROTONOSUPPORT;
1729 goto out;
1730 }
1731 cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
1732 if (unlikely(err)) {
1733 WARN_ON(err);
1734 err = -EPROTONOSUPPORT;
1735 goto out;
1736 }
1737
1738 err = iucv_register(&af_iucv_handler, 0);
1739 if (err) 2333 if (err)
1740 goto out; 2334 goto out;
1741 err = proto_register(&iucv_proto, 0);
1742 if (err)
1743 goto out_iucv;
1744 err = sock_register(&iucv_sock_family_ops);
1745 if (err)
1746 goto out_proto;
1747 /* establish dummy device */ 2335 /* establish dummy device */
2336 af_iucv_driver.bus = pr_iucv->bus;
1748 err = driver_register(&af_iucv_driver); 2337 err = driver_register(&af_iucv_driver);
1749 if (err) 2338 if (err)
1750 goto out_sock; 2339 goto out_iucv;
1751 af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL); 2340 af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
1752 if (!af_iucv_dev) { 2341 if (!af_iucv_dev) {
1753 err = -ENOMEM; 2342 err = -ENOMEM;
1754 goto out_driver; 2343 goto out_driver;
1755 } 2344 }
1756 dev_set_name(af_iucv_dev, "af_iucv"); 2345 dev_set_name(af_iucv_dev, "af_iucv");
1757 af_iucv_dev->bus = &iucv_bus; 2346 af_iucv_dev->bus = pr_iucv->bus;
1758 af_iucv_dev->parent = iucv_root; 2347 af_iucv_dev->parent = pr_iucv->root;
1759 af_iucv_dev->release = (void (*)(struct device *))kfree; 2348 af_iucv_dev->release = (void (*)(struct device *))kfree;
1760 af_iucv_dev->driver = &af_iucv_driver; 2349 af_iucv_dev->driver = &af_iucv_driver;
1761 err = device_register(af_iucv_dev); 2350 err = device_register(af_iucv_dev);
1762 if (err) 2351 if (err)
1763 goto out_driver; 2352 goto out_driver;
1764
1765 return 0; 2353 return 0;
1766 2354
1767out_driver: 2355out_driver:
1768 driver_unregister(&af_iucv_driver); 2356 driver_unregister(&af_iucv_driver);
2357out_iucv:
2358 pr_iucv->iucv_unregister(&af_iucv_handler, 0);
2359out:
2360 return err;
2361}
2362
2363static int __init afiucv_init(void)
2364{
2365 int err;
2366
2367 if (MACHINE_IS_VM) {
2368 cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
2369 if (unlikely(err)) {
2370 WARN_ON(err);
2371 err = -EPROTONOSUPPORT;
2372 goto out;
2373 }
2374
2375 pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
2376 if (!pr_iucv) {
2377 printk(KERN_WARNING "iucv_if lookup failed\n");
2378 memset(&iucv_userid, 0, sizeof(iucv_userid));
2379 }
2380 } else {
2381 memset(&iucv_userid, 0, sizeof(iucv_userid));
2382 pr_iucv = NULL;
2383 }
2384
2385 err = proto_register(&iucv_proto, 0);
2386 if (err)
2387 goto out;
2388 err = sock_register(&iucv_sock_family_ops);
2389 if (err)
2390 goto out_proto;
2391
2392 if (pr_iucv) {
2393 err = afiucv_iucv_init();
2394 if (err)
2395 goto out_sock;
2396 }
2397 dev_add_pack(&iucv_packet_type);
2398 return 0;
2399
1769out_sock: 2400out_sock:
1770 sock_unregister(PF_IUCV); 2401 sock_unregister(PF_IUCV);
1771out_proto: 2402out_proto:
1772 proto_unregister(&iucv_proto); 2403 proto_unregister(&iucv_proto);
1773out_iucv:
1774 iucv_unregister(&af_iucv_handler, 0);
1775out: 2404out:
2405 if (pr_iucv)
2406 symbol_put(iucv_if);
1776 return err; 2407 return err;
1777} 2408}
1778 2409
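The init rework above is what lets af_iucv load on an LPAR without z/VM: all calls into the IUCV base code now go through the pr_iucv ops pointer, resolved at module load with symbol_get() and dropped again with symbol_put(). The general shape of that optional-provider pattern, with illustrative names rather than the kernel's iucv_if:

    #include <linux/module.h>

    struct provider_ops {
        int (*do_something)(void);
    };

    extern struct provider_ops example_provider_ops;  /* EXPORT_SYMBOL'd by the provider */

    static struct provider_ops *ops;

    static int __init consumer_init(void)
    {
        ops = symbol_get(example_provider_ops);  /* NULL if the provider is absent */
        if (!ops)
            pr_info("provider not available, running degraded\n");
        return 0;
    }

    static void __exit consumer_exit(void)
    {
        if (ops)
            symbol_put(example_provider_ops);    /* drop the provider module ref */
    }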
1779static void __exit afiucv_exit(void) 2410static void __exit afiucv_exit(void)
1780{ 2411{
1781 device_unregister(af_iucv_dev); 2412 if (pr_iucv) {
1782 driver_unregister(&af_iucv_driver); 2413 device_unregister(af_iucv_dev);
2414 driver_unregister(&af_iucv_driver);
2415 pr_iucv->iucv_unregister(&af_iucv_handler, 0);
2416 symbol_put(iucv_if);
2417 }
2418 dev_remove_pack(&iucv_packet_type);
1783 sock_unregister(PF_IUCV); 2419 sock_unregister(PF_IUCV);
1784 proto_unregister(&iucv_proto); 2420 proto_unregister(&iucv_proto);
1785 iucv_unregister(&af_iucv_handler, 0);
1786} 2421}
1787 2422
1788module_init(afiucv_init); 2423module_init(afiucv_init);
@@ -1793,3 +2428,4 @@ MODULE_DESCRIPTION("IUCV Sockets ver " VERSION);
1793MODULE_VERSION(VERSION); 2428MODULE_VERSION(VERSION);
1794MODULE_LICENSE("GPL"); 2429MODULE_LICENSE("GPL");
1795MODULE_ALIAS_NETPROTO(PF_IUCV); 2430MODULE_ALIAS_NETPROTO(PF_IUCV);
2431
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 075a3808aa40..403be43b793d 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1974,6 +1974,27 @@ out:
1974 return rc; 1974 return rc;
1975} 1975}
1976 1976
1977struct iucv_interface iucv_if = {
1978 .message_receive = iucv_message_receive,
1979 .__message_receive = __iucv_message_receive,
1980 .message_reply = iucv_message_reply,
1981 .message_reject = iucv_message_reject,
1982 .message_send = iucv_message_send,
1983 .__message_send = __iucv_message_send,
1984 .message_send2way = iucv_message_send2way,
1985 .message_purge = iucv_message_purge,
1986 .path_accept = iucv_path_accept,
1987 .path_connect = iucv_path_connect,
1988 .path_quiesce = iucv_path_quiesce,
1989 .path_resume = iucv_path_resume,
1990 .path_sever = iucv_path_sever,
1991 .iucv_register = iucv_register,
1992 .iucv_unregister = iucv_unregister,
1993 .bus = NULL,
1994 .root = NULL,
1995};
1996EXPORT_SYMBOL(iucv_if);
1997
1977/** 1998/**
1978 * iucv_init 1999 * iucv_init
1979 * 2000 *
@@ -2038,6 +2059,8 @@ static int __init iucv_init(void)
2038 rc = bus_register(&iucv_bus); 2059 rc = bus_register(&iucv_bus);
2039 if (rc) 2060 if (rc)
2040 goto out_reboot; 2061 goto out_reboot;
2062 iucv_if.root = iucv_root;
2063 iucv_if.bus = &iucv_bus;
2041 return 0; 2064 return 0;
2042 2065
2043out_reboot: 2066out_reboot:
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index 956b7e47dc52..8d0324bac01c 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -139,7 +139,8 @@ out:
139 return lapb; 139 return lapb;
140} 140}
141 141
142int lapb_register(struct net_device *dev, struct lapb_register_struct *callbacks) 142int lapb_register(struct net_device *dev,
143 const struct lapb_register_struct *callbacks)
143{ 144{
144 struct lapb_cb *lapb; 145 struct lapb_cb *lapb;
145 int rc = LAPB_BADTOKEN; 146 int rc = LAPB_BADTOKEN;
@@ -158,7 +159,7 @@ int lapb_register(struct net_device *dev, struct lapb_register_struct *callbacks
158 goto out; 159 goto out;
159 160
160 lapb->dev = dev; 161 lapb->dev = dev;
161 lapb->callbacks = *callbacks; 162 lapb->callbacks = callbacks;
162 163
163 __lapb_insert_cb(lapb); 164 __lapb_insert_cb(lapb);
164 165
@@ -380,32 +381,32 @@ int lapb_data_received(struct net_device *dev, struct sk_buff *skb)
380 381
381void lapb_connect_confirmation(struct lapb_cb *lapb, int reason) 382void lapb_connect_confirmation(struct lapb_cb *lapb, int reason)
382{ 383{
383 if (lapb->callbacks.connect_confirmation) 384 if (lapb->callbacks->connect_confirmation)
384 lapb->callbacks.connect_confirmation(lapb->dev, reason); 385 lapb->callbacks->connect_confirmation(lapb->dev, reason);
385} 386}
386 387
387void lapb_connect_indication(struct lapb_cb *lapb, int reason) 388void lapb_connect_indication(struct lapb_cb *lapb, int reason)
388{ 389{
389 if (lapb->callbacks.connect_indication) 390 if (lapb->callbacks->connect_indication)
390 lapb->callbacks.connect_indication(lapb->dev, reason); 391 lapb->callbacks->connect_indication(lapb->dev, reason);
391} 392}
392 393
393void lapb_disconnect_confirmation(struct lapb_cb *lapb, int reason) 394void lapb_disconnect_confirmation(struct lapb_cb *lapb, int reason)
394{ 395{
395 if (lapb->callbacks.disconnect_confirmation) 396 if (lapb->callbacks->disconnect_confirmation)
396 lapb->callbacks.disconnect_confirmation(lapb->dev, reason); 397 lapb->callbacks->disconnect_confirmation(lapb->dev, reason);
397} 398}
398 399
399void lapb_disconnect_indication(struct lapb_cb *lapb, int reason) 400void lapb_disconnect_indication(struct lapb_cb *lapb, int reason)
400{ 401{
401 if (lapb->callbacks.disconnect_indication) 402 if (lapb->callbacks->disconnect_indication)
402 lapb->callbacks.disconnect_indication(lapb->dev, reason); 403 lapb->callbacks->disconnect_indication(lapb->dev, reason);
403} 404}
404 405
405int lapb_data_indication(struct lapb_cb *lapb, struct sk_buff *skb) 406int lapb_data_indication(struct lapb_cb *lapb, struct sk_buff *skb)
406{ 407{
407 if (lapb->callbacks.data_indication) 408 if (lapb->callbacks->data_indication)
408 return lapb->callbacks.data_indication(lapb->dev, skb); 409 return lapb->callbacks->data_indication(lapb->dev, skb);
409 410
410 kfree_skb(skb); 411 kfree_skb(skb);
411 return NET_RX_SUCCESS; /* For now; must be != NET_RX_DROP */ 412 return NET_RX_SUCCESS; /* For now; must be != NET_RX_DROP */
@@ -415,8 +416,8 @@ int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb)
415{ 416{
416 int used = 0; 417 int used = 0;
417 418
418 if (lapb->callbacks.data_transmit) { 419 if (lapb->callbacks->data_transmit) {
419 lapb->callbacks.data_transmit(lapb->dev, skb); 420 lapb->callbacks->data_transmit(lapb->dev, skb);
420 used = 1; 421 used = 1;
421 } 422 }
422 423
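The lapb_iface.c change stores a pointer to the caller's callback table instead of copying it, which is why lapb_register() now takes a const pointer and callers should hand in a table with static lifetime. Typical caller shape; the callback names come from the hunk above, the driver functions are placeholders:

    #include <linux/netdevice.h>
    #include <net/lapb.h>

    static void my_connect_conf(struct net_device *dev, int reason)
    {
        /* bring the upper layer up */
    }

    static int my_data_ind(struct net_device *dev, struct sk_buff *skb)
    {
        return netif_rx(skb);
    }

    static const struct lapb_register_struct my_lapb_callbacks = {
        .connect_confirmation = my_connect_conf,
        .data_indication      = my_data_ind,
        /* hooks left NULL are checked by the core before being called */
    };

    static int attach_lapb(struct net_device *dev)
    {
        return lapb_register(dev, &my_lapb_callbacks);
    }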
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 0cde8df6828d..97f33588b65f 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -69,7 +69,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
69 if (!tid_rx) 69 if (!tid_rx)
70 return; 70 return;
71 71
72 rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL); 72 RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL);
73 73
74#ifdef CONFIG_MAC80211_HT_DEBUG 74#ifdef CONFIG_MAC80211_HT_DEBUG
75 printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", 75 printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n",
@@ -325,7 +325,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
325 status = WLAN_STATUS_SUCCESS; 325 status = WLAN_STATUS_SUCCESS;
326 326
327 /* activate it for RX */ 327 /* activate it for RX */
328 rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); 328 RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx);
329 329
330 if (timeout) 330 if (timeout)
331 mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout)); 331 mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout));
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e253afa13001..ebd7fb101fbf 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -63,7 +63,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
63 63
64 if (type == NL80211_IFTYPE_AP_VLAN && 64 if (type == NL80211_IFTYPE_AP_VLAN &&
65 params && params->use_4addr == 0) 65 params && params->use_4addr == 0)
66 rcu_assign_pointer(sdata->u.vlan.sta, NULL); 66 RCU_INIT_POINTER(sdata->u.vlan.sta, NULL);
67 else if (type == NL80211_IFTYPE_STATION && 67 else if (type == NL80211_IFTYPE_STATION &&
68 params && params->use_4addr >= 0) 68 params && params->use_4addr >= 0)
69 sdata->u.mgd.use_4addr = params->use_4addr; 69 sdata->u.mgd.use_4addr = params->use_4addr;
@@ -575,7 +575,7 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
575 575
576 sdata->vif.bss_conf.dtim_period = new->dtim_period; 576 sdata->vif.bss_conf.dtim_period = new->dtim_period;
577 577
578 rcu_assign_pointer(sdata->u.ap.beacon, new); 578 RCU_INIT_POINTER(sdata->u.ap.beacon, new);
579 579
580 synchronize_rcu(); 580 synchronize_rcu();
581 581
@@ -630,7 +630,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
630 if (!old) 630 if (!old)
631 return -ENOENT; 631 return -ENOENT;
632 632
633 rcu_assign_pointer(sdata->u.ap.beacon, NULL); 633 RCU_INIT_POINTER(sdata->u.ap.beacon, NULL);
634 synchronize_rcu(); 634 synchronize_rcu();
635 kfree(old); 635 kfree(old);
636 636
@@ -922,7 +922,7 @@ static int ieee80211_change_station(struct wiphy *wiphy,
922 return -EBUSY; 922 return -EBUSY;
923 } 923 }
924 924
925 rcu_assign_pointer(vlansdata->u.vlan.sta, sta); 925 RCU_INIT_POINTER(vlansdata->u.vlan.sta, sta);
926 } 926 }
927 927
928 sta->sdata = vlansdata; 928 sta->sdata = vlansdata;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 2da3040787a7..ede9a8b341ac 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -84,7 +84,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
84 drv_reset_tsf(local, sdata); 84 drv_reset_tsf(local, sdata);
85 85
86 skb = ifibss->skb; 86 skb = ifibss->skb;
87 rcu_assign_pointer(ifibss->presp, NULL); 87 RCU_INIT_POINTER(ifibss->presp, NULL);
88 synchronize_rcu(); 88 synchronize_rcu();
89 skb->data = skb->head; 89 skb->data = skb->head;
90 skb->len = 0; 90 skb->len = 0;
@@ -184,7 +184,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
184 *pos++ = 0; /* U-APSD no in use */ 184 *pos++ = 0; /* U-APSD no in use */
185 } 185 }
186 186
187 rcu_assign_pointer(ifibss->presp, skb); 187 RCU_INIT_POINTER(ifibss->presp, skb);
188 188
189 sdata->vif.bss_conf.beacon_int = beacon_int; 189 sdata->vif.bss_conf.beacon_int = beacon_int;
190 sdata->vif.bss_conf.basic_rates = basic_rates; 190 sdata->vif.bss_conf.basic_rates = basic_rates;
@@ -995,7 +995,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
995 kfree(sdata->u.ibss.ie); 995 kfree(sdata->u.ibss.ie);
996 skb = rcu_dereference_protected(sdata->u.ibss.presp, 996 skb = rcu_dereference_protected(sdata->u.ibss.presp,
997 lockdep_is_held(&sdata->u.ibss.mtx)); 997 lockdep_is_held(&sdata->u.ibss.mtx));
998 rcu_assign_pointer(sdata->u.ibss.presp, NULL); 998 RCU_INIT_POINTER(sdata->u.ibss.presp, NULL);
999 sdata->vif.bss_conf.ibss_joined = false; 999 sdata->vif.bss_conf.ibss_joined = false;
1000 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | 1000 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
1001 BSS_CHANGED_IBSS); 1001 BSS_CHANGED_IBSS);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ef741e8dbedb..30d73552e9ab 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -456,7 +456,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
456 BSS_CHANGED_BEACON_ENABLED); 456 BSS_CHANGED_BEACON_ENABLED);
457 457
458 /* remove beacon */ 458 /* remove beacon */
459 rcu_assign_pointer(sdata->u.ap.beacon, NULL); 459 RCU_INIT_POINTER(sdata->u.ap.beacon, NULL);
460 synchronize_rcu(); 460 synchronize_rcu();
461 kfree(old_beacon); 461 kfree(old_beacon);
462 462
@@ -643,7 +643,7 @@ static const struct net_device_ops ieee80211_dataif_ops = {
643 .ndo_stop = ieee80211_stop, 643 .ndo_stop = ieee80211_stop,
644 .ndo_uninit = ieee80211_teardown_sdata, 644 .ndo_uninit = ieee80211_teardown_sdata,
645 .ndo_start_xmit = ieee80211_subif_start_xmit, 645 .ndo_start_xmit = ieee80211_subif_start_xmit,
646 .ndo_set_multicast_list = ieee80211_set_multicast_list, 646 .ndo_set_rx_mode = ieee80211_set_multicast_list,
647 .ndo_change_mtu = ieee80211_change_mtu, 647 .ndo_change_mtu = ieee80211_change_mtu,
648 .ndo_set_mac_address = ieee80211_change_mac, 648 .ndo_set_mac_address = ieee80211_change_mac,
649 .ndo_select_queue = ieee80211_netdev_select_queue, 649 .ndo_select_queue = ieee80211_netdev_select_queue,
@@ -687,7 +687,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
687 .ndo_stop = ieee80211_stop, 687 .ndo_stop = ieee80211_stop,
688 .ndo_uninit = ieee80211_teardown_sdata, 688 .ndo_uninit = ieee80211_teardown_sdata,
689 .ndo_start_xmit = ieee80211_monitor_start_xmit, 689 .ndo_start_xmit = ieee80211_monitor_start_xmit,
690 .ndo_set_multicast_list = ieee80211_set_multicast_list, 690 .ndo_set_rx_mode = ieee80211_set_multicast_list,
691 .ndo_change_mtu = ieee80211_change_mtu, 691 .ndo_change_mtu = ieee80211_change_mtu,
692 .ndo_set_mac_address = eth_mac_addr, 692 .ndo_set_mac_address = eth_mac_addr,
693 .ndo_select_queue = ieee80211_monitor_select_queue, 693 .ndo_select_queue = ieee80211_monitor_select_queue,
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 332b5ff1e885..7f54c5042235 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -1168,6 +1168,6 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
1168void mesh_pathtbl_unregister(void) 1168void mesh_pathtbl_unregister(void)
1169{ 1169{
1170 /* no need for locking during exit path */ 1170 /* no need for locking during exit path */
1171 mesh_table_free(rcu_dereference_raw(mesh_paths), true); 1171 mesh_table_free(rcu_dereference_protected(mesh_paths, 1), true);
1172 mesh_table_free(rcu_dereference_raw(mpp_paths), true); 1172 mesh_table_free(rcu_dereference_protected(mpp_paths, 1), true);
1173} 1173}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 2e2c71194c80..ce962d2c8782 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -73,7 +73,7 @@ static int sta_info_hash_del(struct ieee80211_local *local,
73 if (!s) 73 if (!s)
74 return -ENOENT; 74 return -ENOENT;
75 if (s == sta) { 75 if (s == sta) {
76 rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], 76 RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)],
77 s->hnext); 77 s->hnext);
78 return 0; 78 return 0;
79 } 79 }
@@ -83,7 +83,7 @@ static int sta_info_hash_del(struct ieee80211_local *local,
83 s = rcu_dereference_protected(s->hnext, 83 s = rcu_dereference_protected(s->hnext,
84 lockdep_is_held(&local->sta_lock)); 84 lockdep_is_held(&local->sta_lock));
85 if (rcu_access_pointer(s->hnext)) { 85 if (rcu_access_pointer(s->hnext)) {
86 rcu_assign_pointer(s->hnext, sta->hnext); 86 RCU_INIT_POINTER(s->hnext, sta->hnext);
87 return 0; 87 return 0;
88 } 88 }
89 89
@@ -232,7 +232,7 @@ static void sta_info_hash_add(struct ieee80211_local *local,
232 struct sta_info *sta) 232 struct sta_info *sta)
233{ 233{
234 sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)]; 234 sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)];
235 rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); 235 RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)], sta);
236} 236}
237 237
238static void sta_unblock(struct work_struct *wk) 238static void sta_unblock(struct work_struct *wk)
@@ -906,7 +906,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
906 local->sta_generation++; 906 local->sta_generation++;
907 907
908 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 908 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
909 rcu_assign_pointer(sdata->u.vlan.sta, NULL); 909 RCU_INIT_POINTER(sdata->u.vlan.sta, NULL);
910 910
911 if (sta->uploaded) { 911 if (sta->uploaded) {
912 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 912 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
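The mac80211 hunks above (and the netfilter ones that follow) are a mechanical conversion: rcu_assign_pointer() implies a write barrier so readers never see a pointer to not-yet-initialised data, but that barrier buys nothing when storing NULL or when no reader can reach the pointer yet, which is exactly what RCU_INIT_POINTER() is for. A small illustration, not taken from the patch:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct cfg { int value; };
    static struct cfg __rcu *active_cfg;

    static void publish(struct cfg *c)
    {
        c->value = 42;
        rcu_assign_pointer(active_cfg, c);  /* barrier: init must be visible first */
    }

    static void retract(void)
    {
        struct cfg *old = rcu_dereference_protected(active_cfg, 1);

        RCU_INIT_POINTER(active_cfg, NULL); /* NULL carries no data: no barrier needed */
        synchronize_rcu();                  /* wait out readers before freeing */
        kfree(old);
    }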
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 899b71c0ff5d..3346829ea07f 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -37,7 +37,7 @@ int nf_register_afinfo(const struct nf_afinfo *afinfo)
37 err = mutex_lock_interruptible(&afinfo_mutex); 37 err = mutex_lock_interruptible(&afinfo_mutex);
38 if (err < 0) 38 if (err < 0)
39 return err; 39 return err;
40 rcu_assign_pointer(nf_afinfo[afinfo->family], afinfo); 40 RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
41 mutex_unlock(&afinfo_mutex); 41 mutex_unlock(&afinfo_mutex);
42 return 0; 42 return 0;
43} 43}
@@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(nf_register_afinfo);
46void nf_unregister_afinfo(const struct nf_afinfo *afinfo) 46void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
47{ 47{
48 mutex_lock(&afinfo_mutex); 48 mutex_lock(&afinfo_mutex);
49 rcu_assign_pointer(nf_afinfo[afinfo->family], NULL); 49 RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
50 mutex_unlock(&afinfo_mutex); 50 mutex_unlock(&afinfo_mutex);
51 synchronize_rcu(); 51 synchronize_rcu();
52} 52}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index be43fd805bd0..5290ac353a5e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3679,7 +3679,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
3679 int idx; 3679 int idx;
3680 struct netns_ipvs *ipvs = net_ipvs(net); 3680 struct netns_ipvs *ipvs = net_ipvs(net);
3681 3681
3682 ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); 3682 rwlock_init(&ipvs->rs_lock);
3683 3683
3684 /* Initialize rs_table */ 3684 /* Initialize rs_table */
3685 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 3685 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
@@ -3771,6 +3771,7 @@ err_sock:
3771void ip_vs_control_cleanup(void) 3771void ip_vs_control_cleanup(void)
3772{ 3772{
3773 EnterFunction(2); 3773 EnterFunction(2);
3774 unregister_netdevice_notifier(&ip_vs_dst_notifier);
3774 ip_vs_genl_unregister(); 3775 ip_vs_genl_unregister();
3775 nf_unregister_sockopt(&ip_vs_sockopts); 3776 nf_unregister_sockopt(&ip_vs_sockopts);
3776 LeaveFunction(2); 3777 LeaveFunction(2);
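
Note: two independent fixes in ip_vs_ctl.c above: the per-netns rs_lock is now initialised with rwlock_init() instead of the static __RW_LOCK_UNLOCKED() initialiser, and the cleanup path gains the missing unregister_netdevice_notifier() call. A minimal sketch of the lock-init idiom, with a hypothetical per-namespace struct:

#include <linux/spinlock.h>

struct example_ns {             /* hypothetical per-namespace state */
        rwlock_t rs_lock;
};

static int example_ns_init(struct example_ns *ns)
{
        /* Locks embedded in dynamically allocated objects need runtime
         * initialisation so lockdep can assign them a proper class;
         * __RW_LOCK_UNLOCKED() is meant for file-scope locks. */
        rwlock_init(&ns->rs_lock);
        return 0;
}
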
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f7af8b866017..5acfaf59a9c3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -779,7 +779,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
779 if (exp->helper) { 779 if (exp->helper) {
780 help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); 780 help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
781 if (help) 781 if (help)
782 rcu_assign_pointer(help->helper, exp->helper); 782 RCU_INIT_POINTER(help->helper, exp->helper);
783 } 783 }
784 784
785#ifdef CONFIG_NF_CONNTRACK_MARK 785#ifdef CONFIG_NF_CONNTRACK_MARK
@@ -1317,7 +1317,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
1317void nf_conntrack_cleanup(struct net *net) 1317void nf_conntrack_cleanup(struct net *net)
1318{ 1318{
1319 if (net_eq(net, &init_net)) 1319 if (net_eq(net, &init_net))
1320 rcu_assign_pointer(ip_ct_attach, NULL); 1320 RCU_INIT_POINTER(ip_ct_attach, NULL);
1321 1321
1322 /* This makes sure all current packets have passed through 1322 /* This makes sure all current packets have passed through
1323 netfilter framework. Roll on, two-stage module 1323 netfilter framework. Roll on, two-stage module
@@ -1327,7 +1327,7 @@ void nf_conntrack_cleanup(struct net *net)
1327 nf_conntrack_cleanup_net(net); 1327 nf_conntrack_cleanup_net(net);
1328 1328
1329 if (net_eq(net, &init_net)) { 1329 if (net_eq(net, &init_net)) {
1330 rcu_assign_pointer(nf_ct_destroy, NULL); 1330 RCU_INIT_POINTER(nf_ct_destroy, NULL);
1331 nf_conntrack_cleanup_init_net(); 1331 nf_conntrack_cleanup_init_net();
1332 } 1332 }
1333} 1333}
@@ -1576,11 +1576,11 @@ int nf_conntrack_init(struct net *net)
1576 1576
1577 if (net_eq(net, &init_net)) { 1577 if (net_eq(net, &init_net)) {
1578 /* For use by REJECT target */ 1578 /* For use by REJECT target */
1579 rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); 1579 RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
1580 rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); 1580 RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
1581 1581
1582 /* Howto get NAT offsets */ 1582 /* Howto get NAT offsets */
1583 rcu_assign_pointer(nf_ct_nat_offset, NULL); 1583 RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
1584 } 1584 }
1585 return 0; 1585 return 0;
1586 1586
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 63a1b915a7e4..3add99439059 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -94,7 +94,7 @@ int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
94 ret = -EBUSY; 94 ret = -EBUSY;
95 goto out_unlock; 95 goto out_unlock;
96 } 96 }
97 rcu_assign_pointer(nf_conntrack_event_cb, new); 97 RCU_INIT_POINTER(nf_conntrack_event_cb, new);
98 mutex_unlock(&nf_ct_ecache_mutex); 98 mutex_unlock(&nf_ct_ecache_mutex);
99 return ret; 99 return ret;
100 100
@@ -112,7 +112,7 @@ void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
112 notify = rcu_dereference_protected(nf_conntrack_event_cb, 112 notify = rcu_dereference_protected(nf_conntrack_event_cb,
113 lockdep_is_held(&nf_ct_ecache_mutex)); 113 lockdep_is_held(&nf_ct_ecache_mutex));
114 BUG_ON(notify != new); 114 BUG_ON(notify != new);
115 rcu_assign_pointer(nf_conntrack_event_cb, NULL); 115 RCU_INIT_POINTER(nf_conntrack_event_cb, NULL);
116 mutex_unlock(&nf_ct_ecache_mutex); 116 mutex_unlock(&nf_ct_ecache_mutex);
117} 117}
118EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); 118EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
@@ -129,7 +129,7 @@ int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
129 ret = -EBUSY; 129 ret = -EBUSY;
130 goto out_unlock; 130 goto out_unlock;
131 } 131 }
132 rcu_assign_pointer(nf_expect_event_cb, new); 132 RCU_INIT_POINTER(nf_expect_event_cb, new);
133 mutex_unlock(&nf_ct_ecache_mutex); 133 mutex_unlock(&nf_ct_ecache_mutex);
134 return ret; 134 return ret;
135 135
@@ -147,7 +147,7 @@ void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
147 notify = rcu_dereference_protected(nf_expect_event_cb, 147 notify = rcu_dereference_protected(nf_expect_event_cb,
148 lockdep_is_held(&nf_ct_ecache_mutex)); 148 lockdep_is_held(&nf_ct_ecache_mutex));
149 BUG_ON(notify != new); 149 BUG_ON(notify != new);
150 rcu_assign_pointer(nf_expect_event_cb, NULL); 150 RCU_INIT_POINTER(nf_expect_event_cb, NULL);
151 mutex_unlock(&nf_ct_ecache_mutex); 151 mutex_unlock(&nf_ct_ecache_mutex);
152} 152}
153EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); 153EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 05ecdc281a53..4605c947dcc4 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -169,7 +169,7 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type)
169 before updating alloc_size */ 169 before updating alloc_size */
170 type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align) 170 type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align)
171 + type->len; 171 + type->len;
172 rcu_assign_pointer(nf_ct_ext_types[type->id], type); 172 RCU_INIT_POINTER(nf_ct_ext_types[type->id], type);
173 update_alloc_size(type); 173 update_alloc_size(type);
174out: 174out:
175 mutex_unlock(&nf_ct_ext_type_mutex); 175 mutex_unlock(&nf_ct_ext_type_mutex);
@@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(nf_ct_extend_register);
181void nf_ct_extend_unregister(struct nf_ct_ext_type *type) 181void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
182{ 182{
183 mutex_lock(&nf_ct_ext_type_mutex); 183 mutex_lock(&nf_ct_ext_type_mutex);
184 rcu_assign_pointer(nf_ct_ext_types[type->id], NULL); 184 RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
185 update_alloc_size(type); 185 update_alloc_size(type);
186 mutex_unlock(&nf_ct_ext_type_mutex); 186 mutex_unlock(&nf_ct_ext_type_mutex);
187 rcu_barrier(); /* Wait for completion of call_rcu()'s */ 187 rcu_barrier(); /* Wait for completion of call_rcu()'s */
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 1bdfea357955..93c4bdbfc1ae 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -131,7 +131,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
131 helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); 131 helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
132 if (helper == NULL) { 132 if (helper == NULL) {
133 if (help) 133 if (help)
134 rcu_assign_pointer(help->helper, NULL); 134 RCU_INIT_POINTER(help->helper, NULL);
135 goto out; 135 goto out;
136 } 136 }
137 137
@@ -145,7 +145,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
145 memset(&help->help, 0, sizeof(help->help)); 145 memset(&help->help, 0, sizeof(help->help));
146 } 146 }
147 147
148 rcu_assign_pointer(help->helper, helper); 148 RCU_INIT_POINTER(help->helper, helper);
149out: 149out:
150 return ret; 150 return ret;
151} 151}
@@ -162,7 +162,7 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
162 lockdep_is_held(&nf_conntrack_lock) 162 lockdep_is_held(&nf_conntrack_lock)
163 ) == me) { 163 ) == me) {
164 nf_conntrack_event(IPCT_HELPER, ct); 164 nf_conntrack_event(IPCT_HELPER, ct);
165 rcu_assign_pointer(help->helper, NULL); 165 RCU_INIT_POINTER(help->helper, NULL);
166 } 166 }
167 return 0; 167 return 0;
168} 168}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7dec88a1755b..e58aa9b1fe8a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1125,7 +1125,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
1125 if (help && help->helper) { 1125 if (help && help->helper) {
1126 /* we had a helper before ... */ 1126 /* we had a helper before ... */
1127 nf_ct_remove_expectations(ct); 1127 nf_ct_remove_expectations(ct);
1128 rcu_assign_pointer(help->helper, NULL); 1128 RCU_INIT_POINTER(help->helper, NULL);
1129 } 1129 }
1130 1130
1131 return 0; 1131 return 0;
@@ -1163,7 +1163,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
1163 return -EOPNOTSUPP; 1163 return -EOPNOTSUPP;
1164 } 1164 }
1165 1165
1166 rcu_assign_pointer(help->helper, helper); 1166 RCU_INIT_POINTER(help->helper, helper);
1167 1167
1168 return 0; 1168 return 0;
1169} 1169}
@@ -1386,7 +1386,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
1386 } 1386 }
1387 1387
1388 /* not in hash table yet so not strictly necessary */ 1388 /* not in hash table yet so not strictly necessary */
1389 rcu_assign_pointer(help->helper, helper); 1389 RCU_INIT_POINTER(help->helper, helper);
1390 } 1390 }
1391 } else { 1391 } else {
1392 /* try an implicit helper assignation */ 1392 /* try an implicit helper assignation */
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 2fd4565144de..31d56b23b9e9 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -364,6 +364,7 @@ pptp_inbound_pkt(struct sk_buff *skb,
364 break; 364 break;
365 365
366 case PPTP_WAN_ERROR_NOTIFY: 366 case PPTP_WAN_ERROR_NOTIFY:
367 case PPTP_SET_LINK_INFO:
367 case PPTP_ECHO_REQUEST: 368 case PPTP_ECHO_REQUEST:
368 case PPTP_ECHO_REPLY: 369 case PPTP_ECHO_REPLY:
369 /* I don't have to explain these ;) */ 370 /* I don't have to explain these ;) */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 37bf94394be0..8235b86b4e87 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -409,7 +409,7 @@ static void tcp_options(const struct sk_buff *skb,
409 if (opsize < 2) /* "silly options" */ 409 if (opsize < 2) /* "silly options" */
410 return; 410 return;
411 if (opsize > length) 411 if (opsize > length)
412 break; /* don't parse partial options */ 412 return; /* don't parse partial options */
413 413
414 if (opcode == TCPOPT_SACK_PERM 414 if (opcode == TCPOPT_SACK_PERM
415 && opsize == TCPOLEN_SACK_PERM) 415 && opsize == TCPOLEN_SACK_PERM)
@@ -447,7 +447,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
447 BUG_ON(ptr == NULL); 447 BUG_ON(ptr == NULL);
448 448
449 /* Fast path for timestamp-only option */ 449 /* Fast path for timestamp-only option */
450 if (length == TCPOLEN_TSTAMP_ALIGNED*4 450 if (length == TCPOLEN_TSTAMP_ALIGNED
451 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) 451 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
452 | (TCPOPT_NOP << 16) 452 | (TCPOPT_NOP << 16)
453 | (TCPOPT_TIMESTAMP << 8) 453 | (TCPOPT_TIMESTAMP << 8)
@@ -469,7 +469,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
469 if (opsize < 2) /* "silly options" */ 469 if (opsize < 2) /* "silly options" */
470 return; 470 return;
471 if (opsize > length) 471 if (opsize > length)
472 break; /* don't parse partial options */ 472 return; /* don't parse partial options */
473 473
474 if (opcode == TCPOPT_SACK 474 if (opcode == TCPOPT_SACK
475 && opsize >= (TCPOLEN_SACK_BASE 475 && opsize >= (TCPOLEN_SACK_BASE
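
Note: two fixes in the TCP tracker above: the timestamp fast path compared length against TCPOLEN_TSTAMP_ALIGNED*4 (48 bytes) instead of TCPOLEN_TSTAMP_ALIGNED (12), and a truncated option now ends parsing with return instead of break, which only left the switch and re-entered the loop without consuming the remaining bytes. An illustrative user-space sketch of the corrected loop shape (not the kernel code):

#include <stddef.h>
#include <stdint.h>

static void parse_tcp_options(const uint8_t *ptr, size_t length)
{
        while (length > 0) {
                uint8_t opcode = *ptr++;
                uint8_t opsize;

                switch (opcode) {
                case 0:                         /* TCPOPT_EOL */
                        return;
                case 1:                         /* TCPOPT_NOP */
                        length--;
                        continue;
                default:
                        if (length < 2)
                                return;
                        opsize = *ptr++;
                        if (opsize < 2)         /* "silly options" */
                                return;
                        if (opsize > length)
                                return;         /* truncated option: stop parsing */
                        /* ... inspect the option body here ... */
                        ptr += opsize - 2;
                        length -= opsize;
                }
        }
}
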
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 20714edf6cd2..ce0c406f58a8 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -55,7 +55,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
55 llog = rcu_dereference_protected(nf_loggers[pf], 55 llog = rcu_dereference_protected(nf_loggers[pf],
56 lockdep_is_held(&nf_log_mutex)); 56 lockdep_is_held(&nf_log_mutex));
57 if (llog == NULL) 57 if (llog == NULL)
58 rcu_assign_pointer(nf_loggers[pf], logger); 58 RCU_INIT_POINTER(nf_loggers[pf], logger);
59 } 59 }
60 60
61 mutex_unlock(&nf_log_mutex); 61 mutex_unlock(&nf_log_mutex);
@@ -74,7 +74,7 @@ void nf_log_unregister(struct nf_logger *logger)
74 c_logger = rcu_dereference_protected(nf_loggers[i], 74 c_logger = rcu_dereference_protected(nf_loggers[i],
75 lockdep_is_held(&nf_log_mutex)); 75 lockdep_is_held(&nf_log_mutex));
76 if (c_logger == logger) 76 if (c_logger == logger)
77 rcu_assign_pointer(nf_loggers[i], NULL); 77 RCU_INIT_POINTER(nf_loggers[i], NULL);
78 list_del(&logger->list[i]); 78 list_del(&logger->list[i]);
79 } 79 }
80 mutex_unlock(&nf_log_mutex); 80 mutex_unlock(&nf_log_mutex);
@@ -92,7 +92,7 @@ int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
92 mutex_unlock(&nf_log_mutex); 92 mutex_unlock(&nf_log_mutex);
93 return -ENOENT; 93 return -ENOENT;
94 } 94 }
95 rcu_assign_pointer(nf_loggers[pf], logger); 95 RCU_INIT_POINTER(nf_loggers[pf], logger);
96 mutex_unlock(&nf_log_mutex); 96 mutex_unlock(&nf_log_mutex);
97 return 0; 97 return 0;
98} 98}
@@ -103,7 +103,7 @@ void nf_log_unbind_pf(u_int8_t pf)
103 if (pf >= ARRAY_SIZE(nf_loggers)) 103 if (pf >= ARRAY_SIZE(nf_loggers))
104 return; 104 return;
105 mutex_lock(&nf_log_mutex); 105 mutex_lock(&nf_log_mutex);
106 rcu_assign_pointer(nf_loggers[pf], NULL); 106 RCU_INIT_POINTER(nf_loggers[pf], NULL);
107 mutex_unlock(&nf_log_mutex); 107 mutex_unlock(&nf_log_mutex);
108} 108}
109EXPORT_SYMBOL(nf_log_unbind_pf); 109EXPORT_SYMBOL(nf_log_unbind_pf);
@@ -250,7 +250,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
250 mutex_unlock(&nf_log_mutex); 250 mutex_unlock(&nf_log_mutex);
251 return -ENOENT; 251 return -ENOENT;
252 } 252 }
253 rcu_assign_pointer(nf_loggers[tindex], logger); 253 RCU_INIT_POINTER(nf_loggers[tindex], logger);
254 mutex_unlock(&nf_log_mutex); 254 mutex_unlock(&nf_log_mutex);
255 } else { 255 } else {
256 mutex_lock(&nf_log_mutex); 256 mutex_lock(&nf_log_mutex);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 5b466cd1272f..99ffd2885088 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -40,7 +40,7 @@ int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
40 else if (old) 40 else if (old)
41 ret = -EBUSY; 41 ret = -EBUSY;
42 else { 42 else {
43 rcu_assign_pointer(queue_handler[pf], qh); 43 RCU_INIT_POINTER(queue_handler[pf], qh);
44 ret = 0; 44 ret = 0;
45 } 45 }
46 mutex_unlock(&queue_handler_mutex); 46 mutex_unlock(&queue_handler_mutex);
@@ -65,7 +65,7 @@ int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
65 return -EINVAL; 65 return -EINVAL;
66 } 66 }
67 67
68 rcu_assign_pointer(queue_handler[pf], NULL); 68 RCU_INIT_POINTER(queue_handler[pf], NULL);
69 mutex_unlock(&queue_handler_mutex); 69 mutex_unlock(&queue_handler_mutex);
70 70
71 synchronize_rcu(); 71 synchronize_rcu();
@@ -84,7 +84,7 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
84 queue_handler[pf], 84 queue_handler[pf],
85 lockdep_is_held(&queue_handler_mutex) 85 lockdep_is_held(&queue_handler_mutex)
86 ) == qh) 86 ) == qh)
87 rcu_assign_pointer(queue_handler[pf], NULL); 87 RCU_INIT_POINTER(queue_handler[pf], NULL);
88 } 88 }
89 mutex_unlock(&queue_handler_mutex); 89 mutex_unlock(&queue_handler_mutex);
90 90
@@ -312,6 +312,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
312 } 312 }
313 break; 313 break;
314 case NF_STOLEN: 314 case NF_STOLEN:
315 break;
315 default: 316 default:
316 kfree_skb(skb); 317 kfree_skb(skb);
317 } 318 }
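
Note: nf_reinject() gains an explicit break for NF_STOLEN above, so a stolen packet no longer falls through to the default kfree_skb() and frees an skb the hook already owns. A sketch of the control flow (verdict constants from <linux/netfilter.h>; the function name is hypothetical):

#include <linux/netfilter.h>
#include <linux/skbuff.h>

static void example_reinject(struct sk_buff *skb, unsigned int verdict)
{
        switch (verdict & NF_VERDICT_MASK) {
        case NF_ACCEPT:
                /* ... hand the packet back to the stack ... */
                break;
        case NF_STOLEN:
                break;          /* the hook owns the skb; do not free it */
        default:
                kfree_skb(skb);
        }
}
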
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 1905976b5135..c879c1a2370e 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -59,7 +59,7 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
59 nfnl_unlock(); 59 nfnl_unlock();
60 return -EBUSY; 60 return -EBUSY;
61 } 61 }
62 rcu_assign_pointer(subsys_table[n->subsys_id], n); 62 RCU_INIT_POINTER(subsys_table[n->subsys_id], n);
63 nfnl_unlock(); 63 nfnl_unlock();
64 64
65 return 0; 65 return 0;
@@ -210,7 +210,7 @@ static int __net_init nfnetlink_net_init(struct net *net)
210 if (!nfnl) 210 if (!nfnl)
211 return -ENOMEM; 211 return -ENOMEM;
212 net->nfnl_stash = nfnl; 212 net->nfnl_stash = nfnl;
213 rcu_assign_pointer(net->nfnl, nfnl); 213 RCU_INIT_POINTER(net->nfnl, nfnl);
214 return 0; 214 return 0;
215} 215}
216 216
@@ -219,7 +219,7 @@ static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
219 struct net *net; 219 struct net *net;
220 220
221 list_for_each_entry(net, net_exit_list, exit_list) 221 list_for_each_entry(net, net_exit_list, exit_list)
222 rcu_assign_pointer(net->nfnl, NULL); 222 RCU_INIT_POINTER(net->nfnl, NULL);
223 synchronize_net(); 223 synchronize_net();
224 list_for_each_entry(net, net_exit_list, exit_list) 224 list_for_each_entry(net, net_exit_list, exit_list)
225 netlink_kernel_release(net->nfnl_stash); 225 netlink_kernel_release(net->nfnl_stash);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 00bd475eab4b..a80b0cb03f17 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -646,8 +646,8 @@ verdicthdr_get(const struct nlattr * const nfqa[])
646 return NULL; 646 return NULL;
647 647
648 vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); 648 vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
649 verdict = ntohl(vhdr->verdict); 649 verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK;
650 if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) 650 if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN)
651 return NULL; 651 return NULL;
652 return vhdr; 652 return vhdr;
653} 653}
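
Note: verdicthdr_get() above now masks the verdict before validating it and rejects NF_STOLEN coming from userspace, which would otherwise leak the queued skb. A minimal sketch of the check, as a hypothetical helper:

#include <linux/netfilter.h>

static bool example_verdict_ok(unsigned int verdict)    /* hypothetical */
{
        verdict &= NF_VERDICT_MASK;
        return verdict <= NF_MAX_VERDICT && verdict != NF_STOLEN;
}
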
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 76a083184d8e..ed0db15ab00e 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -78,7 +78,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
78{ 78{
79 struct xt_rateest_match_info *info = par->matchinfo; 79 struct xt_rateest_match_info *info = par->matchinfo;
80 struct xt_rateest *est1, *est2; 80 struct xt_rateest *est1, *est2;
81 int ret = false; 81 int ret = -EINVAL;
82 82
83 if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | 83 if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
84 XT_RATEEST_MATCH_REL)) != 1) 84 XT_RATEEST_MATCH_REL)) != 1)
@@ -101,13 +101,12 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
101 if (!est1) 101 if (!est1)
102 goto err1; 102 goto err1;
103 103
104 est2 = NULL;
104 if (info->flags & XT_RATEEST_MATCH_REL) { 105 if (info->flags & XT_RATEEST_MATCH_REL) {
105 est2 = xt_rateest_lookup(info->name2); 106 est2 = xt_rateest_lookup(info->name2);
106 if (!est2) 107 if (!est2)
107 goto err2; 108 goto err2;
108 } else 109 }
109 est2 = NULL;
110
111 110
112 info->est1 = est1; 111 info->est1 = est1;
113 info->est2 = est2; 112 info->est2 = est2;
@@ -116,7 +115,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
116err2: 115err2:
117 xt_rateest_put(est1); 116 xt_rateest_put(est1);
118err1: 117err1:
119 return -EINVAL; 118 return ret;
120} 119}
121 120
122static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) 121static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
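
Note: the xt_rateest checkentry cleanup above initialises ret to -EINVAL once, returns it through the existing unwind labels, and hoists est2 = NULL above the conditional lookup. A compact sketch of the pattern with hypothetical helpers (example_lookup, example_put):

#include <linux/errno.h>
#include <linux/types.h>

struct est;                                             /* hypothetical */
extern struct est *example_lookup(const char *name);    /* hypothetical */
extern void example_put(struct est *e);                 /* hypothetical */

static int example_checkentry(const char *n1, const char *n2, bool rel)
{
        struct est *est1, *est2 = NULL;
        int ret = -EINVAL;

        est1 = example_lookup(n1);
        if (!est1)
                goto err1;
        if (rel) {
                est2 = example_lookup(n2);
                if (!est2)
                        goto err2;
        }
        return 0;
err2:
        example_put(est1);
err1:
        return ret;             /* error code carried in one variable */
}
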
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
index ea750e9df65f..d2732fc952e2 100644
--- a/net/netlabel/Makefile
+++ b/net/netlabel/Makefile
@@ -1,8 +1,6 @@
1# 1#
2# Makefile for the NetLabel subsystem. 2# Makefile for the NetLabel subsystem.
3# 3#
4# Feb 9, 2006, Paul Moore <paul.moore@hp.com>
5#
6 4
7# base objects 5# base objects
8obj-y := netlabel_user.o netlabel_kapi.o 6obj-y := netlabel_user.o netlabel_kapi.o
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
index c0519139679e..96b749dacc34 100644
--- a/net/netlabel/netlabel_addrlist.c
+++ b/net/netlabel/netlabel_addrlist.c
@@ -6,7 +6,7 @@
6 * system manages static and dynamic label mappings for network protocols such 6 * system manages static and dynamic label mappings for network protocols such
7 * as CIPSO and RIPSO. 7 * as CIPSO and RIPSO.
8 * 8 *
9 * Author: Paul Moore <paul.moore@hp.com> 9 * Author: Paul Moore <paul@paul-moore.com>
10 * 10 *
11 */ 11 */
12 12
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index 2b9644e19de0..fdbc1d2c7352 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -6,7 +6,7 @@
6 * system manages static and dynamic label mappings for network protocols such 6 * system manages static and dynamic label mappings for network protocols such
7 * as CIPSO and RIPSO. 7 * as CIPSO and RIPSO.
8 * 8 *
9 * Author: Paul Moore <paul.moore@hp.com> 9 * Author: Paul Moore <paul@paul-moore.com>
10 * 10 *
11 */ 11 */
12 12
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index dd53a36d89af..6bf878335d94 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -5,7 +5,7 @@
5 * NetLabel system manages static and dynamic label mappings for network 5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO. 6 * protocols such as CIPSO and RIPSO.
7 * 7 *
8 * Author: Paul Moore <paul.moore@hp.com> 8 * Author: Paul Moore <paul@paul-moore.com>
9 * 9 *
10 */ 10 */
11 11
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index af7f3355103e..d24d774bfd62 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -5,7 +5,7 @@
5 * NetLabel system manages static and dynamic label mappings for network 5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO. 6 * protocols such as CIPSO and RIPSO.
7 * 7 *
8 * Author: Paul Moore <paul.moore@hp.com> 8 * Author: Paul Moore <paul@paul-moore.com>
9 * 9 *
10 */ 10 */
11 11
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 2aa975e5452d..3f905e5370c2 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -6,7 +6,7 @@
6 * system manages static and dynamic label mappings for network protocols such 6 * system manages static and dynamic label mappings for network protocols such
7 * as CIPSO and RIPSO. 7 * as CIPSO and RIPSO.
8 * 8 *
9 * Author: Paul Moore <paul.moore@hp.com> 9 * Author: Paul Moore <paul@paul-moore.com>
10 * 10 *
11 */ 11 */
12 12
@@ -282,7 +282,7 @@ int __init netlbl_domhsh_init(u32 size)
282 INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); 282 INIT_LIST_HEAD(&hsh_tbl->tbl[iter]);
283 283
284 spin_lock(&netlbl_domhsh_lock); 284 spin_lock(&netlbl_domhsh_lock);
285 rcu_assign_pointer(netlbl_domhsh, hsh_tbl); 285 RCU_INIT_POINTER(netlbl_domhsh, hsh_tbl);
286 spin_unlock(&netlbl_domhsh_lock); 286 spin_unlock(&netlbl_domhsh_lock);
287 287
288 return 0; 288 return 0;
@@ -330,7 +330,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
330 &rcu_dereference(netlbl_domhsh)->tbl[bkt]); 330 &rcu_dereference(netlbl_domhsh)->tbl[bkt]);
331 } else { 331 } else {
332 INIT_LIST_HEAD(&entry->list); 332 INIT_LIST_HEAD(&entry->list);
333 rcu_assign_pointer(netlbl_domhsh_def, entry); 333 RCU_INIT_POINTER(netlbl_domhsh_def, entry);
334 } 334 }
335 335
336 if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { 336 if (entry->type == NETLBL_NLTYPE_ADDRSELECT) {
@@ -451,7 +451,7 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry,
451 if (entry != rcu_dereference(netlbl_domhsh_def)) 451 if (entry != rcu_dereference(netlbl_domhsh_def))
452 list_del_rcu(&entry->list); 452 list_del_rcu(&entry->list);
453 else 453 else
454 rcu_assign_pointer(netlbl_domhsh_def, NULL); 454 RCU_INIT_POINTER(netlbl_domhsh_def, NULL);
455 } else 455 } else
456 ret_val = -ENOENT; 456 ret_val = -ENOENT;
457 spin_unlock(&netlbl_domhsh_lock); 457 spin_unlock(&netlbl_domhsh_lock);
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 0261dda3f2d2..bfcc0f7024c5 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -6,7 +6,7 @@
6 * system manages static and dynamic label mappings for network protocols such 6 * system manages static and dynamic label mappings for network protocols such
7 * as CIPSO and RIPSO. 7 * as CIPSO and RIPSO.
8 * 8 *
9 * Author: Paul Moore <paul.moore@hp.com> 9 * Author: Paul Moore <paul@paul-moore.com>
10 * 10 *
11 */ 11 */
12 12
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index b528dd928d3c..9c24de10a657 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -5,7 +5,7 @@
5 * system manages static and dynamic label mappings for network protocols such 5 * system manages static and dynamic label mappings for network protocols such
6 * as CIPSO and RIPSO. 6 * as CIPSO and RIPSO.
7 * 7 *
8 * Author: Paul Moore <paul.moore@hp.com> 8 * Author: Paul Moore <paul@paul-moore.com>
9 * 9 *
10 */ 10 */
11 11
@@ -341,11 +341,11 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
341 341
342 entry = kzalloc(sizeof(*entry), GFP_ATOMIC); 342 entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
343 if (entry == NULL) 343 if (entry == NULL)
344 return -ENOMEM; 344 goto out_entry;
345 if (domain != NULL) { 345 if (domain != NULL) {
346 entry->domain = kstrdup(domain, GFP_ATOMIC); 346 entry->domain = kstrdup(domain, GFP_ATOMIC);
347 if (entry->domain == NULL) 347 if (entry->domain == NULL)
348 goto cfg_cipsov4_map_add_failure; 348 goto out_domain;
349 } 349 }
350 350
351 if (addr == NULL && mask == NULL) { 351 if (addr == NULL && mask == NULL) {
@@ -354,13 +354,13 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
354 } else if (addr != NULL && mask != NULL) { 354 } else if (addr != NULL && mask != NULL) {
355 addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); 355 addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC);
356 if (addrmap == NULL) 356 if (addrmap == NULL)
357 goto cfg_cipsov4_map_add_failure; 357 goto out_addrmap;
358 INIT_LIST_HEAD(&addrmap->list4); 358 INIT_LIST_HEAD(&addrmap->list4);
359 INIT_LIST_HEAD(&addrmap->list6); 359 INIT_LIST_HEAD(&addrmap->list6);
360 360
361 addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC); 361 addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC);
362 if (addrinfo == NULL) 362 if (addrinfo == NULL)
363 goto cfg_cipsov4_map_add_failure; 363 goto out_addrinfo;
364 addrinfo->type_def.cipsov4 = doi_def; 364 addrinfo->type_def.cipsov4 = doi_def;
365 addrinfo->type = NETLBL_NLTYPE_CIPSOV4; 365 addrinfo->type = NETLBL_NLTYPE_CIPSOV4;
366 addrinfo->list.addr = addr->s_addr & mask->s_addr; 366 addrinfo->list.addr = addr->s_addr & mask->s_addr;
@@ -374,7 +374,7 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
374 entry->type = NETLBL_NLTYPE_ADDRSELECT; 374 entry->type = NETLBL_NLTYPE_ADDRSELECT;
375 } else { 375 } else {
376 ret_val = -EINVAL; 376 ret_val = -EINVAL;
377 goto cfg_cipsov4_map_add_failure; 377 goto out_addrmap;
378 } 378 }
379 379
380 ret_val = netlbl_domhsh_add(entry, audit_info); 380 ret_val = netlbl_domhsh_add(entry, audit_info);
@@ -384,11 +384,15 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
384 return 0; 384 return 0;
385 385
386cfg_cipsov4_map_add_failure: 386cfg_cipsov4_map_add_failure:
387 cipso_v4_doi_putdef(doi_def); 387 kfree(addrinfo);
388out_addrinfo:
389 kfree(addrmap);
390out_addrmap:
388 kfree(entry->domain); 391 kfree(entry->domain);
392out_domain:
389 kfree(entry); 393 kfree(entry);
390 kfree(addrmap); 394out_entry:
391 kfree(addrinfo); 395 cipso_v4_doi_putdef(doi_def);
392 return ret_val; 396 return ret_val;
393} 397}
394 398
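
Note: the rewritten failure path in netlbl_cfg_cipsov4_map_add() above unwinds allocations in reverse order through dedicated labels, so a failure before an allocation never frees it, and the DOI reference is dropped last on every path. A generic sketch of the pattern with hypothetical allocations:

#include <linux/errno.h>
#include <linux/slab.h>

static int example_map_add(void)        /* hypothetical */
{
        struct a { int x; } *a;
        struct b { int x; } *b;
        struct c { int x; } *c;
        int ret = -ENOMEM;

        a = kzalloc(sizeof(*a), GFP_ATOMIC);
        if (!a)
                goto out;
        b = kzalloc(sizeof(*b), GFP_ATOMIC);
        if (!b)
                goto out_a;
        c = kzalloc(sizeof(*c), GFP_ATOMIC);
        if (!c)
                goto out_b;
        return 0;

out_b:
        kfree(b);
out_a:
        kfree(a);
out:
        return ret;             /* each label frees only what was allocated */
}
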
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index dff8a0809245..bfa555869775 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -5,7 +5,7 @@
5 * NetLabel system manages static and dynamic label mappings for network 5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO. 6 * protocols such as CIPSO and RIPSO.
7 * 7 *
8 * Author: Paul Moore <paul.moore@hp.com> 8 * Author: Paul Moore <paul@paul-moore.com>
9 * 9 *
10 */ 10 */
11 11
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index 8db37f4c10f7..5a9f31ce5799 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -5,7 +5,7 @@
5 * NetLabel system manages static and dynamic label mappings for network 5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO. 6 * protocols such as CIPSO and RIPSO.
7 * 7 *
8 * Author: Paul Moore <paul.moore@hp.com> 8 * Author: Paul Moore <paul@paul-moore.com>
9 * 9 *
10 */ 10 */
11 11
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index f1ecf848e3ac..e251c2c88521 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -5,7 +5,7 @@
5 * NetLabel system. The NetLabel system manages static and dynamic label 5 * NetLabel system. The NetLabel system manages static and dynamic label
6 * mappings for network protocols such as CIPSO and RIPSO. 6 * mappings for network protocols such as CIPSO and RIPSO.
7 * 7 *
8 * Author: Paul Moore <paul.moore@hp.com> 8 * Author: Paul Moore <paul@paul-moore.com>
9 * 9 *
10 */ 10 */
11 11
@@ -354,7 +354,7 @@ static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex)
354 INIT_LIST_HEAD(&iface->list); 354 INIT_LIST_HEAD(&iface->list);
355 if (netlbl_unlhsh_rcu_deref(netlbl_unlhsh_def) != NULL) 355 if (netlbl_unlhsh_rcu_deref(netlbl_unlhsh_def) != NULL)
356 goto add_iface_failure; 356 goto add_iface_failure;
357 rcu_assign_pointer(netlbl_unlhsh_def, iface); 357 RCU_INIT_POINTER(netlbl_unlhsh_def, iface);
358 } 358 }
359 spin_unlock(&netlbl_unlhsh_lock); 359 spin_unlock(&netlbl_unlhsh_lock);
360 360
@@ -621,7 +621,7 @@ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
621 if (iface->ifindex > 0) 621 if (iface->ifindex > 0)
622 list_del_rcu(&iface->list); 622 list_del_rcu(&iface->list);
623 else 623 else
624 rcu_assign_pointer(netlbl_unlhsh_def, NULL); 624 RCU_INIT_POINTER(netlbl_unlhsh_def, NULL);
625 spin_unlock(&netlbl_unlhsh_lock); 625 spin_unlock(&netlbl_unlhsh_lock);
626 626
627 call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); 627 call_rcu(&iface->rcu, netlbl_unlhsh_free_iface);
@@ -1449,7 +1449,7 @@ int __init netlbl_unlabel_init(u32 size)
1449 1449
1450 rcu_read_lock(); 1450 rcu_read_lock();
1451 spin_lock(&netlbl_unlhsh_lock); 1451 spin_lock(&netlbl_unlhsh_lock);
1452 rcu_assign_pointer(netlbl_unlhsh, hsh_tbl); 1452 RCU_INIT_POINTER(netlbl_unlhsh, hsh_tbl);
1453 spin_unlock(&netlbl_unlhsh_lock); 1453 spin_unlock(&netlbl_unlhsh_lock);
1454 rcu_read_unlock(); 1454 rcu_read_unlock();
1455 1455
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
index 0bc8dc3f9e3c..700af49022a0 100644
--- a/net/netlabel/netlabel_unlabeled.h
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -5,7 +5,7 @@
5 * NetLabel system. The NetLabel system manages static and dynamic label 5 * NetLabel system. The NetLabel system manages static and dynamic label
6 * mappings for network protocols such as CIPSO and RIPSO. 6 * mappings for network protocols such as CIPSO and RIPSO.
7 * 7 *
8 * Author: Paul Moore <paul.moore@hp.com> 8 * Author: Paul Moore <paul@paul-moore.com>
9 * 9 *
10 */ 10 */
11 11
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index a3fd75ac3fa5..9fae63f10298 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -5,7 +5,7 @@
5 * NetLabel system manages static and dynamic label mappings for network 5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO. 6 * protocols such as CIPSO and RIPSO.
7 * 7 *
8 * Author: Paul Moore <paul.moore@hp.com> 8 * Author: Paul Moore <paul@paul-moore.com>
9 * 9 *
10 */ 10 */
11 11
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index f4fc4c9ad567..81969785e279 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -5,7 +5,7 @@
5 * NetLabel system manages static and dynamic label mappings for network 5 * NetLabel system manages static and dynamic label mappings for network
6 * protocols such as CIPSO and RIPSO. 6 * protocols such as CIPSO and RIPSO.
7 * 7 *
8 * Author: Paul Moore <paul.moore@hp.com> 8 * Author: Paul Moore <paul@paul-moore.com>
9 * 9 *
10 */ 10 */
11 11
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0a4db0211da0..1201b6d4183d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1324,10 +1324,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1324 if (msg->msg_flags&MSG_OOB) 1324 if (msg->msg_flags&MSG_OOB)
1325 return -EOPNOTSUPP; 1325 return -EOPNOTSUPP;
1326 1326
1327 if (NULL == siocb->scm) { 1327 if (NULL == siocb->scm)
1328 siocb->scm = &scm; 1328 siocb->scm = &scm;
1329 memset(&scm, 0, sizeof(scm)); 1329
1330 }
1331 err = scm_send(sock, msg, siocb->scm); 1330 err = scm_send(sock, msg, siocb->scm);
1332 if (err < 0) 1331 if (err < 0)
1333 return err; 1332 return err;
@@ -1578,7 +1577,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1578 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); 1577 new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1579 if (!new) 1578 if (!new)
1580 return -ENOMEM; 1579 return -ENOMEM;
1581 old = rcu_dereference_raw(tbl->listeners); 1580 old = rcu_dereference_protected(tbl->listeners, 1);
1582 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); 1581 memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1583 rcu_assign_pointer(tbl->listeners, new); 1582 rcu_assign_pointer(tbl->listeners, new);
1584 1583
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c698cec0a445..7b5f03253016 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -40,6 +40,10 @@
40 * byte arrays at the end of sockaddr_ll 40 * byte arrays at the end of sockaddr_ll
41 * and packet_mreq. 41 * and packet_mreq.
42 * Johann Baudy : Added TX RING. 42 * Johann Baudy : Added TX RING.
43 * Chetan Loke : Implemented TPACKET_V3 block abstraction
44 * layer.
45 * Copyright (C) 2011, <lokec@ccs.neu.edu>
46 *
43 * 47 *
44 * This program is free software; you can redistribute it and/or 48 * This program is free software; you can redistribute it and/or
45 * modify it under the terms of the GNU General Public License 49 * modify it under the terms of the GNU General Public License
@@ -161,9 +165,56 @@ struct packet_mreq_max {
161 unsigned char mr_address[MAX_ADDR_LEN]; 165 unsigned char mr_address[MAX_ADDR_LEN];
162}; 166};
163 167
164static int packet_set_ring(struct sock *sk, struct tpacket_req *req, 168static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
165 int closing, int tx_ring); 169 int closing, int tx_ring);
166 170
171
172#define V3_ALIGNMENT (8)
173
174#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
175
176#define BLK_PLUS_PRIV(sz_of_priv) \
177 (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
178
179/* kbdq - kernel block descriptor queue */
180struct tpacket_kbdq_core {
181 struct pgv *pkbdq;
182 unsigned int feature_req_word;
183 unsigned int hdrlen;
184 unsigned char reset_pending_on_curr_blk;
185 unsigned char delete_blk_timer;
186 unsigned short kactive_blk_num;
187 unsigned short blk_sizeof_priv;
188
189 /* last_kactive_blk_num:
190 * trick to see if user-space has caught up
191 * in order to avoid refreshing timer when every single pkt arrives.
192 */
193 unsigned short last_kactive_blk_num;
194
195 char *pkblk_start;
196 char *pkblk_end;
197 int kblk_size;
198 unsigned int knum_blocks;
199 uint64_t knxt_seq_num;
200 char *prev;
201 char *nxt_offset;
202 struct sk_buff *skb;
203
204 atomic_t blk_fill_in_prog;
205
206 /* Default is set to 8ms */
207#define DEFAULT_PRB_RETIRE_TOV (8)
208
209 unsigned short retire_blk_tov;
210 unsigned short version;
211 unsigned long tov_in_jiffies;
212
213 /* timer to retire an outstanding block */
214 struct timer_list retire_blk_timer;
215};
216
217#define PGV_FROM_VMALLOC 1
167struct pgv { 218struct pgv {
168 char *buffer; 219 char *buffer;
169}; 220};
@@ -179,12 +230,44 @@ struct packet_ring_buffer {
179 unsigned int pg_vec_pages; 230 unsigned int pg_vec_pages;
180 unsigned int pg_vec_len; 231 unsigned int pg_vec_len;
181 232
233 struct tpacket_kbdq_core prb_bdqc;
182 atomic_t pending; 234 atomic_t pending;
183}; 235};
184 236
237#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
238#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
239#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt)
240#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
241#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
242#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
243#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
244
185struct packet_sock; 245struct packet_sock;
186static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); 246static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
187 247
248static void *packet_previous_frame(struct packet_sock *po,
249 struct packet_ring_buffer *rb,
250 int status);
251static void packet_increment_head(struct packet_ring_buffer *buff);
252static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
253 struct tpacket_block_desc *);
254static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
255 struct packet_sock *);
256static void prb_retire_current_block(struct tpacket_kbdq_core *,
257 struct packet_sock *, unsigned int status);
258static int prb_queue_frozen(struct tpacket_kbdq_core *);
259static void prb_open_block(struct tpacket_kbdq_core *,
260 struct tpacket_block_desc *);
261static void prb_retire_rx_blk_timer_expired(unsigned long);
262static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
263static void prb_init_blk_timer(struct packet_sock *,
264 struct tpacket_kbdq_core *,
265 void (*func) (unsigned long));
266static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
267static void prb_clear_rxhash(struct tpacket_kbdq_core *,
268 struct tpacket3_hdr *);
269static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
270 struct tpacket3_hdr *);
188static void packet_flush_mclist(struct sock *sk); 271static void packet_flush_mclist(struct sock *sk);
189 272
190struct packet_fanout; 273struct packet_fanout;
@@ -193,6 +276,7 @@ struct packet_sock {
193 struct sock sk; 276 struct sock sk;
194 struct packet_fanout *fanout; 277 struct packet_fanout *fanout;
195 struct tpacket_stats stats; 278 struct tpacket_stats stats;
279 union tpacket_stats_u stats_u;
196 struct packet_ring_buffer rx_ring; 280 struct packet_ring_buffer rx_ring;
197 struct packet_ring_buffer tx_ring; 281 struct packet_ring_buffer tx_ring;
198 int copy_thresh; 282 int copy_thresh;
@@ -242,6 +326,15 @@ struct packet_skb_cb {
242 326
243#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) 327#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
244 328
329#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
330#define GET_PBLOCK_DESC(x, bid) \
331 ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
332#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \
333 ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
334#define GET_NEXT_PRB_BLK_NUM(x) \
335 (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
336 ((x)->kactive_blk_num+1) : 0)
337
245static inline struct packet_sock *pkt_sk(struct sock *sk) 338static inline struct packet_sock *pkt_sk(struct sock *sk)
246{ 339{
247 return (struct packet_sock *)sk; 340 return (struct packet_sock *)sk;
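
Note: the new block-descriptor macros above mirror the TPACKET_V3 layout that userspace sees: a retired block starts with a tpacket_block_desc whose bh1 header carries the packet count and the offset of the first tpacket3_hdr, and packets within the block are chained via tp_next_offset. A hedged user-space sketch of walking one block (struct and field names from <linux/if_packet.h> as introduced with TPACKET_V3; error handling and ring setup omitted):

#include <linux/if_packet.h>
#include <stdint.h>
#include <stdio.h>

/* Walk the packets of one retired block.  The caller checks that
 * block_status has TP_STATUS_USER set before calling, and hands the block
 * back to the kernel (block_status = TP_STATUS_KERNEL) afterwards.
 */
static void walk_block(struct tpacket_block_desc *block)
{
        uint32_t num = block->hdr.bh1.num_pkts;
        struct tpacket3_hdr *ppd =
                (struct tpacket3_hdr *)((uint8_t *)block +
                                        block->hdr.bh1.offset_to_first_pkt);

        for (uint32_t i = 0; i < num; i++) {
                printf("pkt %u: snaplen=%u rxhash=%u\n",
                       i, ppd->tp_snaplen, ppd->hv1.tp_rxhash);
                ppd = (struct tpacket3_hdr *)((uint8_t *)ppd +
                                              ppd->tp_next_offset);
        }
}
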
@@ -325,8 +418,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
325 h.h2->tp_status = status; 418 h.h2->tp_status = status;
326 flush_dcache_page(pgv_to_page(&h.h2->tp_status)); 419 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
327 break; 420 break;
421 case TPACKET_V3:
328 default: 422 default:
329 pr_err("TPACKET version not supported\n"); 423 WARN(1, "TPACKET version not supported.\n");
330 BUG(); 424 BUG();
331 } 425 }
332 426
@@ -351,8 +445,9 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
351 case TPACKET_V2: 445 case TPACKET_V2:
352 flush_dcache_page(pgv_to_page(&h.h2->tp_status)); 446 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
353 return h.h2->tp_status; 447 return h.h2->tp_status;
448 case TPACKET_V3:
354 default: 449 default:
355 pr_err("TPACKET version not supported\n"); 450 WARN(1, "TPACKET version not supported.\n");
356 BUG(); 451 BUG();
357 return 0; 452 return 0;
358 } 453 }
@@ -389,6 +484,670 @@ static inline void *packet_current_frame(struct packet_sock *po,
389 return packet_lookup_frame(po, rb, rb->head, status); 484 return packet_lookup_frame(po, rb, rb->head, status);
390} 485}
391 486
487static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
488{
489 del_timer_sync(&pkc->retire_blk_timer);
490}
491
492static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
493 int tx_ring,
494 struct sk_buff_head *rb_queue)
495{
496 struct tpacket_kbdq_core *pkc;
497
498 pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
499
500 spin_lock(&rb_queue->lock);
501 pkc->delete_blk_timer = 1;
502 spin_unlock(&rb_queue->lock);
503
504 prb_del_retire_blk_timer(pkc);
505}
506
507static void prb_init_blk_timer(struct packet_sock *po,
508 struct tpacket_kbdq_core *pkc,
509 void (*func) (unsigned long))
510{
511 init_timer(&pkc->retire_blk_timer);
512 pkc->retire_blk_timer.data = (long)po;
513 pkc->retire_blk_timer.function = func;
514 pkc->retire_blk_timer.expires = jiffies;
515}
516
517static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring)
518{
519 struct tpacket_kbdq_core *pkc;
520
521 if (tx_ring)
522 BUG();
523
524 pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
525 prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
526}
527
528static int prb_calc_retire_blk_tmo(struct packet_sock *po,
529 int blk_size_in_bytes)
530{
531 struct net_device *dev;
532 unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
533 struct ethtool_cmd ecmd;
534 int err;
535
536 rtnl_lock();
537 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
538 if (unlikely(!dev)) {
539 rtnl_unlock();
540 return DEFAULT_PRB_RETIRE_TOV;
541 }
542 err = __ethtool_get_settings(dev, &ecmd);
543 rtnl_unlock();
544 if (!err) {
545 switch (ecmd.speed) {
546 case SPEED_10000:
547 msec = 1;
548 div = 10000/1000;
549 break;
550 case SPEED_1000:
551 msec = 1;
552 div = 1000/1000;
553 break;
554 /*
555 * If the link speed is so slow you don't really
556 * need to worry about perf anyways
557 */
558 case SPEED_100:
559 case SPEED_10:
560 default:
561 return DEFAULT_PRB_RETIRE_TOV;
562 }
563 }
564
565 mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
566
567 if (div)
568 mbits /= div;
569
570 tmo = mbits * msec;
571
572 if (div)
573 return tmo+1;
574 return tmo;
575}
576
577static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
578 union tpacket_req_u *req_u)
579{
580 p1->feature_req_word = req_u->req3.tp_feature_req_word;
581}
582
583static void init_prb_bdqc(struct packet_sock *po,
584 struct packet_ring_buffer *rb,
585 struct pgv *pg_vec,
586 union tpacket_req_u *req_u, int tx_ring)
587{
588 struct tpacket_kbdq_core *p1 = &rb->prb_bdqc;
589 struct tpacket_block_desc *pbd;
590
591 memset(p1, 0x0, sizeof(*p1));
592
593 p1->knxt_seq_num = 1;
594 p1->pkbdq = pg_vec;
595 pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
596 p1->pkblk_start = (char *)pg_vec[0].buffer;
597 p1->kblk_size = req_u->req3.tp_block_size;
598 p1->knum_blocks = req_u->req3.tp_block_nr;
599 p1->hdrlen = po->tp_hdrlen;
600 p1->version = po->tp_version;
601 p1->last_kactive_blk_num = 0;
602 po->stats_u.stats3.tp_freeze_q_cnt = 0;
603 if (req_u->req3.tp_retire_blk_tov)
604 p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
605 else
606 p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
607 req_u->req3.tp_block_size);
608 p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
609 p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
610
611 prb_init_ft_ops(p1, req_u);
612 prb_setup_retire_blk_timer(po, tx_ring);
613 prb_open_block(p1, pbd);
614}
615
616/* Do NOT update the last_blk_num first.
617 * Assumes sk_buff_head lock is held.
618 */
619static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
620{
621 mod_timer(&pkc->retire_blk_timer,
622 jiffies + pkc->tov_in_jiffies);
623 pkc->last_kactive_blk_num = pkc->kactive_blk_num;
624}
625
626/*
627 * Timer logic:
628 * 1) We refresh the timer only when we open a block.
629 * By doing this we don't waste cycles refreshing the timer
630 * on packet-by-packet basis.
631 *
632 * With a 1MB block-size, on a 1Gbps line, it will take
633 * i) ~8 ms to fill a block + ii) memcpy etc.
634 * In this cut we are not accounting for the memcpy time.
635 *
636 * So, if the user sets the 'tmo' to 10ms then the timer
637 * will never fire while the block is still getting filled
638 * (which is what we want). However, the user could choose
639 * to close a block early and that's fine.
640 *
641 * But when the timer does fire, we check whether or not to refresh it.
642 * Since the tmo granularity is in msecs, it is not too expensive
643 * to refresh the timer, lets say every '8' msecs.
644 * Either the user can set the 'tmo' or we can derive it based on
645 * a) line-speed and b) block-size.
646 * prb_calc_retire_blk_tmo() calculates the tmo.
647 *
648 */
649static void prb_retire_rx_blk_timer_expired(unsigned long data)
650{
651 struct packet_sock *po = (struct packet_sock *)data;
652 struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc;
653 unsigned int frozen;
654 struct tpacket_block_desc *pbd;
655
656 spin_lock(&po->sk.sk_receive_queue.lock);
657
658 frozen = prb_queue_frozen(pkc);
659 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
660
661 if (unlikely(pkc->delete_blk_timer))
662 goto out;
663
664 /* We only need to plug the race when the block is partially filled.
665 * tpacket_rcv:
666 * lock(); increment BLOCK_NUM_PKTS; unlock()
667 * copy_bits() is in progress ...
668 * timer fires on other cpu:
669 * we can't retire the current block because copy_bits
670 * is in progress.
671 *
672 */
673 if (BLOCK_NUM_PKTS(pbd)) {
674 while (atomic_read(&pkc->blk_fill_in_prog)) {
675 /* Waiting for skb_copy_bits to finish... */
676 cpu_relax();
677 }
678 }
679
680 if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
681 if (!frozen) {
682 prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
683 if (!prb_dispatch_next_block(pkc, po))
684 goto refresh_timer;
685 else
686 goto out;
687 } else {
688 /* Case 1. Queue was frozen because user-space was
689 * lagging behind.
690 */
691 if (prb_curr_blk_in_use(pkc, pbd)) {
692 /*
693 * Ok, user-space is still behind.
694 * So just refresh the timer.
695 */
696 goto refresh_timer;
697 } else {
698 /* Case 2. queue was frozen,user-space caught up,
699 * now the link went idle && the timer fired.
700 * We don't have a block to close.So we open this
701 * block and restart the timer.
702 * opening a block thaws the queue,restarts timer
703 * Thawing/timer-refresh is a side effect.
704 */
705 prb_open_block(pkc, pbd);
706 goto out;
707 }
708 }
709 }
710
711refresh_timer:
712 _prb_refresh_rx_retire_blk_timer(pkc);
713
714out:
715 spin_unlock(&po->sk.sk_receive_queue.lock);
716}
717
718static inline void prb_flush_block(struct tpacket_kbdq_core *pkc1,
719 struct tpacket_block_desc *pbd1, __u32 status)
720{
721 /* Flush everything minus the block header */
722
723#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
724 u8 *start, *end;
725
726 start = (u8 *)pbd1;
727
728 /* Skip the block header(we know header WILL fit in 4K) */
729 start += PAGE_SIZE;
730
731 end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
732 for (; start < end; start += PAGE_SIZE)
733 flush_dcache_page(pgv_to_page(start));
734
735 smp_wmb();
736#endif
737
738 /* Now update the block status. */
739
740 BLOCK_STATUS(pbd1) = status;
741
742 /* Flush the block header */
743
744#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
745 start = (u8 *)pbd1;
746 flush_dcache_page(pgv_to_page(start));
747
748 smp_wmb();
749#endif
750}
751
752/*
753 * Side effect:
754 *
755 * 1) flush the block
756 * 2) Increment active_blk_num
757 *
758 * Note:We DONT refresh the timer on purpose.
759 * Because almost always the next block will be opened.
760 */
761static void prb_close_block(struct tpacket_kbdq_core *pkc1,
762 struct tpacket_block_desc *pbd1,
763 struct packet_sock *po, unsigned int stat)
764{
765 __u32 status = TP_STATUS_USER | stat;
766
767 struct tpacket3_hdr *last_pkt;
768 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
769
770 if (po->stats.tp_drops)
771 status |= TP_STATUS_LOSING;
772
773 last_pkt = (struct tpacket3_hdr *)pkc1->prev;
774 last_pkt->tp_next_offset = 0;
775
776 /* Get the ts of the last pkt */
777 if (BLOCK_NUM_PKTS(pbd1)) {
778 h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
779 h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
780 } else {
781 /* Ok, we tmo'd - so get the current time */
782 struct timespec ts;
783 getnstimeofday(&ts);
784 h1->ts_last_pkt.ts_sec = ts.tv_sec;
785 h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
786 }
787
788 smp_wmb();
789
790 /* Flush the block */
791 prb_flush_block(pkc1, pbd1, status);
792
793 pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
794}
795
796static inline void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
797{
798 pkc->reset_pending_on_curr_blk = 0;
799}
800
801/*
802 * Side effect of opening a block:
803 *
804 * 1) prb_queue is thawed.
805 * 2) retire_blk_timer is refreshed.
806 *
807 */
808static void prb_open_block(struct tpacket_kbdq_core *pkc1,
809 struct tpacket_block_desc *pbd1)
810{
811 struct timespec ts;
812 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
813
814 smp_rmb();
815
816 if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) {
817
818 /* We could have just memset this but we will lose the
819 * flexibility of making the priv area sticky
820 */
821 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
822 BLOCK_NUM_PKTS(pbd1) = 0;
823 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
824 getnstimeofday(&ts);
825 h1->ts_first_pkt.ts_sec = ts.tv_sec;
826 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
827 pkc1->pkblk_start = (char *)pbd1;
828 pkc1->nxt_offset = (char *)(pkc1->pkblk_start +
829 BLK_PLUS_PRIV(pkc1->blk_sizeof_priv));
830 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
831 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
832 pbd1->version = pkc1->version;
833 pkc1->prev = pkc1->nxt_offset;
834 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
835 prb_thaw_queue(pkc1);
836 _prb_refresh_rx_retire_blk_timer(pkc1);
837
838 smp_wmb();
839
840 return;
841 }
842
843 WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n",
844 pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num);
845 dump_stack();
846 BUG();
847}
848
849/*
850 * Queue freeze logic:
851 * 1) Assume tp_block_nr = 8 blocks.
852 * 2) At time 't0', user opens Rx ring.
853 * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7
854 * 4) user-space is either sleeping or processing block '0'.
855 * 5) tpacket_rcv is currently filling block '7', since there is no space left,
856 * it will close block-7,loop around and try to fill block '0'.
857 * call-flow:
858 * __packet_lookup_frame_in_block
859 * prb_retire_current_block()
860 * prb_dispatch_next_block()
861 * |->(BLOCK_STATUS == USER) evaluates to true
862 * 5.1) Since block-0 is currently in-use, we just freeze the queue.
863 * 6) Now there are two cases:
864 * 6.1) Link goes idle right after the queue is frozen.
865 * But remember, the last open_block() refreshed the timer.
866 * When this timer expires,it will refresh itself so that we can
867 * re-open block-0 in near future.
868 * 6.2) Link is busy and keeps on receiving packets. This is a simple
869 * case and __packet_lookup_frame_in_block will check if block-0
870 * is free and can now be re-used.
871 */
872static inline void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
873 struct packet_sock *po)
874{
875 pkc->reset_pending_on_curr_blk = 1;
876 po->stats_u.stats3.tp_freeze_q_cnt++;
877}
878
879#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
880
881/*
882 * If the next block is free then we will dispatch it
883 * and return a good offset.
884 * Else, we will freeze the queue.
885 * So, caller must check the return value.
886 */
887static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
888 struct packet_sock *po)
889{
890 struct tpacket_block_desc *pbd;
891
892 smp_rmb();
893
894 /* 1. Get current block num */
895 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
896
897 /* 2. If this block is currently in_use then freeze the queue */
898 if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
899 prb_freeze_queue(pkc, po);
900 return NULL;
901 }
902
903 /*
904 * 3.
905 * open this block and return the offset where the first packet
906 * needs to get stored.
907 */
908 prb_open_block(pkc, pbd);
909 return (void *)pkc->nxt_offset;
910}
911
912static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
913 struct packet_sock *po, unsigned int status)
914{
915 struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
916
917 /* retire/close the current block */
918 if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
919 /*
920 * Plug the case where copy_bits() is in progress on
921 * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
922 * have space to copy the pkt in the current block and
923 * called prb_retire_current_block()
924 *
925 * We don't need to worry about the TMO case because
926 * the timer-handler already handled this case.
927 */
928 if (!(status & TP_STATUS_BLK_TMO)) {
929 while (atomic_read(&pkc->blk_fill_in_prog)) {
930 /* Waiting for skb_copy_bits to finish... */
931 cpu_relax();
932 }
933 }
934 prb_close_block(pkc, pbd, po, status);
935 return;
936 }
937
938 WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
939 dump_stack();
940 BUG();
941}
942
943static inline int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
944 struct tpacket_block_desc *pbd)
945{
946 return TP_STATUS_USER & BLOCK_STATUS(pbd);
947}
948
949static inline int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
950{
951 return pkc->reset_pending_on_curr_blk;
952}
953
954static inline void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
955{
956 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
957 atomic_dec(&pkc->blk_fill_in_prog);
958}
959
960static inline void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
961 struct tpacket3_hdr *ppd)
962{
963 ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb);
964}
965
966static inline void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
967 struct tpacket3_hdr *ppd)
968{
969 ppd->hv1.tp_rxhash = 0;
970}
971
972static inline void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
973 struct tpacket3_hdr *ppd)
974{
975 if (vlan_tx_tag_present(pkc->skb)) {
976 ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb);
977 ppd->tp_status = TP_STATUS_VLAN_VALID;
978 } else {
979 ppd->hv1.tp_vlan_tci = ppd->tp_status = 0;
980 }
981}
982
983static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
984 struct tpacket3_hdr *ppd)
985{
986 prb_fill_vlan_info(pkc, ppd);
987
988 if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
989 prb_fill_rxhash(pkc, ppd);
990 else
991 prb_clear_rxhash(pkc, ppd);
992}
993
994static inline void prb_fill_curr_block(char *curr,
995 struct tpacket_kbdq_core *pkc,
996 struct tpacket_block_desc *pbd,
997 unsigned int len)
998{
999 struct tpacket3_hdr *ppd;
1000
1001 ppd = (struct tpacket3_hdr *)curr;
1002 ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
1003 pkc->prev = curr;
1004 pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
1005 BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
1006 BLOCK_NUM_PKTS(pbd) += 1;
1007 atomic_inc(&pkc->blk_fill_in_prog);
1008 prb_run_all_ft_ops(pkc, ppd);
1009}
1010
1011/* Assumes caller has the sk->rx_queue.lock */
1012static void *__packet_lookup_frame_in_block(struct packet_sock *po,
1013 struct sk_buff *skb,
1014 int status,
1015 unsigned int len
1016 )
1017{
1018 struct tpacket_kbdq_core *pkc;
1019 struct tpacket_block_desc *pbd;
1020 char *curr, *end;
1021
1022 pkc = GET_PBDQC_FROM_RB(((struct packet_ring_buffer *)&po->rx_ring));
1023 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1024
1025 /* Queue is frozen when user space is lagging behind */
1026 if (prb_queue_frozen(pkc)) {
1027 /*
1028 * Check if that last block which caused the queue to freeze,
1029 * is still in_use by user-space.
1030 */
1031 if (prb_curr_blk_in_use(pkc, pbd)) {
1032 /* Can't record this packet */
1033 return NULL;
1034 } else {
1035 /*
1036 * Ok, the block was released by user-space.
1037 * Now let's open that block.
1038 * opening a block also thaws the queue.
1039 * Thawing is a side effect.
1040 */
1041 prb_open_block(pkc, pbd);
1042 }
1043 }
1044
1045 smp_mb();
1046 curr = pkc->nxt_offset;
1047 pkc->skb = skb;
1048 end = (char *) ((char *)pbd + pkc->kblk_size);
1049
1050 /* first try the current block */
1051 if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
1052 prb_fill_curr_block(curr, pkc, pbd, len);
1053 return (void *)curr;
1054 }
1055
1056 /* Ok, close the current block */
1057 prb_retire_current_block(pkc, po, 0);
1058
1059 /* Now, try to dispatch the next block */
1060 curr = (char *)prb_dispatch_next_block(pkc, po);
1061 if (curr) {
1062 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1063 prb_fill_curr_block(curr, pkc, pbd, len);
1064 return (void *)curr;
1065 }
1066
1067 /*
1068 * No free blocks are available. user_space hasn't caught up yet.
1069 * Queue was just frozen and now this packet will get dropped.
1070 */
1071 return NULL;
1072}
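A hedged user-space sketch of the matching consumer (not part of the patch): the kernel hands a whole block to user space and chains the packets inside it via tp_next_offset, so the reader walks the block and then flips block_status back to TP_STATUS_KERNEL. Structure and constant names come from <linux/if_packet.h>; ring setup via PACKET_RX_RING and mmap() is assumed to have happened elsewhere.

#include <linux/if_packet.h>
#include <sys/socket.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch only: walk one retired block and hand it back to the kernel.
 * 'pbd' is assumed to point at a block whose block_status has
 * TP_STATUS_USER set, i.e. the kernel has retired it.
 */
static void walk_block(struct tpacket_block_desc *pbd)
{
	uint32_t num = pbd->hdr.bh1.num_pkts;
	struct tpacket3_hdr *ppd = (struct tpacket3_hdr *)
		((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);

	for (uint32_t i = 0; i < num; i++) {
		/* packet bytes start tp_mac bytes into the frame */
		const uint8_t *data = (const uint8_t *)ppd + ppd->tp_mac;

		printf("len=%u snaplen=%u rxhash=%u\n",
		       ppd->tp_len, ppd->tp_snaplen, ppd->hv1.tp_rxhash);
		(void)data;
		ppd = (struct tpacket3_hdr *)
			((uint8_t *)ppd + ppd->tp_next_offset);
	}

	/* return the block so prb_open_block() can reuse it */
	pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
	__sync_synchronize();
}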
1073
1074static inline void *packet_current_rx_frame(struct packet_sock *po,
1075 struct sk_buff *skb,
1076 int status, unsigned int len)
1077{
1078 char *curr = NULL;
1079 switch (po->tp_version) {
1080 case TPACKET_V1:
1081 case TPACKET_V2:
1082 curr = packet_lookup_frame(po, &po->rx_ring,
1083 po->rx_ring.head, status);
1084 return curr;
1085 case TPACKET_V3:
1086 return __packet_lookup_frame_in_block(po, skb, status, len);
1087 default:
1088 WARN(1, "TPACKET version not supported\n");
1089 BUG();
1090 return 0;
1091 }
1092}
1093
1094static inline void *prb_lookup_block(struct packet_sock *po,
1095 struct packet_ring_buffer *rb,
1096 unsigned int previous,
1097 int status)
1098{
1099 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
1100 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous);
1101
1102 if (status != BLOCK_STATUS(pbd))
1103 return NULL;
1104 return pbd;
1105}
1106
1107static inline int prb_previous_blk_num(struct packet_ring_buffer *rb)
1108{
1109 unsigned int prev;
1110 if (rb->prb_bdqc.kactive_blk_num)
1111 prev = rb->prb_bdqc.kactive_blk_num-1;
1112 else
1113 prev = rb->prb_bdqc.knum_blocks-1;
1114 return prev;
1115}
1116
1117/* Assumes caller has held the rx_queue.lock */
1118static inline void *__prb_previous_block(struct packet_sock *po,
1119 struct packet_ring_buffer *rb,
1120 int status)
1121{
1122 unsigned int previous = prb_previous_blk_num(rb);
1123 return prb_lookup_block(po, rb, previous, status);
1124}
1125
1126static inline void *packet_previous_rx_frame(struct packet_sock *po,
1127 struct packet_ring_buffer *rb,
1128 int status)
1129{
1130 if (po->tp_version <= TPACKET_V2)
1131 return packet_previous_frame(po, rb, status);
1132
1133 return __prb_previous_block(po, rb, status);
1134}
1135
1136static inline void packet_increment_rx_head(struct packet_sock *po,
1137 struct packet_ring_buffer *rb)
1138{
1139 switch (po->tp_version) {
1140 case TPACKET_V1:
1141 case TPACKET_V2:
1142 return packet_increment_head(rb);
1143 case TPACKET_V3:
1144 default:
1145 WARN(1, "TPACKET version not supported.\n");
1146 BUG();
1147 return;
1148 }
1149}
1150
392static inline void *packet_previous_frame(struct packet_sock *po, 1151static inline void *packet_previous_frame(struct packet_sock *po,
393 struct packet_ring_buffer *rb, 1152 struct packet_ring_buffer *rb,
394 int status) 1153 int status)
@@ -961,7 +1720,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
961 return 0; 1720 return 0;
962 1721
963drop_n_acct: 1722drop_n_acct:
964 po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); 1723 spin_lock(&sk->sk_receive_queue.lock);
1724 po->stats.tp_drops++;
1725 atomic_inc(&sk->sk_drops);
1726 spin_unlock(&sk->sk_receive_queue.lock);
965 1727
966drop_n_restore: 1728drop_n_restore:
967 if (skb_head != skb->data && skb_shared(skb)) { 1729 if (skb_head != skb->data && skb_shared(skb)) {
@@ -982,12 +1744,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
982 union { 1744 union {
983 struct tpacket_hdr *h1; 1745 struct tpacket_hdr *h1;
984 struct tpacket2_hdr *h2; 1746 struct tpacket2_hdr *h2;
1747 struct tpacket3_hdr *h3;
985 void *raw; 1748 void *raw;
986 } h; 1749 } h;
987 u8 *skb_head = skb->data; 1750 u8 *skb_head = skb->data;
988 int skb_len = skb->len; 1751 int skb_len = skb->len;
989 unsigned int snaplen, res; 1752 unsigned int snaplen, res;
990 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; 1753 unsigned long status = TP_STATUS_USER;
991 unsigned short macoff, netoff, hdrlen; 1754 unsigned short macoff, netoff, hdrlen;
992 struct sk_buff *copy_skb = NULL; 1755 struct sk_buff *copy_skb = NULL;
993 struct timeval tv; 1756 struct timeval tv;
@@ -1033,37 +1796,46 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1033 po->tp_reserve; 1796 po->tp_reserve;
1034 macoff = netoff - maclen; 1797 macoff = netoff - maclen;
1035 } 1798 }
1036 1799 if (po->tp_version <= TPACKET_V2) {
1037 if (macoff + snaplen > po->rx_ring.frame_size) { 1800 if (macoff + snaplen > po->rx_ring.frame_size) {
1038 if (po->copy_thresh && 1801 if (po->copy_thresh &&
1039 atomic_read(&sk->sk_rmem_alloc) + skb->truesize < 1802 atomic_read(&sk->sk_rmem_alloc) + skb->truesize
1040 (unsigned)sk->sk_rcvbuf) { 1803 < (unsigned)sk->sk_rcvbuf) {
1041 if (skb_shared(skb)) { 1804 if (skb_shared(skb)) {
1042 copy_skb = skb_clone(skb, GFP_ATOMIC); 1805 copy_skb = skb_clone(skb, GFP_ATOMIC);
1043 } else { 1806 } else {
1044 copy_skb = skb_get(skb); 1807 copy_skb = skb_get(skb);
1045 skb_head = skb->data; 1808 skb_head = skb->data;
1809 }
1810 if (copy_skb)
1811 skb_set_owner_r(copy_skb, sk);
1046 } 1812 }
1047 if (copy_skb) 1813 snaplen = po->rx_ring.frame_size - macoff;
1048 skb_set_owner_r(copy_skb, sk); 1814 if ((int)snaplen < 0)
1815 snaplen = 0;
1049 } 1816 }
1050 snaplen = po->rx_ring.frame_size - macoff;
1051 if ((int)snaplen < 0)
1052 snaplen = 0;
1053 } 1817 }
1054
1055 spin_lock(&sk->sk_receive_queue.lock); 1818 spin_lock(&sk->sk_receive_queue.lock);
1056 h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL); 1819 h.raw = packet_current_rx_frame(po, skb,
1820 TP_STATUS_KERNEL, (macoff+snaplen));
1057 if (!h.raw) 1821 if (!h.raw)
1058 goto ring_is_full; 1822 goto ring_is_full;
1059 packet_increment_head(&po->rx_ring); 1823 if (po->tp_version <= TPACKET_V2) {
1824 packet_increment_rx_head(po, &po->rx_ring);
1825 /*
1826 * LOSING will be reported till you read the stats,
1827 * because it's COR - Clear On Read.
1828 * Anyway, we set it for V1/V2 only, as V3 doesn't need this
1829 * at the packet level.
1830 */
1831 if (po->stats.tp_drops)
1832 status |= TP_STATUS_LOSING;
1833 }
1060 po->stats.tp_packets++; 1834 po->stats.tp_packets++;
1061 if (copy_skb) { 1835 if (copy_skb) {
1062 status |= TP_STATUS_COPY; 1836 status |= TP_STATUS_COPY;
1063 __skb_queue_tail(&sk->sk_receive_queue, copy_skb); 1837 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
1064 } 1838 }
1065 if (!po->stats.tp_drops)
1066 status &= ~TP_STATUS_LOSING;
1067 spin_unlock(&sk->sk_receive_queue.lock); 1839 spin_unlock(&sk->sk_receive_queue.lock);
1068 1840
1069 skb_copy_bits(skb, 0, h.raw + macoff, snaplen); 1841 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
@@ -1114,6 +1886,29 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1114 h.h2->tp_padding = 0; 1886 h.h2->tp_padding = 0;
1115 hdrlen = sizeof(*h.h2); 1887 hdrlen = sizeof(*h.h2);
1116 break; 1888 break;
1889 case TPACKET_V3:
1890 /* tp_next_offset and vlan are already populated above,
1891 * so DON'T clear those fields here.
1892 */
1893 h.h3->tp_status |= status;
1894 h.h3->tp_len = skb->len;
1895 h.h3->tp_snaplen = snaplen;
1896 h.h3->tp_mac = macoff;
1897 h.h3->tp_net = netoff;
1898 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
1899 && shhwtstamps->syststamp.tv64)
1900 ts = ktime_to_timespec(shhwtstamps->syststamp);
1901 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1902 && shhwtstamps->hwtstamp.tv64)
1903 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
1904 else if (skb->tstamp.tv64)
1905 ts = ktime_to_timespec(skb->tstamp);
1906 else
1907 getnstimeofday(&ts);
1908 h.h3->tp_sec = ts.tv_sec;
1909 h.h3->tp_nsec = ts.tv_nsec;
1910 hdrlen = sizeof(*h.h3);
1911 break;
1117 default: 1912 default:
1118 BUG(); 1913 BUG();
1119 } 1914 }
@@ -1134,13 +1929,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1134 { 1929 {
1135 u8 *start, *end; 1930 u8 *start, *end;
1136 1931
1137 end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); 1932 if (po->tp_version <= TPACKET_V2) {
1138 for (start = h.raw; start < end; start += PAGE_SIZE) 1933 end = (u8 *)PAGE_ALIGN((unsigned long)h.raw
1139 flush_dcache_page(pgv_to_page(start)); 1934 + macoff + snaplen);
1935 for (start = h.raw; start < end; start += PAGE_SIZE)
1936 flush_dcache_page(pgv_to_page(start));
1937 }
1140 smp_wmb(); 1938 smp_wmb();
1141 } 1939 }
1142#endif 1940#endif
1143 __packet_set_status(po, h.raw, status); 1941 if (po->tp_version <= TPACKET_V2)
1942 __packet_set_status(po, h.raw, status);
1943 else
1944 prb_clear_blk_fill_status(&po->rx_ring);
1144 1945
1145 sk->sk_data_ready(sk, 0); 1946 sk->sk_data_ready(sk, 0);
1146 1947
@@ -1167,8 +1968,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
1167 struct packet_sock *po = pkt_sk(skb->sk); 1968 struct packet_sock *po = pkt_sk(skb->sk);
1168 void *ph; 1969 void *ph;
1169 1970
1170 BUG_ON(skb == NULL);
1171
1172 if (likely(po->tx_ring.pg_vec)) { 1971 if (likely(po->tx_ring.pg_vec)) {
1173 ph = skb_shinfo(skb)->destructor_arg; 1972 ph = skb_shinfo(skb)->destructor_arg;
1174 BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); 1973 BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
@@ -1631,7 +2430,7 @@ static int packet_release(struct socket *sock)
1631 struct sock *sk = sock->sk; 2430 struct sock *sk = sock->sk;
1632 struct packet_sock *po; 2431 struct packet_sock *po;
1633 struct net *net; 2432 struct net *net;
1634 struct tpacket_req req; 2433 union tpacket_req_u req_u;
1635 2434
1636 if (!sk) 2435 if (!sk)
1637 return 0; 2436 return 0;
@@ -1654,13 +2453,13 @@ static int packet_release(struct socket *sock)
1654 2453
1655 packet_flush_mclist(sk); 2454 packet_flush_mclist(sk);
1656 2455
1657 memset(&req, 0, sizeof(req)); 2456 memset(&req_u, 0, sizeof(req_u));
1658 2457
1659 if (po->rx_ring.pg_vec) 2458 if (po->rx_ring.pg_vec)
1660 packet_set_ring(sk, &req, 1, 0); 2459 packet_set_ring(sk, &req_u, 1, 0);
1661 2460
1662 if (po->tx_ring.pg_vec) 2461 if (po->tx_ring.pg_vec)
1663 packet_set_ring(sk, &req, 1, 1); 2462 packet_set_ring(sk, &req_u, 1, 1);
1664 2463
1665 fanout_release(sk); 2464 fanout_release(sk);
1666 2465
@@ -2280,15 +3079,27 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
2280 case PACKET_RX_RING: 3079 case PACKET_RX_RING:
2281 case PACKET_TX_RING: 3080 case PACKET_TX_RING:
2282 { 3081 {
2283 struct tpacket_req req; 3082 union tpacket_req_u req_u;
3083 int len;
2284 3084
2285 if (optlen < sizeof(req)) 3085 switch (po->tp_version) {
3086 case TPACKET_V1:
3087 case TPACKET_V2:
3088 len = sizeof(req_u.req);
3089 break;
3090 case TPACKET_V3:
3091 default:
3092 len = sizeof(req_u.req3);
3093 break;
3094 }
3095 if (optlen < len)
2286 return -EINVAL; 3096 return -EINVAL;
2287 if (pkt_sk(sk)->has_vnet_hdr) 3097 if (pkt_sk(sk)->has_vnet_hdr)
2288 return -EINVAL; 3098 return -EINVAL;
2289 if (copy_from_user(&req, optval, sizeof(req))) 3099 if (copy_from_user(&req_u.req, optval, len))
2290 return -EFAULT; 3100 return -EFAULT;
2291 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); 3101 return packet_set_ring(sk, &req_u, 0,
3102 optname == PACKET_TX_RING);
2292 } 3103 }
2293 case PACKET_COPY_THRESH: 3104 case PACKET_COPY_THRESH:
2294 { 3105 {
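For reference, the user-space counterpart of this option parsing would first select TPACKET_V3 via PACKET_VERSION and then pass a struct tpacket_req3, the req3 member of the union handled above. A hedged sketch follows; the sizes and retire timeout are illustrative values only, and fd is assumed to be an AF_PACKET socket created elsewhere.

#include <linux/if_packet.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <string.h>

/* Sketch: create a TPACKET_V3 RX ring on an existing packet socket.
 * Error handling is omitted for brevity.
 */
static void *setup_v3_ring(int fd, struct tpacket_req3 *req)
{
	int ver = TPACKET_V3;

	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));

	memset(req, 0, sizeof(*req));
	req->tp_block_size = 1 << 22;	/* 4 MiB per block */
	req->tp_frame_size = 1 << 11;	/* nominal; V3 packs variable-sized frames */
	req->tp_block_nr   = 64;
	req->tp_frame_nr   = (req->tp_block_size / req->tp_frame_size) *
			     req->tp_block_nr;
	req->tp_retire_blk_tov   = 60;	/* ms before a partly filled block retires */
	req->tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;

	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req));

	return mmap(NULL, (size_t)req->tp_block_size * req->tp_block_nr,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
}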
@@ -2315,6 +3126,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
2315 switch (val) { 3126 switch (val) {
2316 case TPACKET_V1: 3127 case TPACKET_V1:
2317 case TPACKET_V2: 3128 case TPACKET_V2:
3129 case TPACKET_V3:
2318 po->tp_version = val; 3130 po->tp_version = val;
2319 return 0; 3131 return 0;
2320 default: 3132 default:
@@ -2424,6 +3236,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
2424 struct packet_sock *po = pkt_sk(sk); 3236 struct packet_sock *po = pkt_sk(sk);
2425 void *data; 3237 void *data;
2426 struct tpacket_stats st; 3238 struct tpacket_stats st;
3239 union tpacket_stats_u st_u;
2427 3240
2428 if (level != SOL_PACKET) 3241 if (level != SOL_PACKET)
2429 return -ENOPROTOOPT; 3242 return -ENOPROTOOPT;
@@ -2436,15 +3249,27 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
2436 3249
2437 switch (optname) { 3250 switch (optname) {
2438 case PACKET_STATISTICS: 3251 case PACKET_STATISTICS:
2439 if (len > sizeof(struct tpacket_stats)) 3252 if (po->tp_version == TPACKET_V3) {
2440 len = sizeof(struct tpacket_stats); 3253 len = sizeof(struct tpacket_stats_v3);
3254 } else {
3255 if (len > sizeof(struct tpacket_stats))
3256 len = sizeof(struct tpacket_stats);
3257 }
2441 spin_lock_bh(&sk->sk_receive_queue.lock); 3258 spin_lock_bh(&sk->sk_receive_queue.lock);
2442 st = po->stats; 3259 if (po->tp_version == TPACKET_V3) {
3260 memcpy(&st_u.stats3, &po->stats,
3261 sizeof(struct tpacket_stats));
3262 st_u.stats3.tp_freeze_q_cnt =
3263 po->stats_u.stats3.tp_freeze_q_cnt;
3264 st_u.stats3.tp_packets += po->stats.tp_drops;
3265 data = &st_u.stats3;
3266 } else {
3267 st = po->stats;
3268 st.tp_packets += st.tp_drops;
3269 data = &st;
3270 }
2443 memset(&po->stats, 0, sizeof(st)); 3271 memset(&po->stats, 0, sizeof(st));
2444 spin_unlock_bh(&sk->sk_receive_queue.lock); 3272 spin_unlock_bh(&sk->sk_receive_queue.lock);
2445 st.tp_packets += st.tp_drops;
2446
2447 data = &st;
2448 break; 3273 break;
2449 case PACKET_AUXDATA: 3274 case PACKET_AUXDATA:
2450 if (len > sizeof(int)) 3275 if (len > sizeof(int))
@@ -2485,6 +3310,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
2485 case TPACKET_V2: 3310 case TPACKET_V2:
2486 val = sizeof(struct tpacket2_hdr); 3311 val = sizeof(struct tpacket2_hdr);
2487 break; 3312 break;
3313 case TPACKET_V3:
3314 val = sizeof(struct tpacket3_hdr);
3315 break;
2488 default: 3316 default:
2489 return -EINVAL; 3317 return -EINVAL;
2490 } 3318 }
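With TPACKET_V3 selected, PACKET_STATISTICS now returns the larger struct tpacket_stats_v3, which adds tp_freeze_q_cnt next to the packet and drop counters; as the kernel code above shows, the counters are cleared on read. A hedged user-space sketch:

#include <linux/if_packet.h>
#include <sys/socket.h>
#include <stdio.h>

/* Sketch: read (and thereby clear) the V3 ring statistics. */
static void dump_v3_stats(int fd)
{
	struct tpacket_stats_v3 st;
	socklen_t len = sizeof(st);

	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
		printf("packets=%u drops=%u freezes=%u\n",
		       st.tp_packets, st.tp_drops, st.tp_freeze_q_cnt);
}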
@@ -2641,7 +3469,8 @@ static unsigned int packet_poll(struct file *file, struct socket *sock,
2641 3469
2642 spin_lock_bh(&sk->sk_receive_queue.lock); 3470 spin_lock_bh(&sk->sk_receive_queue.lock);
2643 if (po->rx_ring.pg_vec) { 3471 if (po->rx_ring.pg_vec) {
2644 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL)) 3472 if (!packet_previous_rx_frame(po, &po->rx_ring,
3473 TP_STATUS_KERNEL))
2645 mask |= POLLIN | POLLRDNORM; 3474 mask |= POLLIN | POLLRDNORM;
2646 } 3475 }
2647 spin_unlock_bh(&sk->sk_receive_queue.lock); 3476 spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -2760,7 +3589,7 @@ out_free_pgvec:
2760 goto out; 3589 goto out;
2761} 3590}
2762 3591
2763static int packet_set_ring(struct sock *sk, struct tpacket_req *req, 3592static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
2764 int closing, int tx_ring) 3593 int closing, int tx_ring)
2765{ 3594{
2766 struct pgv *pg_vec = NULL; 3595 struct pgv *pg_vec = NULL;
@@ -2769,7 +3598,15 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2769 struct packet_ring_buffer *rb; 3598 struct packet_ring_buffer *rb;
2770 struct sk_buff_head *rb_queue; 3599 struct sk_buff_head *rb_queue;
2771 __be16 num; 3600 __be16 num;
2772 int err; 3601 int err = -EINVAL;
 3602 /* Local alias for req_u->req to keep code churn minimal */
3603 struct tpacket_req *req = &req_u->req;
3604
3605 /* Opening a Tx-ring is NOT supported in TPACKET_V3 */
3606 if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
3607 WARN(1, "Tx-ring is not supported.\n");
3608 goto out;
3609 }
2773 3610
2774 rb = tx_ring ? &po->tx_ring : &po->rx_ring; 3611 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2775 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; 3612 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
@@ -2795,6 +3632,9 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2795 case TPACKET_V2: 3632 case TPACKET_V2:
2796 po->tp_hdrlen = TPACKET2_HDRLEN; 3633 po->tp_hdrlen = TPACKET2_HDRLEN;
2797 break; 3634 break;
3635 case TPACKET_V3:
3636 po->tp_hdrlen = TPACKET3_HDRLEN;
3637 break;
2798 } 3638 }
2799 3639
2800 err = -EINVAL; 3640 err = -EINVAL;
@@ -2820,6 +3660,17 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2820 pg_vec = alloc_pg_vec(req, order); 3660 pg_vec = alloc_pg_vec(req, order);
2821 if (unlikely(!pg_vec)) 3661 if (unlikely(!pg_vec))
2822 goto out; 3662 goto out;
3663 switch (po->tp_version) {
3664 case TPACKET_V3:
3665 /* Transmit path is not supported. We checked
3666 * it above but just being paranoid
3667 */
3668 if (!tx_ring)
3669 init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
3670 break;
3671 default:
3672 break;
3673 }
2823 } 3674 }
2824 /* Done */ 3675 /* Done */
2825 else { 3676 else {
@@ -2872,7 +3723,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2872 register_prot_hook(sk); 3723 register_prot_hook(sk);
2873 } 3724 }
2874 spin_unlock(&po->bind_lock); 3725 spin_unlock(&po->bind_lock);
2875 3726 if (closing && (po->tp_version > TPACKET_V2)) {
3727 /* Because we don't support block-based V3 on tx-ring */
3728 if (!tx_ring)
3729 prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
3730 }
2876 release_sock(sk); 3731 release_sock(sk);
2877 3732
2878 if (pg_vec) 3733 if (pg_vec)
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index c6fffd946d42..bf10ea8fbbf9 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -480,7 +480,7 @@ int __init_or_module phonet_proto_register(unsigned int protocol,
480 if (proto_tab[protocol]) 480 if (proto_tab[protocol])
481 err = -EBUSY; 481 err = -EBUSY;
482 else 482 else
483 rcu_assign_pointer(proto_tab[protocol], pp); 483 RCU_INIT_POINTER(proto_tab[protocol], pp);
484 mutex_unlock(&proto_tab_lock); 484 mutex_unlock(&proto_tab_lock);
485 485
486 return err; 486 return err;
@@ -491,7 +491,7 @@ void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp)
491{ 491{
492 mutex_lock(&proto_tab_lock); 492 mutex_lock(&proto_tab_lock);
493 BUG_ON(proto_tab[protocol] != pp); 493 BUG_ON(proto_tab[protocol] != pp);
494 rcu_assign_pointer(proto_tab[protocol], NULL); 494 RCU_INIT_POINTER(proto_tab[protocol], NULL);
495 mutex_unlock(&proto_tab_lock); 495 mutex_unlock(&proto_tab_lock);
496 synchronize_rcu(); 496 synchronize_rcu();
497 proto_unregister(pp->prot); 497 proto_unregister(pp->prot);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index d2df8f33160b..c5827614376b 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -276,7 +276,7 @@ static void phonet_route_autodel(struct net_device *dev)
276 mutex_lock(&pnn->routes.lock); 276 mutex_lock(&pnn->routes.lock);
277 for (i = 0; i < 64; i++) 277 for (i = 0; i < 64; i++)
278 if (dev == pnn->routes.table[i]) { 278 if (dev == pnn->routes.table[i]) {
279 rcu_assign_pointer(pnn->routes.table[i], NULL); 279 RCU_INIT_POINTER(pnn->routes.table[i], NULL);
280 set_bit(i, deleted); 280 set_bit(i, deleted);
281 } 281 }
282 mutex_unlock(&pnn->routes.lock); 282 mutex_unlock(&pnn->routes.lock);
@@ -390,7 +390,7 @@ int phonet_route_add(struct net_device *dev, u8 daddr)
390 daddr = daddr >> 2; 390 daddr = daddr >> 2;
391 mutex_lock(&routes->lock); 391 mutex_lock(&routes->lock);
392 if (routes->table[daddr] == NULL) { 392 if (routes->table[daddr] == NULL) {
393 rcu_assign_pointer(routes->table[daddr], dev); 393 RCU_INIT_POINTER(routes->table[daddr], dev);
394 dev_hold(dev); 394 dev_hold(dev);
395 err = 0; 395 err = 0;
396 } 396 }
@@ -406,7 +406,7 @@ int phonet_route_del(struct net_device *dev, u8 daddr)
406 daddr = daddr >> 2; 406 daddr = daddr >> 2;
407 mutex_lock(&routes->lock); 407 mutex_lock(&routes->lock);
408 if (dev == routes->table[daddr]) 408 if (dev == routes->table[daddr])
409 rcu_assign_pointer(routes->table[daddr], NULL); 409 RCU_INIT_POINTER(routes->table[daddr], NULL);
410 else 410 else
411 dev = NULL; 411 dev = NULL;
412 mutex_unlock(&routes->lock); 412 mutex_unlock(&routes->lock);
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index ab07711cf2f4..676d18dc75b7 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -679,7 +679,7 @@ int pn_sock_bind_res(struct sock *sk, u8 res)
679 mutex_lock(&resource_mutex); 679 mutex_lock(&resource_mutex);
680 if (pnres.sk[res] == NULL) { 680 if (pnres.sk[res] == NULL) {
681 sock_hold(sk); 681 sock_hold(sk);
682 rcu_assign_pointer(pnres.sk[res], sk); 682 RCU_INIT_POINTER(pnres.sk[res], sk);
683 ret = 0; 683 ret = 0;
684 } 684 }
685 mutex_unlock(&resource_mutex); 685 mutex_unlock(&resource_mutex);
@@ -695,7 +695,7 @@ int pn_sock_unbind_res(struct sock *sk, u8 res)
695 695
696 mutex_lock(&resource_mutex); 696 mutex_lock(&resource_mutex);
697 if (pnres.sk[res] == sk) { 697 if (pnres.sk[res] == sk) {
698 rcu_assign_pointer(pnres.sk[res], NULL); 698 RCU_INIT_POINTER(pnres.sk[res], NULL);
699 ret = 0; 699 ret = 0;
700 } 700 }
701 mutex_unlock(&resource_mutex); 701 mutex_unlock(&resource_mutex);
@@ -714,7 +714,7 @@ void pn_sock_unbind_all_res(struct sock *sk)
714 mutex_lock(&resource_mutex); 714 mutex_lock(&resource_mutex);
715 for (res = 0; res < 256; res++) { 715 for (res = 0; res < 256; res++) {
716 if (pnres.sk[res] == sk) { 716 if (pnres.sk[res] == sk) {
717 rcu_assign_pointer(pnres.sk[res], NULL); 717 RCU_INIT_POINTER(pnres.sk[res], NULL);
718 match++; 718 match++;
719 } 719 }
720 } 720 }
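Most of the phonet conversions above store NULL, i.e. they unpublish an entry, which is exactly the case where the write barrier implied by rcu_assign_pointer() buys nothing and RCU_INIT_POINTER() is sufficient. Below is a hedged, self-contained illustration of that unpublish pattern; the table, lock and struct names are hypothetical and not taken from this patch.

#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct foo {
	int dummy;
};

/* Hypothetical RCU-protected table, purely for illustration. */
static struct foo __rcu *example_table[16];
static DEFINE_MUTEX(example_lock);

static void example_unpublish(unsigned int idx)
{
	struct foo *old;

	mutex_lock(&example_lock);
	old = rcu_dereference_protected(example_table[idx],
					lockdep_is_held(&example_lock));
	/* Storing NULL needs no ordering against initialisation of the
	 * pointed-to object, so RCU_INIT_POINTER() is enough here.
	 */
	RCU_INIT_POINTER(example_table[idx], NULL);
	mutex_unlock(&example_lock);

	synchronize_rcu();	/* wait out readers of the old entry */
	kfree(old);
}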
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index ec753b3ae72a..4cf6dc7910e4 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -9,6 +9,7 @@ config RDS
9 9
10config RDS_RDMA 10config RDS_RDMA
11 tristate "RDS over Infiniband and iWARP" 11 tristate "RDS over Infiniband and iWARP"
12 select LLIST
12 depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS 13 depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
13 ---help--- 14 ---help---
14 Allow RDS to use Infiniband and iWARP as a transport. 15 Allow RDS to use Infiniband and iWARP as a transport.
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 819c35a0d9cb..e8fdb172adbb 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -33,10 +33,10 @@
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/rculist.h> 35#include <linux/rculist.h>
36#include <linux/llist.h>
36 37
37#include "rds.h" 38#include "rds.h"
38#include "ib.h" 39#include "ib.h"
39#include "xlist.h"
40 40
41static DEFINE_PER_CPU(unsigned long, clean_list_grace); 41static DEFINE_PER_CPU(unsigned long, clean_list_grace);
42#define CLEAN_LIST_BUSY_BIT 0 42#define CLEAN_LIST_BUSY_BIT 0
@@ -49,7 +49,7 @@ struct rds_ib_mr {
49 struct rds_ib_mr_pool *pool; 49 struct rds_ib_mr_pool *pool;
50 struct ib_fmr *fmr; 50 struct ib_fmr *fmr;
51 51
52 struct xlist_head xlist; 52 struct llist_node llnode;
53 53
54 /* unmap_list is for freeing */ 54 /* unmap_list is for freeing */
55 struct list_head unmap_list; 55 struct list_head unmap_list;
@@ -71,9 +71,9 @@ struct rds_ib_mr_pool {
71 atomic_t item_count; /* total # of MRs */ 71 atomic_t item_count; /* total # of MRs */
72 atomic_t dirty_count; /* # dirty of MRs */ 72 atomic_t dirty_count; /* # dirty of MRs */
73 73
74 struct xlist_head drop_list; /* MRs that have reached their max_maps limit */ 74 struct llist_head drop_list; /* MRs that have reached their max_maps limit */
75 struct xlist_head free_list; /* unused MRs */ 75 struct llist_head free_list; /* unused MRs */
76 struct xlist_head clean_list; /* global unused & unmapped MRs */ 76 struct llist_head clean_list; /* global unused & unmapped MRs */
77 wait_queue_head_t flush_wait; 77 wait_queue_head_t flush_wait;
78 78
79 atomic_t free_pinned; /* memory pinned by free MRs */ 79 atomic_t free_pinned; /* memory pinned by free MRs */
@@ -220,9 +220,9 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
220 if (!pool) 220 if (!pool)
221 return ERR_PTR(-ENOMEM); 221 return ERR_PTR(-ENOMEM);
222 222
223 INIT_XLIST_HEAD(&pool->free_list); 223 init_llist_head(&pool->free_list);
224 INIT_XLIST_HEAD(&pool->drop_list); 224 init_llist_head(&pool->drop_list);
225 INIT_XLIST_HEAD(&pool->clean_list); 225 init_llist_head(&pool->clean_list);
226 mutex_init(&pool->flush_lock); 226 mutex_init(&pool->flush_lock);
227 init_waitqueue_head(&pool->flush_wait); 227 init_waitqueue_head(&pool->flush_wait);
228 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); 228 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
@@ -260,26 +260,18 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
260 kfree(pool); 260 kfree(pool);
261} 261}
262 262
263static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl,
264 struct rds_ib_mr **ibmr_ret)
265{
266 struct xlist_head *ibmr_xl;
267 ibmr_xl = xlist_del_head_fast(xl);
268 *ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist);
269}
270
271static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) 263static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
272{ 264{
273 struct rds_ib_mr *ibmr = NULL; 265 struct rds_ib_mr *ibmr = NULL;
274 struct xlist_head *ret; 266 struct llist_node *ret;
275 unsigned long *flag; 267 unsigned long *flag;
276 268
277 preempt_disable(); 269 preempt_disable();
278 flag = &__get_cpu_var(clean_list_grace); 270 flag = &__get_cpu_var(clean_list_grace);
279 set_bit(CLEAN_LIST_BUSY_BIT, flag); 271 set_bit(CLEAN_LIST_BUSY_BIT, flag);
280 ret = xlist_del_head(&pool->clean_list); 272 ret = llist_del_first(&pool->clean_list);
281 if (ret) 273 if (ret)
282 ibmr = list_entry(ret, struct rds_ib_mr, xlist); 274 ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
283 275
284 clear_bit(CLEAN_LIST_BUSY_BIT, flag); 276 clear_bit(CLEAN_LIST_BUSY_BIT, flag);
285 preempt_enable(); 277 preempt_enable();
@@ -529,46 +521,44 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr
529} 521}
530 522
531/* 523/*
532 * given an xlist of mrs, put them all into the list_head for more processing 524 * given an llist of mrs, put them all into the list_head for more processing
533 */ 525 */
534static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list) 526static void llist_append_to_list(struct llist_head *llist, struct list_head *list)
535{ 527{
536 struct rds_ib_mr *ibmr; 528 struct rds_ib_mr *ibmr;
537 struct xlist_head splice; 529 struct llist_node *node;
538 struct xlist_head *cur; 530 struct llist_node *next;
539 struct xlist_head *next; 531
540 532 node = llist_del_all(llist);
541 splice.next = NULL; 533 while (node) {
542 xlist_splice(xlist, &splice); 534 next = node->next;
543 cur = splice.next; 535 ibmr = llist_entry(node, struct rds_ib_mr, llnode);
544 while (cur) {
545 next = cur->next;
546 ibmr = list_entry(cur, struct rds_ib_mr, xlist);
547 list_add_tail(&ibmr->unmap_list, list); 536 list_add_tail(&ibmr->unmap_list, list);
548 cur = next; 537 node = next;
549 } 538 }
550} 539}
551 540
552/* 541/*
553 * this takes a list head of mrs and turns it into an xlist of clusters. 542 * this takes a list head of mrs and turns it into linked llist nodes
554 * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for 543 * of clusters. Each cluster has linked llist nodes of
555 * reuse. 544 * MR_CLUSTER_SIZE mrs that are ready for reuse.
556 */ 545 */
557static void list_append_to_xlist(struct rds_ib_mr_pool *pool, 546static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
558 struct list_head *list, struct xlist_head *xlist, 547 struct list_head *list,
559 struct xlist_head **tail_ret) 548 struct llist_node **nodes_head,
549 struct llist_node **nodes_tail)
560{ 550{
561 struct rds_ib_mr *ibmr; 551 struct rds_ib_mr *ibmr;
562 struct xlist_head *cur_mr = xlist; 552 struct llist_node *cur = NULL;
563 struct xlist_head *tail_mr = NULL; 553 struct llist_node **next = nodes_head;
564 554
565 list_for_each_entry(ibmr, list, unmap_list) { 555 list_for_each_entry(ibmr, list, unmap_list) {
566 tail_mr = &ibmr->xlist; 556 cur = &ibmr->llnode;
567 tail_mr->next = NULL; 557 *next = cur;
568 cur_mr->next = tail_mr; 558 next = &cur->next;
569 cur_mr = tail_mr;
570 } 559 }
571 *tail_ret = tail_mr; 560 *next = NULL;
561 *nodes_tail = cur;
572} 562}
573 563
574/* 564/*
@@ -581,8 +571,8 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
581 int free_all, struct rds_ib_mr **ibmr_ret) 571 int free_all, struct rds_ib_mr **ibmr_ret)
582{ 572{
583 struct rds_ib_mr *ibmr, *next; 573 struct rds_ib_mr *ibmr, *next;
584 struct xlist_head clean_xlist; 574 struct llist_node *clean_nodes;
585 struct xlist_head *clean_tail; 575 struct llist_node *clean_tail;
586 LIST_HEAD(unmap_list); 576 LIST_HEAD(unmap_list);
587 LIST_HEAD(fmr_list); 577 LIST_HEAD(fmr_list);
588 unsigned long unpinned = 0; 578 unsigned long unpinned = 0;
@@ -603,7 +593,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
603 593
604 prepare_to_wait(&pool->flush_wait, &wait, 594 prepare_to_wait(&pool->flush_wait, &wait,
605 TASK_UNINTERRUPTIBLE); 595 TASK_UNINTERRUPTIBLE);
606 if (xlist_empty(&pool->clean_list)) 596 if (llist_empty(&pool->clean_list))
607 schedule(); 597 schedule();
608 598
609 ibmr = rds_ib_reuse_fmr(pool); 599 ibmr = rds_ib_reuse_fmr(pool);
@@ -628,10 +618,10 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
628 /* Get the list of all MRs to be dropped. Ordering matters - 618 /* Get the list of all MRs to be dropped. Ordering matters -
629 * we want to put drop_list ahead of free_list. 619 * we want to put drop_list ahead of free_list.
630 */ 620 */
631 xlist_append_to_list(&pool->drop_list, &unmap_list); 621 llist_append_to_list(&pool->drop_list, &unmap_list);
632 xlist_append_to_list(&pool->free_list, &unmap_list); 622 llist_append_to_list(&pool->free_list, &unmap_list);
633 if (free_all) 623 if (free_all)
634 xlist_append_to_list(&pool->clean_list, &unmap_list); 624 llist_append_to_list(&pool->clean_list, &unmap_list);
635 625
636 free_goal = rds_ib_flush_goal(pool, free_all); 626 free_goal = rds_ib_flush_goal(pool, free_all);
637 627
@@ -663,22 +653,22 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
663 if (!list_empty(&unmap_list)) { 653 if (!list_empty(&unmap_list)) {
664 /* we have to make sure that none of the things we're about 654 /* we have to make sure that none of the things we're about
665 * to put on the clean list would race with other cpus trying 655 * to put on the clean list would race with other cpus trying
666 * to pull items off. The xlist would explode if we managed to 656 * to pull items off. The llist would explode if we managed to
667 * remove something from the clean list and then add it back again 657 * remove something from the clean list and then add it back again
668 * while another CPU was spinning on that same item in xlist_del_head. 658 * while another CPU was spinning on that same item in llist_del_first.
669 * 659 *
670 * This is pretty unlikely, but just in case wait for an xlist grace period 660 * This is pretty unlikely, but just in case wait for an llist grace period
671 * here before adding anything back into the clean list. 661 * here before adding anything back into the clean list.
672 */ 662 */
673 wait_clean_list_grace(); 663 wait_clean_list_grace();
674 664
675 list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail); 665 list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail);
676 if (ibmr_ret) 666 if (ibmr_ret)
677 refill_local(pool, &clean_xlist, ibmr_ret); 667 *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
678 668
679 /* refill_local may have emptied our list */ 669 /* more than one entry in llist nodes */
680 if (!xlist_empty(&clean_xlist)) 670 if (clean_nodes->next)
681 xlist_add(clean_xlist.next, clean_tail, &pool->clean_list); 671 llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list);
682 672
683 } 673 }
684 674
@@ -711,9 +701,9 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
711 701
712 /* Return it to the pool's free list */ 702 /* Return it to the pool's free list */
713 if (ibmr->remap_count >= pool->fmr_attr.max_maps) 703 if (ibmr->remap_count >= pool->fmr_attr.max_maps)
714 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list); 704 llist_add(&ibmr->llnode, &pool->drop_list);
715 else 705 else
716 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list); 706 llist_add(&ibmr->llnode, &pool->free_list);
717 707
718 atomic_add(ibmr->sg_len, &pool->free_pinned); 708 atomic_add(ibmr->sg_len, &pool->free_pinned);
719 atomic_inc(&pool->dirty_count); 709 atomic_inc(&pool->dirty_count);
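The conversion above maps the private xlist onto the generic lock-less list from <linux/llist.h>: llist_add() is the multi-producer push, llist_del_all() detaches the whole chain for batch processing, and llist_del_first() keeps the same single-consumer caveat the xlist code already had. A hedged sketch of that producer/consumer shape, with illustrative names that are not RDS code:

#include <linux/llist.h>
#include <linux/slab.h>

struct work_item {
	int payload;
	struct llist_node llnode;
};

static struct llist_head pending;	/* init_llist_head(&pending) at setup */

/* Producer side: safe from any number of CPUs concurrently. */
static void queue_item(struct work_item *w)
{
	llist_add(&w->llnode, &pending);
}

/* Consumer side: detach everything in one atomic step, then walk the
 * chain, saving ->next before the node is reused or freed, just as
 * llist_append_to_list() above does.
 */
static void drain_items(void)
{
	struct llist_node *node = llist_del_all(&pending);

	while (node) {
		struct llist_node *next = node->next;
		struct work_item *w = llist_entry(node, struct work_item, llnode);

		kfree(w);
		node = next;
	}
}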
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 8b77edbab272..4e1de171866c 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -84,7 +84,8 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
84static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); 84static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
85static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, 85static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
86 struct list_head *unmap_list, 86 struct list_head *unmap_list,
87 struct list_head *kill_list); 87 struct list_head *kill_list,
88 int *unpinned);
88static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); 89static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
89 90
90static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) 91static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id)
@@ -499,7 +500,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
499 LIST_HEAD(unmap_list); 500 LIST_HEAD(unmap_list);
500 LIST_HEAD(kill_list); 501 LIST_HEAD(kill_list);
501 unsigned long flags; 502 unsigned long flags;
502 unsigned int nfreed = 0, ncleaned = 0, free_goal; 503 unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal;
503 int ret = 0; 504 int ret = 0;
504 505
505 rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); 506 rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
@@ -524,7 +525,8 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
524 * will be destroyed by the unmap function. 525 * will be destroyed by the unmap function.
525 */ 526 */
526 if (!list_empty(&unmap_list)) { 527 if (!list_empty(&unmap_list)) {
527 ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list); 528 ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list,
529 &kill_list, &unpinned);
528 /* If we've been asked to destroy all MRs, move those 530 /* If we've been asked to destroy all MRs, move those
529 * that were simply cleaned to the kill list */ 531 * that were simply cleaned to the kill list */
530 if (free_all) 532 if (free_all)
@@ -548,6 +550,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
548 spin_unlock_irqrestore(&pool->list_lock, flags); 550 spin_unlock_irqrestore(&pool->list_lock, flags);
549 } 551 }
550 552
553 atomic_sub(unpinned, &pool->free_pinned);
551 atomic_sub(ncleaned, &pool->dirty_count); 554 atomic_sub(ncleaned, &pool->dirty_count);
552 atomic_sub(nfreed, &pool->item_count); 555 atomic_sub(nfreed, &pool->item_count);
553 556
@@ -828,7 +831,8 @@ static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool,
828 831
829static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, 832static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
830 struct list_head *unmap_list, 833 struct list_head *unmap_list,
831 struct list_head *kill_list) 834 struct list_head *kill_list,
835 int *unpinned)
832{ 836{
833 struct rds_iw_mapping *mapping, *next; 837 struct rds_iw_mapping *mapping, *next;
834 unsigned int ncleaned = 0; 838 unsigned int ncleaned = 0;
@@ -855,6 +859,7 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
855 859
856 spin_lock_irqsave(&pool->list_lock, flags); 860 spin_lock_irqsave(&pool->list_lock, flags);
857 list_for_each_entry_safe(mapping, next, unmap_list, m_list) { 861 list_for_each_entry_safe(mapping, next, unmap_list, m_list) {
862 *unpinned += mapping->m_sg.len;
858 list_move(&mapping->m_list, &laundered); 863 list_move(&mapping->m_list, &laundered);
859 ncleaned++; 864 ncleaned++;
860 } 865 }
diff --git a/net/rds/xlist.h b/net/rds/xlist.h
deleted file mode 100644
index e6b5190daddd..000000000000
--- a/net/rds/xlist.h
+++ /dev/null
@@ -1,80 +0,0 @@
1#ifndef _LINUX_XLIST_H
2#define _LINUX_XLIST_H
3
4#include <linux/stddef.h>
5#include <linux/poison.h>
6#include <linux/prefetch.h>
7#include <asm/system.h>
8
9struct xlist_head {
10 struct xlist_head *next;
11};
12
13static inline void INIT_XLIST_HEAD(struct xlist_head *list)
14{
15 list->next = NULL;
16}
17
18static inline int xlist_empty(struct xlist_head *head)
19{
20 return head->next == NULL;
21}
22
23static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail,
24 struct xlist_head *head)
25{
26 struct xlist_head *cur;
27 struct xlist_head *check;
28
29 while (1) {
30 cur = head->next;
31 tail->next = cur;
32 check = cmpxchg(&head->next, cur, new);
33 if (check == cur)
34 break;
35 }
36}
37
38static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
39{
40 struct xlist_head *cur;
41 struct xlist_head *check;
42 struct xlist_head *next;
43
44 while (1) {
45 cur = head->next;
46 if (!cur)
47 goto out;
48
49 next = cur->next;
50 check = cmpxchg(&head->next, cur, next);
51 if (check == cur)
52 goto out;
53 }
54out:
55 return cur;
56}
57
58static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head)
59{
60 struct xlist_head *cur;
61
62 cur = head->next;
63 if (!cur)
64 return NULL;
65
66 head->next = cur->next;
67 return cur;
68}
69
70static inline void xlist_splice(struct xlist_head *list,
71 struct xlist_head *head)
72{
73 struct xlist_head *cur;
74
75 WARN_ON(head->next);
76 cur = xchg(&list->next, NULL);
77 head->next = cur;
78}
79
80#endif
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 102fc212cd64..e051398fdf6b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -196,8 +196,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
196 196
197 skb2->skb_iif = skb->dev->ifindex; 197 skb2->skb_iif = skb->dev->ifindex;
198 skb2->dev = dev; 198 skb2->dev = dev;
199 dev_queue_xmit(skb2); 199 err = dev_queue_xmit(skb2);
200 err = 0;
201 200
202out: 201out:
203 if (err) { 202 if (err) {
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index be4505ee67a9..b01427924f81 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
425 struct rsvp_filter *f, **fp; 425 struct rsvp_filter *f, **fp;
426 struct rsvp_session *s, **sp; 426 struct rsvp_session *s, **sp;
427 struct tc_rsvp_pinfo *pinfo = NULL; 427 struct tc_rsvp_pinfo *pinfo = NULL;
428 struct nlattr *opt = tca[TCA_OPTIONS-1]; 428 struct nlattr *opt = tca[TCA_OPTIONS];
429 struct nlattr *tb[TCA_RSVP_MAX + 1]; 429 struct nlattr *tb[TCA_RSVP_MAX + 1];
430 struct tcf_exts e; 430 struct tcf_exts e;
431 unsigned int h1, h2; 431 unsigned int h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
439 if (err < 0) 439 if (err < 0)
440 return err; 440 return err;
441 441
442 err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map); 442 err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
443 if (err < 0) 443 if (err < 0)
444 return err; 444 return err;
445 445
@@ -449,8 +449,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
449 449
450 if (f->handle != handle && handle) 450 if (f->handle != handle && handle)
451 goto errout2; 451 goto errout2;
452 if (tb[TCA_RSVP_CLASSID-1]) { 452 if (tb[TCA_RSVP_CLASSID]) {
453 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]); 453 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
454 tcf_bind_filter(tp, &f->res, base); 454 tcf_bind_filter(tp, &f->res, base);
455 } 455 }
456 456
@@ -462,7 +462,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
462 err = -EINVAL; 462 err = -EINVAL;
463 if (handle) 463 if (handle)
464 goto errout2; 464 goto errout2;
465 if (tb[TCA_RSVP_DST-1] == NULL) 465 if (tb[TCA_RSVP_DST] == NULL)
466 goto errout2; 466 goto errout2;
467 467
468 err = -ENOBUFS; 468 err = -ENOBUFS;
@@ -471,19 +471,19 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
471 goto errout2; 471 goto errout2;
472 472
473 h2 = 16; 473 h2 = 16;
474 if (tb[TCA_RSVP_SRC-1]) { 474 if (tb[TCA_RSVP_SRC]) {
475 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src)); 475 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
476 h2 = hash_src(f->src); 476 h2 = hash_src(f->src);
477 } 477 }
478 if (tb[TCA_RSVP_PINFO-1]) { 478 if (tb[TCA_RSVP_PINFO]) {
479 pinfo = nla_data(tb[TCA_RSVP_PINFO-1]); 479 pinfo = nla_data(tb[TCA_RSVP_PINFO]);
480 f->spi = pinfo->spi; 480 f->spi = pinfo->spi;
481 f->tunnelhdr = pinfo->tunnelhdr; 481 f->tunnelhdr = pinfo->tunnelhdr;
482 } 482 }
483 if (tb[TCA_RSVP_CLASSID-1]) 483 if (tb[TCA_RSVP_CLASSID])
484 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]); 484 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
485 485
486 dst = nla_data(tb[TCA_RSVP_DST-1]); 486 dst = nla_data(tb[TCA_RSVP_DST]);
487 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0); 487 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
488 488
489 err = -ENOMEM; 489 err = -ENOMEM;
@@ -642,8 +642,7 @@ nla_put_failure:
642 return -1; 642 return -1;
643} 643}
644 644
645static struct tcf_proto_ops RSVP_OPS = { 645static struct tcf_proto_ops RSVP_OPS __read_mostly = {
646 .next = NULL,
647 .kind = RSVP_ID, 646 .kind = RSVP_ID,
648 .classify = rsvp_classify, 647 .classify = rsvp_classify,
649 .init = rsvp_init, 648 .init = rsvp_init,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2a318f2dc3e5..b5d56a22b1d2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -112,7 +112,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch)
112 112
113 for (prio = 0; prio < q->bands; prio++) { 113 for (prio = 0; prio < q->bands; prio++) {
114 struct Qdisc *qdisc = q->queues[prio]; 114 struct Qdisc *qdisc = q->queues[prio];
115 struct sk_buff *skb = qdisc->dequeue(qdisc); 115 struct sk_buff *skb = qdisc_dequeue_peeked(qdisc);
116 if (skb) { 116 if (skb) {
117 qdisc_bstats_update(sch, skb); 117 qdisc_bstats_update(sch, skb);
118 sch->q.qlen--; 118 sch->q.qlen--;
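The one-line change above matters because a child qdisc's ->peek() may already have dequeued an skb and stashed it in gso_skb; calling ->dequeue() directly would then skip that stashed packet. For context, qdisc_dequeue_peeked() from include/net/sch_generic.h does roughly the following (paraphrased sketch, not part of this patch):

/* Paraphrase: hand back the skb a previous ->peek() stashed in gso_skb,
 * if any, before asking the child qdisc for a fresh one.
 */
static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
{
	struct sk_buff *skb = sch->gso_skb;

	if (skb) {
		sch->gso_skb = NULL;
		sch->q.qlen--;
	} else {
		skb = sch->dequeue(sch);
	}

	return skb;
}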
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 0a833d0c1f61..e83c272c0325 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -287,6 +287,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
287 u32 r, slot, salt, sfbhash; 287 u32 r, slot, salt, sfbhash;
288 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 288 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
289 289
290 if (unlikely(sch->q.qlen >= q->limit)) {
291 sch->qstats.overlimits++;
292 q->stats.queuedrop++;
293 goto drop;
294 }
295
290 if (q->rehash_interval > 0) { 296 if (q->rehash_interval > 0) {
291 unsigned long limit = q->rehash_time + q->rehash_interval; 297 unsigned long limit = q->rehash_time + q->rehash_interval;
292 298
@@ -332,12 +338,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
332 slot ^= 1; 338 slot ^= 1;
333 sfb_skb_cb(skb)->hashes[slot] = 0; 339 sfb_skb_cb(skb)->hashes[slot] = 0;
334 340
335 if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) { 341 if (unlikely(minqlen >= q->max)) {
336 sch->qstats.overlimits++; 342 sch->qstats.overlimits++;
337 if (minqlen >= q->max) 343 q->stats.bucketdrop++;
338 q->stats.bucketdrop++;
339 else
340 q->stats.queuedrop++;
341 goto drop; 344 goto drop;
342 } 345 }
343 346
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 4536ee64383e..4f5510e2bd6f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -410,7 +410,12 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
410 /* Return Congestion Notification only if we dropped a packet 410 /* Return Congestion Notification only if we dropped a packet
411 * from this flow. 411 * from this flow.
412 */ 412 */
413 return (qlen != slot->qlen) ? NET_XMIT_CN : NET_XMIT_SUCCESS; 413 if (qlen != slot->qlen)
414 return NET_XMIT_CN;
415
416 /* As we dropped a packet, better let upper stack know this */
417 qdisc_tree_decrease_qlen(sch, 1);
418 return NET_XMIT_SUCCESS;
414} 419}
415 420
416static struct sk_buff * 421static struct sk_buff *
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index dc16b90ddb6f..152b5b3c3fff 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -282,6 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
282 asoc->peer.asconf_capable = 1; 282 asoc->peer.asconf_capable = 1;
283 asoc->asconf_addr_del_pending = NULL; 283 asoc->asconf_addr_del_pending = NULL;
284 asoc->src_out_of_asoc_ok = 0; 284 asoc->src_out_of_asoc_ok = 0;
285 asoc->new_transport = NULL;
285 286
286 /* Create an input queue. */ 287 /* Create an input queue. */
287 sctp_inq_init(&asoc->base.inqueue); 288 sctp_inq_init(&asoc->base.inqueue);
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index a6d27bf563a5..14c2b06028ff 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -917,6 +917,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
917 * current cwnd). 917 * current cwnd).
918 */ 918 */
919 if (!list_empty(&q->retransmit)) { 919 if (!list_empty(&q->retransmit)) {
920 if (asoc->peer.retran_path->state == SCTP_UNCONFIRMED)
921 goto sctp_flush_out;
920 if (transport == asoc->peer.retran_path) 922 if (transport == asoc->peer.retran_path)
921 goto retran; 923 goto retran;
922 924
@@ -989,6 +991,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
989 ((new_transport->state == SCTP_INACTIVE) || 991 ((new_transport->state == SCTP_INACTIVE) ||
990 (new_transport->state == SCTP_UNCONFIRMED))) 992 (new_transport->state == SCTP_UNCONFIRMED)))
991 new_transport = asoc->peer.active_path; 993 new_transport = asoc->peer.active_path;
994 if (new_transport->state == SCTP_UNCONFIRMED)
995 continue;
992 996
993 /* Change packets if necessary. */ 997 /* Change packets if necessary. */
994 if (new_transport != transport) { 998 if (new_transport != transport) {
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 91784f44a2e2..61b9fca5a173 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1299,7 +1299,7 @@ SCTP_STATIC __init int sctp_init(void)
1299 max_share = min(4UL*1024*1024, limit); 1299 max_share = min(4UL*1024*1024, limit);
1300 1300
1301 sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ 1301 sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */
1302 sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); 1302 sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1);
1303 sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); 1303 sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
1304 1304
1305 sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; 1305 sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 81db4e385352..0121e0ab0351 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3015,6 +3015,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
3015 /* Start the heartbeat timer. */ 3015 /* Start the heartbeat timer. */
3016 if (!mod_timer(&peer->hb_timer, sctp_transport_timeout(peer))) 3016 if (!mod_timer(&peer->hb_timer, sctp_transport_timeout(peer)))
3017 sctp_transport_hold(peer); 3017 sctp_transport_hold(peer);
3018 asoc->new_transport = peer;
3018 break; 3019 break;
3019 case SCTP_PARAM_DEL_IP: 3020 case SCTP_PARAM_DEL_IP:
3020 /* ADDIP 4.3 D7) If a request is received to delete the 3021 /* ADDIP 4.3 D7) If a request is received to delete the
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 167c880cf8da..76388b083f28 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1689,6 +1689,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1689 case SCTP_CMD_PURGE_ASCONF_QUEUE: 1689 case SCTP_CMD_PURGE_ASCONF_QUEUE:
1690 sctp_asconf_queue_teardown(asoc); 1690 sctp_asconf_queue_teardown(asoc);
1691 break; 1691 break;
1692
1693 case SCTP_CMD_SET_ASOC:
1694 asoc = cmd->obj.asoc;
1695 break;
1696
1692 default: 1697 default:
1693 pr_warn("Impossible command: %u, %p\n", 1698 pr_warn("Impossible command: %u, %p\n",
1694 cmd->verb, cmd->obj.ptr); 1699 cmd->verb, cmd->obj.ptr);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 49b847b00f99..891f5db8cc31 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2047,6 +2047,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
2047 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); 2047 sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
2048 sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); 2048 sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
2049 2049
 2050 /* Restore association pointer to provide SCTP command interpreter
2051 * with a valid context in case it needs to manipulate
2052 * the queues */
2053 sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC,
2054 SCTP_ASOC((struct sctp_association *)asoc));
2055
2050 return retval; 2056 return retval;
2051 2057
2052nomem: 2058nomem:
@@ -3612,6 +3618,11 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
3612 */ 3618 */
3613 asconf_ack->dest = chunk->source; 3619 asconf_ack->dest = chunk->source;
3614 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); 3620 sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack));
3621 if (asoc->new_transport) {
3622 sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport,
3623 commands);
3624 ((struct sctp_association *)asoc)->new_transport = NULL;
3625 }
3615 3626
3616 return SCTP_DISPOSITION_CONSUME; 3627 return SCTP_DISPOSITION_CONSUME;
3617} 3628}
diff --git a/net/socket.c b/net/socket.c
index b1cbbcd92558..2877647f347b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1871,8 +1871,14 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1871#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 1871#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1872#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 1872#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1873 1873
1874struct used_address {
1875 struct sockaddr_storage name;
1876 unsigned int name_len;
1877};
1878
1874static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, 1879static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
1875 struct msghdr *msg_sys, unsigned flags, int nosec) 1880 struct msghdr *msg_sys, unsigned flags,
1881 struct used_address *used_address)
1876{ 1882{
1877 struct compat_msghdr __user *msg_compat = 1883 struct compat_msghdr __user *msg_compat =
1878 (struct compat_msghdr __user *)msg; 1884 (struct compat_msghdr __user *)msg;
@@ -1953,8 +1959,30 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
1953 1959
1954 if (sock->file->f_flags & O_NONBLOCK) 1960 if (sock->file->f_flags & O_NONBLOCK)
1955 msg_sys->msg_flags |= MSG_DONTWAIT; 1961 msg_sys->msg_flags |= MSG_DONTWAIT;
1956 err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys, 1962 /*
 1957 total_len); 1963 * If this is sendmmsg() and the current destination address is the same
 1964 * as the previously successful one, skip asking for the LSM's decision.
1965 * used_address->name_len is initialized to UINT_MAX so that the first
1966 * destination address never matches.
1967 */
1968 if (used_address && msg_sys->msg_name &&
1969 used_address->name_len == msg_sys->msg_namelen &&
1970 !memcmp(&used_address->name, msg_sys->msg_name,
1971 used_address->name_len)) {
1972 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
1973 goto out_freectl;
1974 }
1975 err = sock_sendmsg(sock, msg_sys, total_len);
1976 /*
1977 * If this is sendmmsg() and sending to current destination address was
1978 * successful, remember it.
1979 */
1980 if (used_address && err >= 0) {
1981 used_address->name_len = msg_sys->msg_namelen;
1982 if (msg_sys->msg_name)
1983 memcpy(&used_address->name, msg_sys->msg_name,
1984 used_address->name_len);
1985 }
1958 1986
1959out_freectl: 1987out_freectl:
1960 if (ctl_buf != ctl) 1988 if (ctl_buf != ctl)
@@ -1979,7 +2007,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1979 if (!sock) 2007 if (!sock)
1980 goto out; 2008 goto out;
1981 2009
1982 err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0); 2010 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
1983 2011
1984 fput_light(sock->file, fput_needed); 2012 fput_light(sock->file, fput_needed);
1985out: 2013out:
@@ -1998,6 +2026,10 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
1998 struct mmsghdr __user *entry; 2026 struct mmsghdr __user *entry;
1999 struct compat_mmsghdr __user *compat_entry; 2027 struct compat_mmsghdr __user *compat_entry;
2000 struct msghdr msg_sys; 2028 struct msghdr msg_sys;
2029 struct used_address used_address;
2030
2031 if (vlen > UIO_MAXIOV)
2032 vlen = UIO_MAXIOV;
2001 2033
2002 datagrams = 0; 2034 datagrams = 0;
2003 2035
@@ -2005,27 +2037,22 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2005 if (!sock) 2037 if (!sock)
2006 return err; 2038 return err;
2007 2039
2008 err = sock_error(sock->sk); 2040 used_address.name_len = UINT_MAX;
2009 if (err)
2010 goto out_put;
2011
2012 entry = mmsg; 2041 entry = mmsg;
2013 compat_entry = (struct compat_mmsghdr __user *)mmsg; 2042 compat_entry = (struct compat_mmsghdr __user *)mmsg;
2043 err = 0;
2014 2044
2015 while (datagrams < vlen) { 2045 while (datagrams < vlen) {
2016 /*
2017 * No need to ask LSM for more than the first datagram.
2018 */
2019 if (MSG_CMSG_COMPAT & flags) { 2046 if (MSG_CMSG_COMPAT & flags) {
2020 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, 2047 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
2021 &msg_sys, flags, datagrams); 2048 &msg_sys, flags, &used_address);
2022 if (err < 0) 2049 if (err < 0)
2023 break; 2050 break;
2024 err = __put_user(err, &compat_entry->msg_len); 2051 err = __put_user(err, &compat_entry->msg_len);
2025 ++compat_entry; 2052 ++compat_entry;
2026 } else { 2053 } else {
2027 err = __sys_sendmsg(sock, (struct msghdr __user *)entry, 2054 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
2028 &msg_sys, flags, datagrams); 2055 &msg_sys, flags, &used_address);
2029 if (err < 0) 2056 if (err < 0)
2030 break; 2057 break;
2031 err = put_user(err, &entry->msg_len); 2058 err = put_user(err, &entry->msg_len);
@@ -2037,29 +2064,11 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2037 ++datagrams; 2064 ++datagrams;
2038 } 2065 }
2039 2066
2040out_put:
2041 fput_light(sock->file, fput_needed); 2067 fput_light(sock->file, fput_needed);
2042 2068
2043 if (err == 0) 2069 /* We only return an error if no datagrams were able to be sent */
2044 return datagrams; 2070 if (datagrams != 0)
2045
2046 if (datagrams != 0) {
2047 /*
2048 * We may send less entries than requested (vlen) if the
2049 * sock is non blocking...
2050 */
2051 if (err != -EAGAIN) {
2052 /*
2053 * ... or if sendmsg returns an error after we
2054 * send some datagrams, where we record the
2055 * error to return on the next call or if the
2056 * app asks about it using getsockopt(SO_ERROR).
2057 */
2058 sock->sk->sk_err = -err;
2059 }
2060
2061 return datagrams; 2071 return datagrams;
2062 }
2063 2072
2064 return err; 2073 return err;
2065} 2074}
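
The __sys_sendmsg()/__sys_sendmmsg() changes above make sendmmsg() pay the security (LSM) check only once per destination: the first checked send records the destination in a struct used_address (primed with name_len = UINT_MAX so nothing matches before that), and later datagrams aimed at a byte-identical address go through sock_sendmsg_nosec(). The following is a condensed sketch of that flow, not the exact kernel code; the send_one() wrapper is hypothetical.

    /* Sketch of the per-destination caching used by sendmmsg() (simplified). */
    struct used_address {
            struct sockaddr_storage name;
            unsigned int name_len;
    };

    static int send_one(struct socket *sock, struct msghdr *msg_sys,
                        size_t total_len, struct used_address *used_address)
    {
            int err;

            /*
             * Same destination as the last *checked* send: skip the LSM hook
             * and transmit directly.
             */
            if (used_address && msg_sys->msg_name &&
                used_address->name_len == msg_sys->msg_namelen &&
                !memcmp(&used_address->name, msg_sys->msg_name,
                        used_address->name_len))
                    return sock_sendmsg_nosec(sock, msg_sys, total_len);

            err = sock_sendmsg(sock, msg_sys, total_len);

            /* Cache the address only after a successful, fully checked send. */
            if (used_address && err >= 0) {
                    used_address->name_len = msg_sys->msg_namelen;
                    if (msg_sys->msg_name)
                            memcpy(&used_address->name, msg_sys->msg_name,
                                   used_address->name_len);
            }
            return err;
    }

On error the address is deliberately not cached, so the next datagram in the batch repeats the full check.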
@@ -2463,7 +2472,7 @@ int sock_register(const struct net_proto_family *ops)
2463 lockdep_is_held(&net_family_lock))) 2472 lockdep_is_held(&net_family_lock)))
2464 err = -EEXIST; 2473 err = -EEXIST;
2465 else { 2474 else {
2466 rcu_assign_pointer(net_families[ops->family], ops); 2475 RCU_INIT_POINTER(net_families[ops->family], ops);
2467 err = 0; 2476 err = 0;
2468 } 2477 }
2469 spin_unlock(&net_family_lock); 2478 spin_unlock(&net_family_lock);
@@ -2491,7 +2500,7 @@ void sock_unregister(int family)
2491 BUG_ON(family < 0 || family >= NPROTO); 2500 BUG_ON(family < 0 || family >= NPROTO);
2492 2501
2493 spin_lock(&net_family_lock); 2502 spin_lock(&net_family_lock);
2494 rcu_assign_pointer(net_families[family], NULL); 2503 RCU_INIT_POINTER(net_families[family], NULL);
2495 spin_unlock(&net_family_lock); 2504 spin_unlock(&net_family_lock);
2496 2505
2497 synchronize_rcu(); 2506 synchronize_rcu();
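
Several hunks in this series (socket.c above, auth_gss.c and xfrm_user.c below) switch rcu_assign_pointer() to RCU_INIT_POINTER(). The difference, as I read it: rcu_assign_pointer() orders initialisation of the pointed-to data before publication, while RCU_INIT_POINTER() omits that barrier and is appropriate when storing NULL or when readers cannot yet reach the pointer. A minimal sketch of the two cases, with illustrative names only:

    /* Sketch only - illustrates when the barrier-free store is acceptable. */
    struct foo {
            int val;
    };

    static struct foo __rcu *gp;
    static DEFINE_SPINLOCK(gp_lock);

    static void publish(struct foo *p)
    {
            p->val = 42;                    /* initialise payload first       */
            rcu_assign_pointer(gp, p);      /* barrier: readers see val == 42 */
    }

    static void retract(void)
    {
            spin_lock(&gp_lock);
            RCU_INIT_POINTER(gp, NULL);     /* storing NULL needs no ordering */
            spin_unlock(&gp_lock);
            synchronize_rcu();              /* wait for readers before freeing */
    }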
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 364eb45e989d..d4132754cbe1 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -122,7 +122,7 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
122 if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) 122 if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags))
123 return; 123 return;
124 gss_get_ctx(ctx); 124 gss_get_ctx(ctx);
125 rcu_assign_pointer(gss_cred->gc_ctx, ctx); 125 RCU_INIT_POINTER(gss_cred->gc_ctx, ctx);
126 set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); 126 set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
127 smp_mb__before_clear_bit(); 127 smp_mb__before_clear_bit();
128 clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); 128 clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
@@ -970,7 +970,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
970 struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); 970 struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
971 struct gss_cl_ctx *ctx = gss_cred->gc_ctx; 971 struct gss_cl_ctx *ctx = gss_cred->gc_ctx;
972 972
973 rcu_assign_pointer(gss_cred->gc_ctx, NULL); 973 RCU_INIT_POINTER(gss_cred->gc_ctx, NULL);
974 call_rcu(&cred->cr_rcu, gss_free_cred_callback); 974 call_rcu(&cred->cr_rcu, gss_free_cred_callback);
975 if (ctx) 975 if (ctx)
976 gss_put_ctx(ctx); 976 gss_put_ctx(ctx);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 9b6a4d1ea8f8..f4385e45a5fc 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -187,6 +187,7 @@ EXPORT_SYMBOL_GPL(xprt_load_transport);
187/** 187/**
188 * xprt_reserve_xprt - serialize write access to transports 188 * xprt_reserve_xprt - serialize write access to transports
189 * @task: task that is requesting access to the transport 189 * @task: task that is requesting access to the transport
190 * @xprt: pointer to the target transport
190 * 191 *
191 * This prevents mixing the payload of separate requests, and prevents 192 * This prevents mixing the payload of separate requests, and prevents
192 * transport connects from colliding with writes. No congestion control 193 * transport connects from colliding with writes. No congestion control
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 759b318b5ffb..28908f54459e 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -39,6 +39,7 @@
39#include "link.h" 39#include "link.h"
40#include "port.h" 40#include "port.h"
41#include "bcast.h" 41#include "bcast.h"
42#include "name_distr.h"
42 43
43#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ 44#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */
44 45
@@ -298,14 +299,9 @@ static void bclink_send_nack(struct tipc_node *n_ptr)
298 msg_set_bcgap_to(msg, n_ptr->bclink.gap_to); 299 msg_set_bcgap_to(msg, n_ptr->bclink.gap_to);
299 msg_set_bcast_tag(msg, tipc_own_tag); 300 msg_set_bcast_tag(msg, tipc_own_tag);
300 301
301 if (tipc_bearer_send(&bcbearer->bearer, buf, NULL)) { 302 tipc_bearer_send(&bcbearer->bearer, buf, NULL);
302 bcl->stats.sent_nacks++; 303 bcl->stats.sent_nacks++;
303 buf_discard(buf); 304 buf_discard(buf);
304 } else {
305 tipc_bearer_schedule(bcl->b_ptr, bcl);
306 bcl->proto_msg_queue = buf;
307 bcl->stats.bearer_congs++;
308 }
309 305
310 /* 306 /*
 311 * Ensure we don't send another NACK msg to the node 307 * Ensure we don't send another NACK msg to the node
@@ -426,20 +422,28 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
426void tipc_bclink_recv_pkt(struct sk_buff *buf) 422void tipc_bclink_recv_pkt(struct sk_buff *buf)
427{ 423{
428 struct tipc_msg *msg = buf_msg(buf); 424 struct tipc_msg *msg = buf_msg(buf);
429 struct tipc_node *node = tipc_node_find(msg_prevnode(msg)); 425 struct tipc_node *node;
430 u32 next_in; 426 u32 next_in;
431 u32 seqno; 427 u32 seqno;
432 struct sk_buff *deferred; 428 struct sk_buff *deferred;
433 429
434 if (unlikely(!node || !tipc_node_is_up(node) || !node->bclink.supported || 430 /* Screen out unwanted broadcast messages */
435 (msg_mc_netid(msg) != tipc_net_id))) { 431
436 buf_discard(buf); 432 if (msg_mc_netid(msg) != tipc_net_id)
437 return; 433 goto exit;
438 } 434
435 node = tipc_node_find(msg_prevnode(msg));
436 if (unlikely(!node))
437 goto exit;
438
439 tipc_node_lock(node);
440 if (unlikely(!node->bclink.supported))
441 goto unlock;
439 442
440 if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { 443 if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) {
444 if (msg_type(msg) != STATE_MSG)
445 goto unlock;
441 if (msg_destnode(msg) == tipc_own_addr) { 446 if (msg_destnode(msg) == tipc_own_addr) {
442 tipc_node_lock(node);
443 tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); 447 tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
444 tipc_node_unlock(node); 448 tipc_node_unlock(node);
445 spin_lock_bh(&bc_lock); 449 spin_lock_bh(&bc_lock);
@@ -449,18 +453,18 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
449 msg_bcgap_to(msg)); 453 msg_bcgap_to(msg));
450 spin_unlock_bh(&bc_lock); 454 spin_unlock_bh(&bc_lock);
451 } else { 455 } else {
456 tipc_node_unlock(node);
452 tipc_bclink_peek_nack(msg_destnode(msg), 457 tipc_bclink_peek_nack(msg_destnode(msg),
453 msg_bcast_tag(msg), 458 msg_bcast_tag(msg),
454 msg_bcgap_after(msg), 459 msg_bcgap_after(msg),
455 msg_bcgap_to(msg)); 460 msg_bcgap_to(msg));
456 } 461 }
457 buf_discard(buf); 462 goto exit;
458 return;
459 } 463 }
460 464
461 tipc_node_lock(node); 465 /* Handle in-sequence broadcast message */
466
462receive: 467receive:
463 deferred = node->bclink.deferred_head;
464 next_in = mod(node->bclink.last_in + 1); 468 next_in = mod(node->bclink.last_in + 1);
465 seqno = msg_seqno(msg); 469 seqno = msg_seqno(msg);
466 470
@@ -474,7 +478,10 @@ receive:
474 } 478 }
475 if (likely(msg_isdata(msg))) { 479 if (likely(msg_isdata(msg))) {
476 tipc_node_unlock(node); 480 tipc_node_unlock(node);
477 tipc_port_recv_mcast(buf, NULL); 481 if (likely(msg_mcast(msg)))
482 tipc_port_recv_mcast(buf, NULL);
483 else
484 buf_discard(buf);
478 } else if (msg_user(msg) == MSG_BUNDLER) { 485 } else if (msg_user(msg) == MSG_BUNDLER) {
479 bcl->stats.recv_bundles++; 486 bcl->stats.recv_bundles++;
480 bcl->stats.recv_bundled += msg_msgcnt(msg); 487 bcl->stats.recv_bundled += msg_msgcnt(msg);
@@ -487,18 +494,22 @@ receive:
487 bcl->stats.recv_fragmented++; 494 bcl->stats.recv_fragmented++;
488 tipc_node_unlock(node); 495 tipc_node_unlock(node);
489 tipc_net_route_msg(buf); 496 tipc_net_route_msg(buf);
497 } else if (msg_user(msg) == NAME_DISTRIBUTOR) {
498 tipc_node_unlock(node);
499 tipc_named_recv(buf);
490 } else { 500 } else {
491 tipc_node_unlock(node); 501 tipc_node_unlock(node);
492 tipc_net_route_msg(buf); 502 buf_discard(buf);
493 } 503 }
504 buf = NULL;
505 tipc_node_lock(node);
506 deferred = node->bclink.deferred_head;
494 if (deferred && (buf_seqno(deferred) == mod(next_in + 1))) { 507 if (deferred && (buf_seqno(deferred) == mod(next_in + 1))) {
495 tipc_node_lock(node);
496 buf = deferred; 508 buf = deferred;
497 msg = buf_msg(buf); 509 msg = buf_msg(buf);
498 node->bclink.deferred_head = deferred->next; 510 node->bclink.deferred_head = deferred->next;
499 goto receive; 511 goto receive;
500 } 512 }
501 return;
502 } else if (less(next_in, seqno)) { 513 } else if (less(next_in, seqno)) {
503 u32 gap_after = node->bclink.gap_after; 514 u32 gap_after = node->bclink.gap_after;
504 u32 gap_to = node->bclink.gap_to; 515 u32 gap_to = node->bclink.gap_to;
@@ -513,6 +524,7 @@ receive:
513 else if (less(gap_after, seqno) && less(seqno, gap_to)) 524 else if (less(gap_after, seqno) && less(seqno, gap_to))
514 node->bclink.gap_to = seqno; 525 node->bclink.gap_to = seqno;
515 } 526 }
527 buf = NULL;
516 if (bclink_ack_allowed(node->bclink.nack_sync)) { 528 if (bclink_ack_allowed(node->bclink.nack_sync)) {
517 if (gap_to != gap_after) 529 if (gap_to != gap_after)
518 bclink_send_nack(node); 530 bclink_send_nack(node);
@@ -520,9 +532,11 @@ receive:
520 } 532 }
521 } else { 533 } else {
522 bcl->stats.duplicates++; 534 bcl->stats.duplicates++;
523 buf_discard(buf);
524 } 535 }
536unlock:
525 tipc_node_unlock(node); 537 tipc_node_unlock(node);
538exit:
539 buf_discard(buf);
526} 540}
527 541
528u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) 542u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
@@ -535,10 +549,11 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
535/** 549/**
536 * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer 550 * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer
537 * 551 *
538 * Send through as many bearers as necessary to reach all nodes 552 * Send packet over as many bearers as necessary to reach all nodes
539 * that support TIPC multicasting. 553 * that have joined the broadcast link.
540 * 554 *
541 * Returns 0 if packet sent successfully, non-zero if not 555 * Returns 0 (packet sent successfully) under all circumstances,
556 * since the broadcast link's pseudo-bearer never blocks
542 */ 557 */
543 558
544static int tipc_bcbearer_send(struct sk_buff *buf, 559static int tipc_bcbearer_send(struct sk_buff *buf,
@@ -547,7 +562,12 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
547{ 562{
548 int bp_index; 563 int bp_index;
549 564
550 /* Prepare buffer for broadcasting (if first time trying to send it) */ 565 /*
566 * Prepare broadcast link message for reliable transmission,
567 * if first time trying to send it;
568 * preparation is skipped for broadcast link protocol messages
569 * since they are sent in an unreliable manner and don't need it
570 */
551 571
552 if (likely(!msg_non_seq(buf_msg(buf)))) { 572 if (likely(!msg_non_seq(buf_msg(buf)))) {
553 struct tipc_msg *msg; 573 struct tipc_msg *msg;
@@ -596,18 +616,12 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
596 } 616 }
597 617
598 if (bcbearer->remains_new.count == 0) 618 if (bcbearer->remains_new.count == 0)
599 return 0; 619 break; /* all targets reached */
600 620
601 bcbearer->remains = bcbearer->remains_new; 621 bcbearer->remains = bcbearer->remains_new;
602 } 622 }
603 623
604 /* 624 return 0;
605 * Unable to reach all targets (indicate success, since currently
606 * there isn't code in place to properly block & unblock the
607 * pseudo-bearer used by the broadcast link)
608 */
609
610 return TIPC_OK;
611} 625}
612 626
613/** 627/**
@@ -667,27 +681,6 @@ void tipc_bcbearer_sort(void)
667 spin_unlock_bh(&bc_lock); 681 spin_unlock_bh(&bc_lock);
668} 682}
669 683
670/**
671 * tipc_bcbearer_push - resolve bearer congestion
672 *
673 * Forces bclink to push out any unsent packets, until all packets are gone
674 * or congestion reoccurs.
675 * No locks set when function called
676 */
677
678void tipc_bcbearer_push(void)
679{
680 struct tipc_bearer *b_ptr;
681
682 spin_lock_bh(&bc_lock);
683 b_ptr = &bcbearer->bearer;
684 if (b_ptr->blocked) {
685 b_ptr->blocked = 0;
686 tipc_bearer_lock_push(b_ptr);
687 }
688 spin_unlock_bh(&bc_lock);
689}
690
691 684
692int tipc_bclink_stats(char *buf, const u32 buf_size) 685int tipc_bclink_stats(char *buf, const u32 buf_size)
693{ 686{
@@ -764,7 +757,7 @@ int tipc_bclink_init(void)
764 bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); 757 bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC);
765 bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); 758 bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC);
766 if (!bcbearer || !bclink) { 759 if (!bcbearer || !bclink) {
767 warn("Multicast link creation failed, no memory\n"); 760 warn("Broadcast link creation failed, no memory\n");
768 kfree(bcbearer); 761 kfree(bcbearer);
769 bcbearer = NULL; 762 bcbearer = NULL;
770 kfree(bclink); 763 kfree(bclink);
@@ -775,7 +768,7 @@ int tipc_bclink_init(void)
775 INIT_LIST_HEAD(&bcbearer->bearer.cong_links); 768 INIT_LIST_HEAD(&bcbearer->bearer.cong_links);
776 bcbearer->bearer.media = &bcbearer->media; 769 bcbearer->bearer.media = &bcbearer->media;
777 bcbearer->media.send_msg = tipc_bcbearer_send; 770 bcbearer->media.send_msg = tipc_bcbearer_send;
778 sprintf(bcbearer->media.name, "tipc-multicast"); 771 sprintf(bcbearer->media.name, "tipc-broadcast");
779 772
780 bcl = &bclink->link; 773 bcl = &bclink->link;
781 INIT_LIST_HEAD(&bcl->waiting_ports); 774 INIT_LIST_HEAD(&bcl->waiting_ports);
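
The tipc_bclink_recv_pkt() rework above funnels every path through two labels: unlock drops the node lock, and exit frees whatever buffer the function still owns; paths that hand the buffer on set buf = NULL so the final discard is a no-op. A generic sketch of that ownership pattern follows (the helper names here are made up; buf_discard() in TIPC is essentially kfree_skb(), and kfree_skb(NULL) is a no-op):

    /* Sketch of the single-exit ownership pattern used above. */
    void recv_pkt(struct sk_buff *buf)
    {
            struct node *node;

            if (!sanity_check(buf))
                    goto exit;              /* still own buf: freed at exit  */

            node = node_find(buf);
            if (!node)
                    goto exit;

            node_lock(node);
            if (!node->supported)
                    goto unlock;

            deliver(buf);                   /* ownership handed on...        */
            buf = NULL;                     /* ...so the exit path won't free */
    unlock:
            node_unlock(node);
    exit:
            kfree_skb(buf);
    }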
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 500c97f1c859..06740da5ae61 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -101,6 +101,5 @@ int tipc_bclink_stats(char *stats_buf, const u32 buf_size);
101int tipc_bclink_reset_stats(void); 101int tipc_bclink_reset_stats(void);
102int tipc_bclink_set_queue_limits(u32 limit); 102int tipc_bclink_set_queue_limits(u32 limit);
103void tipc_bcbearer_sort(void); 103void tipc_bcbearer_sort(void);
104void tipc_bcbearer_push(void);
105 104
106#endif 105#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 85eba9c08ee9..e2202de3d93e 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -385,13 +385,9 @@ static int bearer_push(struct tipc_bearer *b_ptr)
385 385
386void tipc_bearer_lock_push(struct tipc_bearer *b_ptr) 386void tipc_bearer_lock_push(struct tipc_bearer *b_ptr)
387{ 387{
388 int res;
389
390 spin_lock_bh(&b_ptr->lock); 388 spin_lock_bh(&b_ptr->lock);
391 res = bearer_push(b_ptr); 389 bearer_push(b_ptr);
392 spin_unlock_bh(&b_ptr->lock); 390 spin_unlock_bh(&b_ptr->lock);
393 if (res)
394 tipc_bcbearer_push();
395} 391}
396 392
397 393
@@ -608,6 +604,7 @@ int tipc_block_bearer(const char *name)
608 info("Blocking bearer <%s>\n", name); 604 info("Blocking bearer <%s>\n", name);
609 spin_lock_bh(&b_ptr->lock); 605 spin_lock_bh(&b_ptr->lock);
610 b_ptr->blocked = 1; 606 b_ptr->blocked = 1;
607 list_splice_init(&b_ptr->cong_links, &b_ptr->links);
611 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { 608 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
612 struct tipc_node *n_ptr = l_ptr->owner; 609 struct tipc_node *n_ptr = l_ptr->owner;
613 610
@@ -635,6 +632,7 @@ static void bearer_disable(struct tipc_bearer *b_ptr)
635 spin_lock_bh(&b_ptr->lock); 632 spin_lock_bh(&b_ptr->lock);
636 b_ptr->blocked = 1; 633 b_ptr->blocked = 1;
637 b_ptr->media->disable_bearer(b_ptr); 634 b_ptr->media->disable_bearer(b_ptr);
635 list_splice_init(&b_ptr->cong_links, &b_ptr->links);
638 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { 636 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
639 tipc_link_delete(l_ptr); 637 tipc_link_delete(l_ptr);
640 } 638 }
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 5ad70eff1ebf..d696f9e414e3 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -39,8 +39,8 @@
39 39
40#include "bcast.h" 40#include "bcast.h"
41 41
42#define MAX_BEARERS 8 42#define MAX_BEARERS 2
43#define MAX_MEDIA 4 43#define MAX_MEDIA 2
44 44
45/* 45/*
46 * Identifiers of supported TIPC media types 46 * Identifiers of supported TIPC media types
diff --git a/net/tipc/config.h b/net/tipc/config.h
index 443159a166fd..80da6ebc2785 100644
--- a/net/tipc/config.h
+++ b/net/tipc/config.h
@@ -65,7 +65,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd,
65 const void *req_tlv_area, int req_tlv_space, 65 const void *req_tlv_area, int req_tlv_space,
66 int headroom); 66 int headroom);
67 67
68void tipc_cfg_link_event(u32 addr, char *name, int up);
69int tipc_cfg_init(void); 68int tipc_cfg_init(void);
70void tipc_cfg_stop(void); 69void tipc_cfg_stop(void);
71 70
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 0987933155b9..f2fb96e86ee8 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -159,12 +159,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
159 } 159 }
160 tipc_node_lock(n_ptr); 160 tipc_node_lock(n_ptr);
161 161
162 /* Don't talk to neighbor during cleanup after last session */
163 if (n_ptr->cleanup_required) {
164 tipc_node_unlock(n_ptr);
165 return;
166 }
167
168 link = n_ptr->links[b_ptr->identity]; 162 link = n_ptr->links[b_ptr->identity];
169 163
170 /* Create a link endpoint for this bearer, if necessary */ 164 /* Create a link endpoint for this bearer, if necessary */
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index b69092eb95d8..e728d4ce2a1b 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -2,7 +2,7 @@
2 * net/tipc/eth_media.c: Ethernet bearer support for TIPC 2 * net/tipc/eth_media.c: Ethernet bearer support for TIPC
3 * 3 *
4 * Copyright (c) 2001-2007, Ericsson AB 4 * Copyright (c) 2001-2007, Ericsson AB
5 * Copyright (c) 2005-2007, Wind River Systems 5 * Copyright (c) 2005-2008, 2011, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -37,7 +37,7 @@
37#include "core.h" 37#include "core.h"
38#include "bearer.h" 38#include "bearer.h"
39 39
40#define MAX_ETH_BEARERS 2 40#define MAX_ETH_BEARERS MAX_BEARERS
41#define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI 41#define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI
42#define ETH_LINK_TOLERANCE TIPC_DEF_LINK_TOL 42#define ETH_LINK_TOLERANCE TIPC_DEF_LINK_TOL
43#define ETH_LINK_WINDOW TIPC_DEF_LINK_WIN 43#define ETH_LINK_WINDOW TIPC_DEF_LINK_WIN
@@ -144,31 +144,27 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
144 144
145 /* Find device with specified name */ 145 /* Find device with specified name */
146 146
147 read_lock(&dev_base_lock);
147 for_each_netdev(&init_net, pdev) { 148 for_each_netdev(&init_net, pdev) {
148 if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { 149 if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
149 dev = pdev; 150 dev = pdev;
151 dev_hold(dev);
150 break; 152 break;
151 } 153 }
152 } 154 }
155 read_unlock(&dev_base_lock);
153 if (!dev) 156 if (!dev)
154 return -ENODEV; 157 return -ENODEV;
155 158
156 /* Find Ethernet bearer for device (or create one) */ 159 /* Create Ethernet bearer for device */
157 160
158 while ((eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev)) 161 eb_ptr->dev = dev;
159 eb_ptr++; 162 eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
160 if (eb_ptr == stop) 163 eb_ptr->tipc_packet_type.dev = dev;
161 return -EDQUOT; 164 eb_ptr->tipc_packet_type.func = recv_msg;
162 if (!eb_ptr->dev) { 165 eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
163 eb_ptr->dev = dev; 166 INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
164 eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); 167 dev_add_pack(&eb_ptr->tipc_packet_type);
165 eb_ptr->tipc_packet_type.dev = dev;
166 eb_ptr->tipc_packet_type.func = recv_msg;
167 eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
168 INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
169 dev_hold(dev);
170 dev_add_pack(&eb_ptr->tipc_packet_type);
171 }
172 168
173 /* Associate TIPC bearer with Ethernet bearer */ 169 /* Associate TIPC bearer with Ethernet bearer */
174 170
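
enable_bearer() above now walks the device list under read_lock(&dev_base_lock) and takes a reference with dev_hold() while the pointer is still known to be valid, instead of using it bare afterwards. Where only a name lookup is needed, dev_get_by_name() expresses the same idea more compactly, returning the device with its refcount already raised; a sketch under that assumption:

    /* Sketch: look up a device by name and keep a reference to it. */
    struct net_device *dev;

    dev = dev_get_by_name(&init_net, driver_name);      /* refcount taken */
    if (!dev)
            return -ENODEV;

    /* ... register the packet handler, remember dev in the bearer ... */

    /* later, when the bearer is disabled: */
    dev_put(dev);                                        /* drop the reference */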
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f89570c54f54..ae98a72da11a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -332,15 +332,16 @@ struct link *tipc_link_create(struct tipc_node *n_ptr,
332 332
333 l_ptr->addr = peer; 333 l_ptr->addr = peer;
334 if_name = strchr(b_ptr->name, ':') + 1; 334 if_name = strchr(b_ptr->name, ':') + 1;
335 sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:", 335 sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
336 tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), 336 tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr),
337 tipc_node(tipc_own_addr), 337 tipc_node(tipc_own_addr),
338 if_name, 338 if_name,
339 tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); 339 tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
340 /* note: peer i/f is appended to link name by reset/activate */ 340 /* note: peer i/f name is updated by reset/activate message */
341 memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); 341 memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr));
342 l_ptr->owner = n_ptr; 342 l_ptr->owner = n_ptr;
343 l_ptr->checkpoint = 1; 343 l_ptr->checkpoint = 1;
344 l_ptr->peer_session = INVALID_SESSION;
344 l_ptr->b_ptr = b_ptr; 345 l_ptr->b_ptr = b_ptr;
345 link_set_supervision_props(l_ptr, b_ptr->media->tolerance); 346 link_set_supervision_props(l_ptr, b_ptr->media->tolerance);
346 l_ptr->state = RESET_UNKNOWN; 347 l_ptr->state = RESET_UNKNOWN;
@@ -536,9 +537,6 @@ void tipc_link_stop(struct link *l_ptr)
536 l_ptr->proto_msg_queue = NULL; 537 l_ptr->proto_msg_queue = NULL;
537} 538}
538 539
539/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */
540#define link_send_event(fcn, l_ptr, up) do { } while (0)
541
542void tipc_link_reset(struct link *l_ptr) 540void tipc_link_reset(struct link *l_ptr)
543{ 541{
544 struct sk_buff *buf; 542 struct sk_buff *buf;
@@ -596,10 +594,6 @@ void tipc_link_reset(struct link *l_ptr)
596 l_ptr->fsm_msg_cnt = 0; 594 l_ptr->fsm_msg_cnt = 0;
597 l_ptr->stale_count = 0; 595 l_ptr->stale_count = 0;
598 link_reset_statistics(l_ptr); 596 link_reset_statistics(l_ptr);
599
600 link_send_event(tipc_cfg_link_event, l_ptr, 0);
601 if (!in_own_cluster(l_ptr->addr))
602 link_send_event(tipc_disc_link_event, l_ptr, 0);
603} 597}
604 598
605 599
@@ -608,9 +602,6 @@ static void link_activate(struct link *l_ptr)
608 l_ptr->next_in_no = l_ptr->stats.recv_info = 1; 602 l_ptr->next_in_no = l_ptr->stats.recv_info = 1;
609 tipc_node_link_up(l_ptr->owner, l_ptr); 603 tipc_node_link_up(l_ptr->owner, l_ptr);
610 tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr); 604 tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr);
611 link_send_event(tipc_cfg_link_event, l_ptr, 1);
612 if (!in_own_cluster(l_ptr->addr))
613 link_send_event(tipc_disc_link_event, l_ptr, 1);
614} 605}
615 606
616/** 607/**
@@ -985,6 +976,51 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
985} 976}
986 977
987/* 978/*
979 * tipc_link_send_names - send name table entries to new neighbor
980 *
981 * Send routine for bulk delivery of name table messages when contact
982 * with a new neighbor occurs. No link congestion checking is performed
983 * because name table messages *must* be delivered. The messages must be
984 * small enough not to require fragmentation.
985 * Called without any locks held.
986 */
987
988void tipc_link_send_names(struct list_head *message_list, u32 dest)
989{
990 struct tipc_node *n_ptr;
991 struct link *l_ptr;
992 struct sk_buff *buf;
993 struct sk_buff *temp_buf;
994
995 if (list_empty(message_list))
996 return;
997
998 read_lock_bh(&tipc_net_lock);
999 n_ptr = tipc_node_find(dest);
1000 if (n_ptr) {
1001 tipc_node_lock(n_ptr);
1002 l_ptr = n_ptr->active_links[0];
1003 if (l_ptr) {
1004 /* convert circular list to linear list */
1005 ((struct sk_buff *)message_list->prev)->next = NULL;
1006 link_add_chain_to_outqueue(l_ptr,
1007 (struct sk_buff *)message_list->next, 0);
1008 tipc_link_push_queue(l_ptr);
1009 INIT_LIST_HEAD(message_list);
1010 }
1011 tipc_node_unlock(n_ptr);
1012 }
1013 read_unlock_bh(&tipc_net_lock);
1014
1015 /* discard the messages if they couldn't be sent */
1016
1017 list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) {
1018 list_del((struct list_head *)buf);
1019 buf_discard(buf);
1020 }
1021}
1022
1023/*
988 * link_send_buf_fast: Entry for data messages where the 1024 * link_send_buf_fast: Entry for data messages where the
989 * destination link is known and the header is complete, 1025 * destination link is known and the header is complete,
990 * inclusive total message length. Very time critical. 1026 * inclusive total message length. Very time critical.
@@ -1031,9 +1067,6 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
1031 u32 selector = msg_origport(buf_msg(buf)) & 1; 1067 u32 selector = msg_origport(buf_msg(buf)) & 1;
1032 u32 dummy; 1068 u32 dummy;
1033 1069
1034 if (destnode == tipc_own_addr)
1035 return tipc_port_recv_msg(buf);
1036
1037 read_lock_bh(&tipc_net_lock); 1070 read_lock_bh(&tipc_net_lock);
1038 n_ptr = tipc_node_find(destnode); 1071 n_ptr = tipc_node_find(destnode);
1039 if (likely(n_ptr)) { 1072 if (likely(n_ptr)) {
@@ -1658,19 +1691,12 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
1658 continue; 1691 continue;
1659 } 1692 }
1660 1693
1694 /* Discard unicast link messages destined for another node */
1695
1661 if (unlikely(!msg_short(msg) && 1696 if (unlikely(!msg_short(msg) &&
1662 (msg_destnode(msg) != tipc_own_addr))) 1697 (msg_destnode(msg) != tipc_own_addr)))
1663 goto cont; 1698 goto cont;
1664 1699
1665 /* Discard non-routeable messages destined for another node */
1666
1667 if (unlikely(!msg_isdata(msg) &&
1668 (msg_destnode(msg) != tipc_own_addr))) {
1669 if ((msg_user(msg) != CONN_MANAGER) &&
1670 (msg_user(msg) != MSG_FRAGMENTER))
1671 goto cont;
1672 }
1673
1674 /* Locate neighboring node that sent message */ 1700 /* Locate neighboring node that sent message */
1675 1701
1676 n_ptr = tipc_node_find(msg_prevnode(msg)); 1702 n_ptr = tipc_node_find(msg_prevnode(msg));
@@ -1678,17 +1704,24 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr)
1678 goto cont; 1704 goto cont;
1679 tipc_node_lock(n_ptr); 1705 tipc_node_lock(n_ptr);
1680 1706
1681 /* Don't talk to neighbor during cleanup after last session */ 1707 /* Locate unicast link endpoint that should handle message */
1682 1708
1683 if (n_ptr->cleanup_required) { 1709 l_ptr = n_ptr->links[b_ptr->identity];
1710 if (unlikely(!l_ptr)) {
1684 tipc_node_unlock(n_ptr); 1711 tipc_node_unlock(n_ptr);
1685 goto cont; 1712 goto cont;
1686 } 1713 }
1687 1714
1688 /* Locate unicast link endpoint that should handle message */ 1715 /* Verify that communication with node is currently allowed */
1689 1716
1690 l_ptr = n_ptr->links[b_ptr->identity]; 1717 if ((n_ptr->block_setup & WAIT_PEER_DOWN) &&
1691 if (unlikely(!l_ptr)) { 1718 msg_user(msg) == LINK_PROTOCOL &&
1719 (msg_type(msg) == RESET_MSG ||
1720 msg_type(msg) == ACTIVATE_MSG) &&
1721 !msg_redundant_link(msg))
1722 n_ptr->block_setup &= ~WAIT_PEER_DOWN;
1723
1724 if (n_ptr->block_setup) {
1692 tipc_node_unlock(n_ptr); 1725 tipc_node_unlock(n_ptr);
1693 goto cont; 1726 goto cont;
1694 } 1727 }
@@ -1923,6 +1956,12 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
1923 1956
1924 if (link_blocked(l_ptr)) 1957 if (link_blocked(l_ptr))
1925 return; 1958 return;
1959
1960 /* Abort non-RESET send if communication with node is prohibited */
1961
1962 if ((l_ptr->owner->block_setup) && (msg_typ != RESET_MSG))
1963 return;
1964
1926 msg_set_type(msg, msg_typ); 1965 msg_set_type(msg, msg_typ);
1927 msg_set_net_plane(msg, l_ptr->b_ptr->net_plane); 1966 msg_set_net_plane(msg, l_ptr->b_ptr->net_plane);
1928 msg_set_bcast_ack(msg, mod(l_ptr->owner->bclink.last_in)); 1967 msg_set_bcast_ack(msg, mod(l_ptr->owner->bclink.last_in));
@@ -2051,9 +2090,19 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
2051 case RESET_MSG: 2090 case RESET_MSG:
2052 if (!link_working_unknown(l_ptr) && 2091 if (!link_working_unknown(l_ptr) &&
2053 (l_ptr->peer_session != INVALID_SESSION)) { 2092 (l_ptr->peer_session != INVALID_SESSION)) {
2054 if (msg_session(msg) == l_ptr->peer_session) 2093 if (less_eq(msg_session(msg), l_ptr->peer_session))
2055 break; /* duplicate: ignore */ 2094 break; /* duplicate or old reset: ignore */
2095 }
2096
2097 if (!msg_redundant_link(msg) && (link_working_working(l_ptr) ||
2098 link_working_unknown(l_ptr))) {
2099 /*
2100 * peer has lost contact -- don't allow peer's links
2101 * to reactivate before we recognize loss & clean up
2102 */
2103 l_ptr->owner->block_setup = WAIT_NODE_DOWN;
2056 } 2104 }
2105
2057 /* fall thru' */ 2106 /* fall thru' */
2058 case ACTIVATE_MSG: 2107 case ACTIVATE_MSG:
 2059 /* Update link settings according to the other endpoint's values */ 2108 /* Update link settings according to the other endpoint's values */
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 74fbecab1ea0..e56cb532913e 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -223,6 +223,7 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s
223struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space); 223struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_space);
224void tipc_link_reset(struct link *l_ptr); 224void tipc_link_reset(struct link *l_ptr);
225int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); 225int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
226void tipc_link_send_names(struct list_head *message_list, u32 dest);
226int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf); 227int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf);
227u32 tipc_link_get_max_pkt(u32 dest, u32 selector); 228u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
228int tipc_link_send_sections_fast(struct tipc_port *sender, 229int tipc_link_send_sections_fast(struct tipc_port *sender,
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index cd356e504332..b7ca1bd7b151 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -173,18 +173,40 @@ void tipc_named_withdraw(struct publication *publ)
173 * tipc_named_node_up - tell specified node about all publications by this node 173 * tipc_named_node_up - tell specified node about all publications by this node
174 */ 174 */
175 175
176void tipc_named_node_up(unsigned long node) 176void tipc_named_node_up(unsigned long nodearg)
177{ 177{
178 struct tipc_node *n_ptr;
179 struct link *l_ptr;
178 struct publication *publ; 180 struct publication *publ;
179 struct distr_item *item = NULL; 181 struct distr_item *item = NULL;
180 struct sk_buff *buf = NULL; 182 struct sk_buff *buf = NULL;
183 struct list_head message_list;
184 u32 node = (u32)nodearg;
181 u32 left = 0; 185 u32 left = 0;
182 u32 rest; 186 u32 rest;
183 u32 max_item_buf; 187 u32 max_item_buf = 0;
188
189 /* compute maximum amount of publication data to send per message */
190
191 read_lock_bh(&tipc_net_lock);
192 n_ptr = tipc_node_find(node);
193 if (n_ptr) {
194 tipc_node_lock(n_ptr);
195 l_ptr = n_ptr->active_links[0];
196 if (l_ptr)
197 max_item_buf = ((l_ptr->max_pkt - INT_H_SIZE) /
198 ITEM_SIZE) * ITEM_SIZE;
199 tipc_node_unlock(n_ptr);
200 }
201 read_unlock_bh(&tipc_net_lock);
202 if (!max_item_buf)
203 return;
204
205 /* create list of publication messages, then send them as a unit */
206
207 INIT_LIST_HEAD(&message_list);
184 208
185 read_lock_bh(&tipc_nametbl_lock); 209 read_lock_bh(&tipc_nametbl_lock);
186 max_item_buf = TIPC_MAX_USER_MSG_SIZE / ITEM_SIZE;
187 max_item_buf *= ITEM_SIZE;
188 rest = publ_cnt * ITEM_SIZE; 210 rest = publ_cnt * ITEM_SIZE;
189 211
190 list_for_each_entry(publ, &publ_root, local_list) { 212 list_for_each_entry(publ, &publ_root, local_list) {
@@ -202,13 +224,14 @@ void tipc_named_node_up(unsigned long node)
202 item++; 224 item++;
203 left -= ITEM_SIZE; 225 left -= ITEM_SIZE;
204 if (!left) { 226 if (!left) {
205 msg_set_link_selector(buf_msg(buf), node); 227 list_add_tail((struct list_head *)buf, &message_list);
206 tipc_link_send(buf, node, node);
207 buf = NULL; 228 buf = NULL;
208 } 229 }
209 } 230 }
210exit: 231exit:
211 read_unlock_bh(&tipc_nametbl_lock); 232 read_unlock_bh(&tipc_nametbl_lock);
233
234 tipc_link_send_names(&message_list, (u32)node);
212} 235}
213 236
214/** 237/**
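
tipc_named_node_up() now derives the bulk-message payload size from the first active link's MTU instead of TIPC_MAX_USER_MSG_SIZE: the usable space is the MTU minus the internal header, rounded down to a whole number of distribution items. Worked with illustrative numbers only (the real INT_H_SIZE and ITEM_SIZE values come from the TIPC headers):

    /* Illustrative arithmetic only - the constants below are stand-ins. */
    u32 max_pkt   = 1500;   /* link MTU reported by l_ptr->max_pkt */
    u32 hdr       = 40;     /* stand-in for INT_H_SIZE             */
    u32 item_size = 20;     /* stand-in for ITEM_SIZE              */

    u32 max_item_buf = ((max_pkt - hdr) / item_size) * item_size;
    /* (1500 - 40) / 20 = 73 items -> 73 * 20 = 1460 bytes per message */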
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 68b3dd637291..fafef6c3c0f6 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -141,17 +141,6 @@ void tipc_net_route_msg(struct sk_buff *buf)
141 return; 141 return;
142 msg = buf_msg(buf); 142 msg = buf_msg(buf);
143 143
144 msg_incr_reroute_cnt(msg);
145 if (msg_reroute_cnt(msg) > 6) {
146 if (msg_errcode(msg)) {
147 buf_discard(buf);
148 } else {
149 tipc_reject_msg(buf, msg_destport(msg) ?
150 TIPC_ERR_NO_PORT : TIPC_ERR_NO_NAME);
151 }
152 return;
153 }
154
155 /* Handle message for this node */ 144 /* Handle message for this node */
156 dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg); 145 dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg);
157 if (tipc_in_scope(dnode, tipc_own_addr)) { 146 if (tipc_in_scope(dnode, tipc_own_addr)) {
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2d106ef4fa4c..27b4bb0cca6c 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -112,6 +112,7 @@ struct tipc_node *tipc_node_create(u32 addr)
112 break; 112 break;
113 } 113 }
114 list_add_tail(&n_ptr->list, &temp_node->list); 114 list_add_tail(&n_ptr->list, &temp_node->list);
115 n_ptr->block_setup = WAIT_PEER_DOWN;
115 116
116 tipc_num_nodes++; 117 tipc_num_nodes++;
117 118
@@ -312,7 +313,7 @@ static void node_established_contact(struct tipc_node *n_ptr)
312 } 313 }
313} 314}
314 315
315static void node_cleanup_finished(unsigned long node_addr) 316static void node_name_purge_complete(unsigned long node_addr)
316{ 317{
317 struct tipc_node *n_ptr; 318 struct tipc_node *n_ptr;
318 319
@@ -320,7 +321,7 @@ static void node_cleanup_finished(unsigned long node_addr)
320 n_ptr = tipc_node_find(node_addr); 321 n_ptr = tipc_node_find(node_addr);
321 if (n_ptr) { 322 if (n_ptr) {
322 tipc_node_lock(n_ptr); 323 tipc_node_lock(n_ptr);
323 n_ptr->cleanup_required = 0; 324 n_ptr->block_setup &= ~WAIT_NAMES_GONE;
324 tipc_node_unlock(n_ptr); 325 tipc_node_unlock(n_ptr);
325 } 326 }
326 read_unlock_bh(&tipc_net_lock); 327 read_unlock_bh(&tipc_net_lock);
@@ -331,28 +332,32 @@ static void node_lost_contact(struct tipc_node *n_ptr)
331 char addr_string[16]; 332 char addr_string[16];
332 u32 i; 333 u32 i;
333 334
334 /* Clean up broadcast reception remains */ 335 info("Lost contact with %s\n",
335 n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0; 336 tipc_addr_string_fill(addr_string, n_ptr->addr));
336 while (n_ptr->bclink.deferred_head) { 337
337 struct sk_buff *buf = n_ptr->bclink.deferred_head; 338 /* Flush broadcast link info associated with lost node */
338 n_ptr->bclink.deferred_head = buf->next;
339 buf_discard(buf);
340 }
341 if (n_ptr->bclink.defragm) {
342 buf_discard(n_ptr->bclink.defragm);
343 n_ptr->bclink.defragm = NULL;
344 }
345 339
346 if (n_ptr->bclink.supported) { 340 if (n_ptr->bclink.supported) {
341 n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0;
342 while (n_ptr->bclink.deferred_head) {
343 struct sk_buff *buf = n_ptr->bclink.deferred_head;
344 n_ptr->bclink.deferred_head = buf->next;
345 buf_discard(buf);
346 }
347
348 if (n_ptr->bclink.defragm) {
349 buf_discard(n_ptr->bclink.defragm);
350 n_ptr->bclink.defragm = NULL;
351 }
352
353 tipc_nmap_remove(&tipc_bcast_nmap, n_ptr->addr);
347 tipc_bclink_acknowledge(n_ptr, 354 tipc_bclink_acknowledge(n_ptr,
348 mod(n_ptr->bclink.acked + 10000)); 355 mod(n_ptr->bclink.acked + 10000));
349 tipc_nmap_remove(&tipc_bcast_nmap, n_ptr->addr);
350 if (n_ptr->addr < tipc_own_addr) 356 if (n_ptr->addr < tipc_own_addr)
351 tipc_own_tag--; 357 tipc_own_tag--;
352 }
353 358
354 info("Lost contact with %s\n", 359 n_ptr->bclink.supported = 0;
355 tipc_addr_string_fill(addr_string, n_ptr->addr)); 360 }
356 361
357 /* Abort link changeover */ 362 /* Abort link changeover */
358 for (i = 0; i < MAX_BEARERS; i++) { 363 for (i = 0; i < MAX_BEARERS; i++) {
@@ -367,10 +372,10 @@ static void node_lost_contact(struct tipc_node *n_ptr)
367 /* Notify subscribers */ 372 /* Notify subscribers */
368 tipc_nodesub_notify(n_ptr); 373 tipc_nodesub_notify(n_ptr);
369 374
370 /* Prevent re-contact with node until all cleanup is done */ 375 /* Prevent re-contact with node until cleanup is done */
371 376
372 n_ptr->cleanup_required = 1; 377 n_ptr->block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE;
373 tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr); 378 tipc_k_signal((Handler)node_name_purge_complete, n_ptr->addr);
374} 379}
375 380
376struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) 381struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 5c61afc7a0b9..4f15cb40aaa4 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -42,6 +42,12 @@
42#include "net.h" 42#include "net.h"
43#include "bearer.h" 43#include "bearer.h"
44 44
45/* Flags used to block (re)establishment of contact with a neighboring node */
46
47#define WAIT_PEER_DOWN 0x0001 /* wait to see that peer's links are down */
48#define WAIT_NAMES_GONE 0x0002 /* wait for peer's publications to be purged */
49#define WAIT_NODE_DOWN 0x0004 /* wait until peer node is declared down */
50
45/** 51/**
46 * struct tipc_node - TIPC node structure 52 * struct tipc_node - TIPC node structure
47 * @addr: network address of node 53 * @addr: network address of node
@@ -52,7 +58,7 @@
52 * @active_links: pointers to active links to node 58 * @active_links: pointers to active links to node
53 * @links: pointers to all links to node 59 * @links: pointers to all links to node
54 * @working_links: number of working links to node (both active and standby) 60 * @working_links: number of working links to node (both active and standby)
55 * @cleanup_required: non-zero if cleaning up after a prior loss of contact 61 * @block_setup: bit mask of conditions preventing link establishment to node
56 * @link_cnt: number of links to node 62 * @link_cnt: number of links to node
57 * @permit_changeover: non-zero if node has redundant links to this system 63 * @permit_changeover: non-zero if node has redundant links to this system
58 * @bclink: broadcast-related info 64 * @bclink: broadcast-related info
@@ -77,7 +83,7 @@ struct tipc_node {
77 struct link *links[MAX_BEARERS]; 83 struct link *links[MAX_BEARERS];
78 int link_cnt; 84 int link_cnt;
79 int working_links; 85 int working_links;
80 int cleanup_required; 86 int block_setup;
81 int permit_changeover; 87 int permit_changeover;
82 struct { 88 struct {
83 int supported; 89 int supported;
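
block_setup turns the old boolean cleanup_required into a bit mask, so the conditions defined above (WAIT_PEER_DOWN, WAIT_NAMES_GONE, WAIT_NODE_DOWN) can be raised and cleared independently, and contact is only re-established once the mask is empty. In outline:

    /* Sketch: independent blocking conditions set and cleared in one mask. */
    int block_setup = 0;

    /* on loss of contact: both conditions must clear before re-contact */
    block_setup = WAIT_PEER_DOWN | WAIT_NAMES_GONE;

    /* name table purge finished */
    block_setup &= ~WAIT_NAMES_GONE;

    /* peer's RESET seen, its links known to be down */
    block_setup &= ~WAIT_PEER_DOWN;

    if (!block_setup) {
            /* safe to accept link setup messages again */
    }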
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index adb2eff4a102..9440a3d48ca0 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -49,7 +49,7 @@ struct tipc_sock {
49 struct sock sk; 49 struct sock sk;
50 struct tipc_port *p; 50 struct tipc_port *p;
51 struct tipc_portid peer_name; 51 struct tipc_portid peer_name;
52 long conn_timeout; 52 unsigned int conn_timeout;
53}; 53};
54 54
55#define tipc_sk(sk) ((struct tipc_sock *)(sk)) 55#define tipc_sk(sk) ((struct tipc_sock *)(sk))
@@ -231,7 +231,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
231 sock_init_data(sock, sk); 231 sock_init_data(sock, sk);
232 sk->sk_backlog_rcv = backlog_rcv; 232 sk->sk_backlog_rcv = backlog_rcv;
233 tipc_sk(sk)->p = tp_ptr; 233 tipc_sk(sk)->p = tp_ptr;
234 tipc_sk(sk)->conn_timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT); 234 tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT;
235 235
236 spin_unlock_bh(tp_ptr->lock); 236 spin_unlock_bh(tp_ptr->lock);
237 237
@@ -525,6 +525,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
525 struct tipc_port *tport = tipc_sk_port(sk); 525 struct tipc_port *tport = tipc_sk_port(sk);
526 struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; 526 struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
527 int needs_conn; 527 int needs_conn;
528 long timeout_val;
528 int res = -EINVAL; 529 int res = -EINVAL;
529 530
530 if (unlikely(!dest)) 531 if (unlikely(!dest))
@@ -564,6 +565,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
564 reject_rx_queue(sk); 565 reject_rx_queue(sk);
565 } 566 }
566 567
568 timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
569
567 do { 570 do {
568 if (dest->addrtype == TIPC_ADDR_NAME) { 571 if (dest->addrtype == TIPC_ADDR_NAME) {
569 res = dest_name_check(dest, m); 572 res = dest_name_check(dest, m);
@@ -600,16 +603,14 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
600 sock->state = SS_CONNECTING; 603 sock->state = SS_CONNECTING;
601 break; 604 break;
602 } 605 }
603 if (m->msg_flags & MSG_DONTWAIT) { 606 if (timeout_val <= 0L) {
604 res = -EWOULDBLOCK; 607 res = timeout_val ? timeout_val : -EWOULDBLOCK;
605 break; 608 break;
606 } 609 }
607 release_sock(sk); 610 release_sock(sk);
608 res = wait_event_interruptible(*sk_sleep(sk), 611 timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk),
609 !tport->congested); 612 !tport->congested, timeout_val);
610 lock_sock(sk); 613 lock_sock(sk);
611 if (res)
612 break;
613 } while (1); 614 } while (1);
614 615
615exit: 616exit:
@@ -636,6 +637,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
636 struct sock *sk = sock->sk; 637 struct sock *sk = sock->sk;
637 struct tipc_port *tport = tipc_sk_port(sk); 638 struct tipc_port *tport = tipc_sk_port(sk);
638 struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; 639 struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
640 long timeout_val;
639 int res; 641 int res;
640 642
641 /* Handle implied connection establishment */ 643 /* Handle implied connection establishment */
@@ -650,6 +652,8 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
650 if (iocb) 652 if (iocb)
651 lock_sock(sk); 653 lock_sock(sk);
652 654
655 timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
656
653 do { 657 do {
654 if (unlikely(sock->state != SS_CONNECTED)) { 658 if (unlikely(sock->state != SS_CONNECTED)) {
655 if (sock->state == SS_DISCONNECTING) 659 if (sock->state == SS_DISCONNECTING)
@@ -663,16 +667,14 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
663 total_len); 667 total_len);
664 if (likely(res != -ELINKCONG)) 668 if (likely(res != -ELINKCONG))
665 break; 669 break;
666 if (m->msg_flags & MSG_DONTWAIT) { 670 if (timeout_val <= 0L) {
667 res = -EWOULDBLOCK; 671 res = timeout_val ? timeout_val : -EWOULDBLOCK;
668 break; 672 break;
669 } 673 }
670 release_sock(sk); 674 release_sock(sk);
671 res = wait_event_interruptible(*sk_sleep(sk), 675 timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk),
672 (!tport->congested || !tport->connected)); 676 (!tport->congested || !tport->connected), timeout_val);
673 lock_sock(sk); 677 lock_sock(sk);
674 if (res)
675 break;
676 } while (1); 678 } while (1);
677 679
678 if (iocb) 680 if (iocb)
@@ -1369,7 +1371,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1369 struct msghdr m = {NULL,}; 1371 struct msghdr m = {NULL,};
1370 struct sk_buff *buf; 1372 struct sk_buff *buf;
1371 struct tipc_msg *msg; 1373 struct tipc_msg *msg;
1372 long timeout; 1374 unsigned int timeout;
1373 int res; 1375 int res;
1374 1376
1375 lock_sock(sk); 1377 lock_sock(sk);
@@ -1434,7 +1436,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1434 res = wait_event_interruptible_timeout(*sk_sleep(sk), 1436 res = wait_event_interruptible_timeout(*sk_sleep(sk),
1435 (!skb_queue_empty(&sk->sk_receive_queue) || 1437 (!skb_queue_empty(&sk->sk_receive_queue) ||
1436 (sock->state != SS_CONNECTING)), 1438 (sock->state != SS_CONNECTING)),
1437 timeout ? timeout : MAX_SCHEDULE_TIMEOUT); 1439 timeout ? (long)msecs_to_jiffies(timeout)
1440 : MAX_SCHEDULE_TIMEOUT);
1438 lock_sock(sk); 1441 lock_sock(sk);
1439 1442
1440 if (res > 0) { 1443 if (res > 0) {
@@ -1480,9 +1483,7 @@ static int listen(struct socket *sock, int len)
1480 1483
1481 lock_sock(sk); 1484 lock_sock(sk);
1482 1485
1483 if (sock->state == SS_READY) 1486 if (sock->state != SS_UNCONNECTED)
1484 res = -EOPNOTSUPP;
1485 else if (sock->state != SS_UNCONNECTED)
1486 res = -EINVAL; 1487 res = -EINVAL;
1487 else { 1488 else {
1488 sock->state = SS_LISTENING; 1489 sock->state = SS_LISTENING;
@@ -1510,10 +1511,6 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
1510 1511
1511 lock_sock(sk); 1512 lock_sock(sk);
1512 1513
1513 if (sock->state == SS_READY) {
1514 res = -EOPNOTSUPP;
1515 goto exit;
1516 }
1517 if (sock->state != SS_LISTENING) { 1514 if (sock->state != SS_LISTENING) {
1518 res = -EINVAL; 1515 res = -EINVAL;
1519 goto exit; 1516 goto exit;
@@ -1696,7 +1693,7 @@ static int setsockopt(struct socket *sock,
1696 res = tipc_set_portunreturnable(tport->ref, value); 1693 res = tipc_set_portunreturnable(tport->ref, value);
1697 break; 1694 break;
1698 case TIPC_CONN_TIMEOUT: 1695 case TIPC_CONN_TIMEOUT:
1699 tipc_sk(sk)->conn_timeout = msecs_to_jiffies(value); 1696 tipc_sk(sk)->conn_timeout = value;
1700 /* no need to set "res", since already 0 at this point */ 1697 /* no need to set "res", since already 0 at this point */
1701 break; 1698 break;
1702 default: 1699 default:
@@ -1752,7 +1749,7 @@ static int getsockopt(struct socket *sock,
1752 res = tipc_portunreturnable(tport->ref, &value); 1749 res = tipc_portunreturnable(tport->ref, &value);
1753 break; 1750 break;
1754 case TIPC_CONN_TIMEOUT: 1751 case TIPC_CONN_TIMEOUT:
1755 value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout); 1752 value = tipc_sk(sk)->conn_timeout;
1756 /* no need to set "res", since already 0 at this point */ 1753 /* no need to set "res", since already 0 at this point */
1757 break; 1754 break;
1758 case TIPC_NODE_RECVQ_DEPTH: 1755 case TIPC_NODE_RECVQ_DEPTH:
@@ -1790,11 +1787,11 @@ static const struct proto_ops msg_ops = {
1790 .bind = bind, 1787 .bind = bind,
1791 .connect = connect, 1788 .connect = connect,
1792 .socketpair = sock_no_socketpair, 1789 .socketpair = sock_no_socketpair,
1793 .accept = accept, 1790 .accept = sock_no_accept,
1794 .getname = get_name, 1791 .getname = get_name,
1795 .poll = poll, 1792 .poll = poll,
1796 .ioctl = sock_no_ioctl, 1793 .ioctl = sock_no_ioctl,
1797 .listen = listen, 1794 .listen = sock_no_listen,
1798 .shutdown = shutdown, 1795 .shutdown = shutdown,
1799 .setsockopt = setsockopt, 1796 .setsockopt = setsockopt,
1800 .getsockopt = getsockopt, 1797 .getsockopt = getsockopt,
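
The send_msg()/send_packet() hunks above replace the "MSG_DONTWAIT or block forever" behaviour with the socket's SO_SNDTIMEO: sock_sndtimeo() yields 0 for a non-blocking call and the configured timeout otherwise, and the remaining time returned by wait_event_interruptible_timeout() is carried into the next iteration. A condensed sketch of the loop shape (try_to_send() and can_send() are stand-ins, not TIPC functions):

    /* Sketch of a send loop honouring SO_SNDTIMEO. */
    long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
    int res;

    for (;;) {
            res = try_to_send(sk, m);       /* hypothetical helper */
            if (res != -EAGAIN)             /* sent, or a real error */
                    break;
            if (timeout <= 0) {
                    /* 0: non-blocking; < 0: interrupted by a signal */
                    res = timeout ? timeout : -EWOULDBLOCK;
                    break;
            }
            release_sock(sk);
            timeout = wait_event_interruptible_timeout(*sk_sleep(sk),
                                                       can_send(sk), timeout);
            lock_sock(sk);
    }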
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 6cf726863485..198371723b41 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -151,7 +151,7 @@ void tipc_subscr_report_overlap(struct subscription *sub,
151 if (!must && !(sub->filter & TIPC_SUB_PORTS)) 151 if (!must && !(sub->filter & TIPC_SUB_PORTS))
152 return; 152 return;
153 153
154 sub->event_cb(sub, found_lower, found_upper, event, port_ref, node); 154 subscr_send_event(sub, found_lower, found_upper, event, port_ref, node);
155} 155}
156 156
157/** 157/**
@@ -365,7 +365,6 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
365 subscr_terminate(subscriber); 365 subscr_terminate(subscriber);
366 return NULL; 366 return NULL;
367 } 367 }
368 sub->event_cb = subscr_send_event;
369 INIT_LIST_HEAD(&sub->nameseq_list); 368 INIT_LIST_HEAD(&sub->nameseq_list);
370 list_add(&sub->subscription_list, &subscriber->subscription_list); 369 list_add(&sub->subscription_list, &subscriber->subscription_list);
371 sub->server_ref = subscriber->port_ref; 370 sub->server_ref = subscriber->port_ref;
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 45d89bf4d202..4b06ef6f8401 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -39,16 +39,11 @@
39 39
40struct subscription; 40struct subscription;
41 41
42typedef void (*tipc_subscr_event) (struct subscription *sub,
43 u32 found_lower, u32 found_upper,
44 u32 event, u32 port_ref, u32 node);
45
46/** 42/**
47 * struct subscription - TIPC network topology subscription object 43 * struct subscription - TIPC network topology subscription object
48 * @seq: name sequence associated with subscription 44 * @seq: name sequence associated with subscription
49 * @timeout: duration of subscription (in ms) 45 * @timeout: duration of subscription (in ms)
50 * @filter: event filtering to be done for subscription 46 * @filter: event filtering to be done for subscription
51 * @event_cb: routine invoked when a subscription event is detected
52 * @timer: timer governing subscription duration (optional) 47 * @timer: timer governing subscription duration (optional)
53 * @nameseq_list: adjacent subscriptions in name sequence's subscription list 48 * @nameseq_list: adjacent subscriptions in name sequence's subscription list
54 * @subscription_list: adjacent subscriptions in subscriber's subscription list 49 * @subscription_list: adjacent subscriptions in subscriber's subscription list
@@ -61,7 +56,6 @@ struct subscription {
61 struct tipc_name_seq seq; 56 struct tipc_name_seq seq;
62 u32 timeout; 57 u32 timeout;
63 u32 filter; 58 u32 filter;
64 tipc_subscr_event event_cb;
65 struct timer_list timer; 59 struct timer_list timer;
66 struct list_head nameseq_list; 60 struct list_head nameseq_list;
67 struct list_head subscription_list; 61 struct list_head subscription_list;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ec68e1c05b85..466fbcc5cf77 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1381,8 +1381,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1381static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1381static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1382{ 1382{
1383 int err = 0; 1383 int err = 0;
1384
1384 UNIXCB(skb).pid = get_pid(scm->pid); 1385 UNIXCB(skb).pid = get_pid(scm->pid);
1385 UNIXCB(skb).cred = get_cred(scm->cred); 1386 if (scm->cred)
1387 UNIXCB(skb).cred = get_cred(scm->cred);
1386 UNIXCB(skb).fp = NULL; 1388 UNIXCB(skb).fp = NULL;
1387 if (scm->fp && send_fds) 1389 if (scm->fp && send_fds)
1388 err = unix_attach_fds(scm, skb); 1390 err = unix_attach_fds(scm, skb);
@@ -1392,6 +1394,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
1392} 1394}
1393 1395
1394/* 1396/*
1397 * Some apps rely on write() giving SCM_CREDENTIALS
1398 * We include credentials if source or destination socket
1399 * asserted SOCK_PASSCRED.
1400 */
1401static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1402 const struct sock *other)
1403{
1404 if (UNIXCB(skb).cred)
1405 return;
1406 if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1407 !other->sk_socket ||
1408 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1409 UNIXCB(skb).pid = get_pid(task_tgid(current));
1410 UNIXCB(skb).cred = get_current_cred();
1411 }
1412}
1413
1414/*
1395 * Send AF_UNIX data. 1415 * Send AF_UNIX data.
1396 */ 1416 */
1397 1417
@@ -1538,6 +1558,7 @@ restart:
1538 1558
1539 if (sock_flag(other, SOCK_RCVTSTAMP)) 1559 if (sock_flag(other, SOCK_RCVTSTAMP))
1540 __net_timestamp(skb); 1560 __net_timestamp(skb);
1561 maybe_add_creds(skb, sock, other);
1541 skb_queue_tail(&other->sk_receive_queue, skb); 1562 skb_queue_tail(&other->sk_receive_queue, skb);
1542 if (max_level > unix_sk(other)->recursion_level) 1563 if (max_level > unix_sk(other)->recursion_level)
1543 unix_sk(other)->recursion_level = max_level; 1564 unix_sk(other)->recursion_level = max_level;
@@ -1652,6 +1673,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1652 (other->sk_shutdown & RCV_SHUTDOWN)) 1673 (other->sk_shutdown & RCV_SHUTDOWN))
1653 goto pipe_err_free; 1674 goto pipe_err_free;
1654 1675
1676 maybe_add_creds(skb, sock, other);
1655 skb_queue_tail(&other->sk_receive_queue, skb); 1677 skb_queue_tail(&other->sk_receive_queue, skb);
1656 if (max_level > unix_sk(other)->recursion_level) 1678 if (max_level > unix_sk(other)->recursion_level)
1657 unix_sk(other)->recursion_level = max_level; 1679 unix_sk(other)->recursion_level = max_level;
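
maybe_add_creds() attaches pid and credentials to a queued skb only when either end of the AF_UNIX connection asked for them, so a plain write() still yields SCM_CREDENTIALS for a receiver that enabled SO_PASSCRED. From user space the receiving side opts in roughly as follows (standalone sketch, error handling trimmed):

    /* Sketch: receive SCM_CREDENTIALS on an AF_UNIX socket (Linux). */
    #define _GNU_SOURCE
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <stdio.h>
    #include <string.h>

    void recv_with_creds(int fd)
    {
            char data[256];
            char cbuf[CMSG_SPACE(sizeof(struct ucred))];
            struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
            struct msghdr msg = {
                    .msg_iov = &iov, .msg_iovlen = 1,
                    .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
            };
            struct cmsghdr *cmsg;
            int on = 1;

            setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));

            if (recvmsg(fd, &msg, 0) < 0)
                    return;

            for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
                    if (cmsg->cmsg_level == SOL_SOCKET &&
                        cmsg->cmsg_type == SCM_CREDENTIALS) {
                            struct ucred uc;

                            memcpy(&uc, CMSG_DATA(cmsg), sizeof(uc));
                            printf("pid=%d uid=%d gid=%d\n",
                                   (int)uc.pid, (int)uc.uid, (int)uc.gid);
                    }
            }
    }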
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 58064d9e565d..791ab2e77f3f 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -462,8 +462,8 @@ static struct xfrm_algo_desc ealg_list[] = {
462 .desc = { 462 .desc = {
463 .sadb_alg_id = SADB_X_EALG_AESCTR, 463 .sadb_alg_id = SADB_X_EALG_AESCTR,
464 .sadb_alg_ivlen = 8, 464 .sadb_alg_ivlen = 8,
465 .sadb_alg_minbits = 128, 465 .sadb_alg_minbits = 160,
466 .sadb_alg_maxbits = 256 466 .sadb_alg_maxbits = 288
467 } 467 }
468}, 468},
469}; 469};
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index a026b0ef2443..54a0dc2e2f8d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -212,6 +212,11 @@ resume:
212 /* only the first xfrm gets the encap type */ 212 /* only the first xfrm gets the encap type */
213 encap_type = 0; 213 encap_type = 0;
214 214
215 if (async && x->repl->check(x, skb, seq)) {
216 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
217 goto drop_unlock;
218 }
219
215 x->repl->advance(x, seq); 220 x->repl->advance(x, seq);
216 221
217 x->curlft.bytes += skb->len; 222 x->curlft.bytes += skb->len;
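
The xfrm_input() hunk adds a replay re-check on the asynchronous resume path: while a crypto request was in flight, other packets may have advanced the replay window, so the sequence number is validated again before x->repl->advance() commits it. Schematically, the guard sits between resume and commit:

    /* Sketch: re-validate the replay window when resuming after async crypto. */
    if (async && x->repl->check(x, skb, seq)) {
            /* another packet claimed this slot while we were in flight: drop */
            XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
            goto drop_unlock;
    }
    x->repl->advance(x, seq);       /* only now commit the sequence number */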
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index fc91ad7ee26e..f781b9ab8a54 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -70,26 +70,29 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
70 70
71 while ((scratch += len, dlen -= len) > 0) { 71 while ((scratch += len, dlen -= len) > 0) {
72 skb_frag_t *frag; 72 skb_frag_t *frag;
73 struct page *page;
73 74
74 err = -EMSGSIZE; 75 err = -EMSGSIZE;
75 if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) 76 if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS))
76 goto out; 77 goto out;
77 78
78 frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags; 79 frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags;
79 frag->page = alloc_page(GFP_ATOMIC); 80 page = alloc_page(GFP_ATOMIC);
80 81
81 err = -ENOMEM; 82 err = -ENOMEM;
82 if (!frag->page) 83 if (!page)
83 goto out; 84 goto out;
84 85
86 __skb_frag_set_page(frag, page);
87
85 len = PAGE_SIZE; 88 len = PAGE_SIZE;
86 if (dlen < len) 89 if (dlen < len)
87 len = dlen; 90 len = dlen;
88 91
89 memcpy(page_address(frag->page), scratch, len);
90
91 frag->page_offset = 0; 92 frag->page_offset = 0;
92 frag->size = len; 93 frag->size = len;
94 memcpy(skb_frag_address(frag), scratch, len);
95
93 skb->truesize += len; 96 skb->truesize += len;
94 skb->data_len += len; 97 skb->data_len += len;
95 skb->len += len; 98 skb->len += len;
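
ipcomp_decompress() stops writing frag->page directly: the page is allocated and checked first, installed with __skb_frag_set_page(), and the copy goes through skb_frag_address(). A condensed sketch of appending one fragment that way (error paths trimmed; assumes len <= PAGE_SIZE, a source buffer src, and an skb private to the caller):

    /* Sketch: append one fragment to an skb using the frag accessors. */
    skb_frag_t *frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags];
    struct page *page = alloc_page(GFP_ATOMIC);

    if (!page)
            return -ENOMEM;

    __skb_frag_set_page(frag, page);
    frag->page_offset = 0;
    frag->size = len;
    memcpy(skb_frag_address(frag), src, len);

    skb_shinfo(skb)->nr_frags++;
    skb->len      += len;
    skb->data_len += len;
    skb->truesize += len;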
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 94fdcc7f1030..552df27dcf53 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1349,14 +1349,16 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1349 BUG(); 1349 BUG();
1350 } 1350 }
1351 xdst = dst_alloc(dst_ops, NULL, 0, 0, 0); 1351 xdst = dst_alloc(dst_ops, NULL, 0, 0, 0);
1352 memset(&xdst->u.rt6.rt6i_table, 0, sizeof(*xdst) - sizeof(struct dst_entry));
1353 xfrm_policy_put_afinfo(afinfo);
1354 1352
1355 if (likely(xdst)) 1353 if (likely(xdst)) {
1354 memset(&xdst->u.rt6.rt6i_table, 0,
1355 sizeof(*xdst) - sizeof(struct dst_entry));
1356 xdst->flo.ops = &xfrm_bundle_fc_ops; 1356 xdst->flo.ops = &xfrm_bundle_fc_ops;
1357 else 1357 } else
1358 xdst = ERR_PTR(-ENOBUFS); 1358 xdst = ERR_PTR(-ENOBUFS);
1359 1359
1360 xfrm_policy_put_afinfo(afinfo);
1361
1360 return xdst; 1362 return xdst;
1361} 1363}
1362 1364
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 0256b8a0a7cf..d0a42df5160e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2927,7 +2927,7 @@ static int __net_init xfrm_user_net_init(struct net *net)
2927 if (nlsk == NULL) 2927 if (nlsk == NULL)
2928 return -ENOMEM; 2928 return -ENOMEM;
2929 net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ 2929 net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */
2930 rcu_assign_pointer(net->xfrm.nlsk, nlsk); 2930 RCU_INIT_POINTER(net->xfrm.nlsk, nlsk);
2931 return 0; 2931 return 0;
2932} 2932}
2933 2933
@@ -2935,7 +2935,7 @@ static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list)
2935{ 2935{
2936 struct net *net; 2936 struct net *net;
2937 list_for_each_entry(net, net_exit_list, exit_list) 2937 list_for_each_entry(net, net_exit_list, exit_list)
2938 rcu_assign_pointer(net->xfrm.nlsk, NULL); 2938 RCU_INIT_POINTER(net->xfrm.nlsk, NULL);
2939 synchronize_net(); 2939 synchronize_net();
2940 list_for_each_entry(net, net_exit_list, exit_list) 2940 list_for_each_entry(net, net_exit_list, exit_list)
2941 netlink_kernel_release(net->xfrm.nlsk_stash); 2941 netlink_kernel_release(net->xfrm.nlsk_stash);