path: root/net
author    Tony Lindgren <tony@atomide.com>  2016-03-30 13:36:06 -0400
committer Tony Lindgren <tony@atomide.com>  2016-03-30 13:36:06 -0400
commit    1809de7e7d37c585e01a1bcc583ea92b78fc759d (patch)
tree      76c5b35c2b04eafce86a1a729c02ab705eba44bc /net
parent    ebf24414809200915b9ddf7f109bba7c278c8210 (diff)
parent    3ca4a238106dedc285193ee47f494a6584b6fd2f (diff)
Merge tag 'for-v4.6-rc/omap-fixes-a' of git://git.kernel.org/pub/scm/linux/kernel/git/pjw/omap-pending into omap-for-v4.6/fixes
ARM: OMAP2+: first hwmod fix for v4.6-rc

Fix a longstanding bug in the hwmod code that could cause hardware
SYSCONFIG register values to not match the kernel's idea of what they
should be, and that could result in lower performance during IP block
idle entry.

Basic build, boot, and PM test logs are available here:
http://www.pwsan.com/omap/testlogs/omap-hwmod-fixes-a-for-v4.6-rc/20160326231727/
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/core.c39
-rw-r--r--net/6lowpan/debugfs.c247
-rw-r--r--net/6lowpan/iphc.c413
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/8021q/vlan_dev.c10
-rw-r--r--net/8021q/vlanproc.c3
-rw-r--r--net/8021q/vlanproc.h4
-rw-r--r--net/9p/trans_rdma.c86
-rw-r--r--net/Kconfig24
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/ax25/ax25_ip.c15
-rw-r--r--net/batman-adv/Kconfig16
-rw-r--r--net/batman-adv/Makefile5
-rw-r--r--net/batman-adv/bat_algo.h30
-rw-r--r--net/batman-adv/bat_iv_ogm.c115
-rw-r--r--net/batman-adv/bat_v.c347
-rw-r--r--net/batman-adv/bat_v_elp.c515
-rw-r--r--net/batman-adv/bat_v_elp.h33
-rw-r--r--net/batman-adv/bat_v_ogm.c833
-rw-r--r--net/batman-adv/bat_v_ogm.h36
-rw-r--r--net/batman-adv/bitarray.c14
-rw-r--r--net/batman-adv/bitarray.h14
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c329
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h2
-rw-r--r--net/batman-adv/debugfs.c8
-rw-r--r--net/batman-adv/debugfs.h2
-rw-r--r--net/batman-adv/distributed-arp-table.c100
-rw-r--r--net/batman-adv/distributed-arp-table.h2
-rw-r--r--net/batman-adv/fragmentation.c34
-rw-r--r--net/batman-adv/fragmentation.h4
-rw-r--r--net/batman-adv/gateway_client.c132
-rw-r--r--net/batman-adv/gateway_client.h2
-rw-r--r--net/batman-adv/gateway_common.c8
-rw-r--r--net/batman-adv/gateway_common.h4
-rw-r--r--net/batman-adv/hard-interface.c79
-rw-r--r--net/batman-adv/hard-interface.h18
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h24
-rw-r--r--net/batman-adv/icmp_socket.c10
-rw-r--r--net/batman-adv/icmp_socket.h2
-rw-r--r--net/batman-adv/main.c112
-rw-r--r--net/batman-adv/main.h39
-rw-r--r--net/batman-adv/multicast.c44
-rw-r--r--net/batman-adv/multicast.h4
-rw-r--r--net/batman-adv/network-coding.c164
-rw-r--r--net/batman-adv/network-coding.h2
-rw-r--r--net/batman-adv/originator.c246
-rw-r--r--net/batman-adv/originator.h18
-rw-r--r--net/batman-adv/packet.h68
-rw-r--r--net/batman-adv/routing.c112
-rw-r--r--net/batman-adv/routing.h5
-rw-r--r--net/batman-adv/send.c98
-rw-r--r--net/batman-adv/send.h16
-rw-r--r--net/batman-adv/soft-interface.c73
-rw-r--r--net/batman-adv/soft-interface.h4
-rw-r--r--net/batman-adv/sysfs.c162
-rw-r--r--net/batman-adv/sysfs.h2
-rw-r--r--net/batman-adv/translation-table.c335
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/types.h160
-rw-r--r--net/bluetooth/6lowpan.c7
-rw-r--r--net/bluetooth/Kconfig9
-rw-r--r--net/bluetooth/Makefile1
-rw-r--r--net/bluetooth/hci_conn.c17
-rw-r--r--net/bluetooth/hci_core.c13
-rw-r--r--net/bluetooth/hci_request.c84
-rw-r--r--net/bluetooth/hci_request.h2
-rw-r--r--net/bluetooth/l2cap_core.c14
-rw-r--r--net/bluetooth/l2cap_sock.c12
-rw-r--r--net/bluetooth/leds.c74
-rw-r--r--net/bluetooth/leds.h16
-rw-r--r--net/bluetooth/mgmt.c26
-rw-r--r--net/bluetooth/smp.c151
-rw-r--r--net/bridge/br.c3
-rw-r--r--net/bridge/br_fdb.c15
-rw-r--r--net/bridge/br_forward.c1
-rw-r--r--net/bridge/br_if.c59
-rw-r--r--net/bridge/br_input.c16
-rw-r--r--net/bridge/br_mdb.c128
-rw-r--r--net/bridge/br_multicast.c101
-rw-r--r--net/bridge/br_netfilter_hooks.c68
-rw-r--r--net/bridge/br_netlink.c1
-rw-r--r--net/bridge/br_private.h12
-rw-r--r--net/bridge/br_stp.c25
-rw-r--r--net/bridge/br_stp_if.c2
-rw-r--r--net/bridge/br_stp_timer.c1
-rw-r--r--net/bridge/br_vlan.c11
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c8
-rw-r--r--net/caif/cfpkt_skbuff.c2
-rw-r--r--net/caif/cfrfml.c2
-rw-r--r--net/ceph/ceph_common.c4
-rw-r--r--net/ceph/crush/mapper.c33
-rw-r--r--net/ceph/crypto.c101
-rw-r--r--net/ceph/debugfs.c17
-rw-r--r--net/ceph/messenger.c44
-rw-r--r--net/ceph/mon_client.c457
-rw-r--r--net/ceph/osd_client.c123
-rw-r--r--net/ceph/osdmap.c19
-rw-r--r--net/ceph/pagevec.c2
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/dev.c52
-rw-r--r--net/core/devlink.c738
-rw-r--r--net/core/dst.c10
-rw-r--r--net/core/dst_cache.c168
-rw-r--r--net/core/ethtool.c638
-rw-r--r--net/core/filter.c256
-rw-r--r--net/core/flow_dissector.c74
-rw-r--r--net/core/gen_estimator.c2
-rw-r--r--net/core/gen_stats.c1
-rw-r--r--net/core/hwbm.c87
-rw-r--r--net/core/lwtunnel.c37
-rw-r--r--net/core/net-sysfs.c18
-rw-r--r--net/core/netclassid_cgroup.c1
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/pktgen.c4
-rw-r--r--net/core/rtnetlink.c125
-rw-r--r--net/core/scm.c7
-rw-r--r--net/core/skbuff.c189
-rw-r--r--net/core/sock.c7
-rw-r--r--net/core/sock_reuseport.c9
-rw-r--r--net/core/sysctl_net_core.c10
-rw-r--r--net/dccp/ipv4.c18
-rw-r--r--net/dccp/ipv6.c18
-rw-r--r--net/dsa/dsa.c43
-rw-r--r--net/dsa/slave.c215
-rw-r--r--net/ethernet/eth.c3
-rw-r--r--net/ieee802154/6lowpan/core.c7
-rw-r--r--net/ieee802154/socket.c17
-rw-r--r--net/ipv4/Kconfig10
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c69
-rw-r--r--net/ipv4/arp.c41
-rw-r--r--net/ipv4/devinet.c72
-rw-r--r--net/ipv4/fib_frontend.c20
-rw-r--r--net/ipv4/fib_trie.c7
-rw-r--r--net/ipv4/fou.c30
-rw-r--r--net/ipv4/gre_offload.c109
-rw-r--r--net/ipv4/icmp.c5
-rw-r--r--net/ipv4/igmp.c81
-rw-r--r--net/ipv4/inet_connection_sock.c268
-rw-r--r--net/ipv4/inet_diag.c24
-rw-r--r--net/ipv4/inet_hashtables.c237
-rw-r--r--net/ipv4/inet_lro.c374
-rw-r--r--net/ipv4/ip_forward.c1
-rw-r--r--net/ipv4/ip_fragment.c30
-rw-r--r--net/ipv4/ip_gre.c49
-rw-r--r--net/ipv4/ip_input.c33
-rw-r--r--net/ipv4/ip_options.c14
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/ip_sockglue.c12
-rw-r--r--net/ipv4/ip_tunnel.c101
-rw-r--r--net/ipv4/ip_tunnel_core.c39
-rw-r--r--net/ipv4/ipconfig.c4
-rw-r--r--net/ipv4/ipip.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c66
-rw-r--r--net/ipv4/netfilter/arptable_filter.c40
-rw-r--r--net/ipv4/netfilter/ip_tables.c63
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c3
-rw-r--r--net/ipv4/netfilter/iptable_filter.c44
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c41
-rw-r--r--net/ipv4/netfilter/iptable_nat.c41
-rw-r--r--net/ipv4/netfilter/iptable_raw.c38
-rw-r--r--net/ipv4/netfilter/iptable_security.c44
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c30
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c12
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c7
-rw-r--r--net/ipv4/ping.c15
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c8
-rw-r--r--net/ipv4/route.c77
-rw-r--r--net/ipv4/syncookies.c7
-rw-r--r--net/ipv4/sysctl_net_ipv4.c236
-rw-r--r--net/ipv4/tcp.c100
-rw-r--r--net/ipv4/tcp_fastopen.c79
-rw-r--r--net/ipv4/tcp_input.c193
-rw-r--r--net/ipv4/tcp_ipv4.c92
-rw-r--r--net/ipv4/tcp_metrics.c5
-rw-r--r--net/ipv4/tcp_minisocks.c6
-rw-r--r--net/ipv4/tcp_offload.c8
-rw-r--r--net/ipv4/tcp_output.c10
-rw-r--r--net/ipv4/tcp_probe.c8
-rw-r--r--net/ipv4/tcp_timer.c23
-rw-r--r--net/ipv4/udp.c80
-rw-r--r--net/ipv4/udp_offload.c114
-rw-r--r--net/ipv4/udp_tunnel.c2
-rw-r--r--net/ipv6/Kconfig2
-rw-r--r--net/ipv6/addrconf.c213
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/datagram.c3
-rw-r--r--net/ipv6/exthdrs_core.c6
-rw-r--r--net/ipv6/ila/ila_common.c1
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/inet6_hashtables.c78
-rw-r--r--net/ipv6/ip6_checksum.c26
-rw-r--r--net/ipv6/ip6_fib.c91
-rw-r--r--net/ipv6/ip6_flowlabel.c5
-rw-r--r--net/ipv6/ip6_gre.c15
-rw-r--r--net/ipv6/ip6_input.c12
-rw-r--r--net/ipv6/ip6_offload.c15
-rw-r--r--net/ipv6/ip6_output.c7
-rw-r--r--net/ipv6/ip6_tunnel.c105
-rw-r--r--net/ipv6/ip6_udp_tunnel.c6
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/mcast.c3
-rw-r--r--net/ipv6/ndisc.c9
-rw-r--r--net/ipv6/netfilter/ip6_tables.c65
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c47
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c46
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c41
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c46
-rw-r--r--net/ipv6/netfilter/ip6table_security.c44
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c30
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c74
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c7
-rw-r--r--net/ipv6/ping.c59
-rw-r--r--net/ipv6/reassembly.c6
-rw-r--r--net/ipv6/route.c7
-rw-r--r--net/ipv6/sit.c33
-rw-r--r--net/ipv6/syncookies.c5
-rw-r--r--net/ipv6/tcp_ipv6.c54
-rw-r--r--net/ipv6/udp.c102
-rw-r--r--net/ipv6/udp_offload.c8
-rw-r--r--net/irda/ircomm/ircomm_param.c3
-rw-r--r--net/irda/ircomm/ircomm_tty.c15
-rw-r--r--net/irda/ircomm/ircomm_tty_ioctl.c13
-rw-r--r--net/iucv/af_iucv.c3
-rw-r--r--net/kcm/Kconfig10
-rw-r--r--net/kcm/Makefile3
-rw-r--r--net/kcm/kcmproc.c426
-rw-r--r--net/kcm/kcmsock.c2409
-rw-r--r--net/l2tp/l2tp_ip6.c3
-rw-r--r--net/l2tp/l2tp_netlink.c18
-rw-r--r--net/l3mdev/l3mdev.c11
-rw-r--r--net/llc/af_llc.c4
-rw-r--r--net/mac80211/agg-rx.c52
-rw-r--r--net/mac80211/agg-tx.c53
-rw-r--r--net/mac80211/cfg.c34
-rw-r--r--net/mac80211/chan.c2
-rw-r--r--net/mac80211/debugfs.c1
-rw-r--r--net/mac80211/debugfs_key.c5
-rw-r--r--net/mac80211/driver-ops.c10
-rw-r--r--net/mac80211/driver-ops.h4
-rw-r--r--net/mac80211/ht.c5
-rw-r--r--net/mac80211/ibss.c33
-rw-r--r--net/mac80211/ieee80211_i.h39
-rw-r--r--net/mac80211/iface.c14
-rw-r--r--net/mac80211/key.c86
-rw-r--r--net/mac80211/key.h10
-rw-r--r--net/mac80211/main.c6
-rw-r--r--net/mac80211/mesh.c20
-rw-r--r--net/mac80211/mesh.h7
-rw-r--r--net/mac80211/mesh_hwmp.c6
-rw-r--r--net/mac80211/mesh_pathtbl.c111
-rw-r--r--net/mac80211/mesh_plink.c10
-rw-r--r--net/mac80211/mlme.c81
-rw-r--r--net/mac80211/offchannel.c16
-rw-r--r--net/mac80211/rc80211_minstrel.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c16
-rw-r--r--net/mac80211/rx.c178
-rw-r--r--net/mac80211/scan.c20
-rw-r--r--net/mac80211/sta_info.c37
-rw-r--r--net/mac80211/sta_info.h24
-rw-r--r--net/mac80211/status.c7
-rw-r--r--net/mac80211/tkip.c36
-rw-r--r--net/mac80211/tkip.h2
-rw-r--r--net/mac80211/trace.h43
-rw-r--r--net/mac80211/tx.c100
-rw-r--r--net/mac80211/util.c132
-rw-r--r--net/mac80211/vht.c57
-rw-r--r--net/mac80211/wpa.c11
-rw-r--r--net/mac802154/llsec.c41
-rw-r--r--net/mac802154/llsec.h3
-rw-r--r--net/mac802154/main.c2
-rw-r--r--net/mpls/mpls_iptunnel.c1
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c4
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c55
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c38
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c17
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c12
-rw-r--r--net/netfilter/nf_conntrack_core.c41
-rw-r--r--net/netfilter/nf_conntrack_helper.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c2
-rw-r--r--net/netfilter/nf_dup_netdev.c1
-rw-r--r--net/netfilter/nf_tables_netdev.c8
-rw-r--r--net/netfilter/nfnetlink.c23
-rw-r--r--net/netfilter/nfnetlink_acct.c3
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c2
-rw-r--r--net/netfilter/nfnetlink_log.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c6
-rw-r--r--net/netfilter/nft_byteorder.c6
-rw-r--r--net/netfilter/nft_compat.c6
-rw-r--r--net/netfilter/nft_counter.c4
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/nft_masq.c51
-rw-r--r--net/netfilter/nft_meta.c11
-rw-r--r--net/netfilter/x_tables.c68
-rw-r--r--net/netfilter/xt_TCPMSS.c9
-rw-r--r--net/netfilter/xt_TEE.c4
-rw-r--r--net/netfilter/xt_TPROXY.c31
-rw-r--r--net/netfilter/xt_osf.c2
-rw-r--r--net/netfilter/xt_socket.c28
-rw-r--r--net/netlabel/netlabel_domainhash.c4
-rw-r--r--net/netlabel/netlabel_unlabeled.c6
-rw-r--r--net/netlink/Kconfig9
-rw-r--r--net/netlink/af_netlink.c781
-rw-r--r--net/netlink/af_netlink.h15
-rw-r--r--net/netlink/diag.c39
-rw-r--r--net/netlink/genetlink.c25
-rw-r--r--net/nfc/llcp_commands.c4
-rw-r--r--net/nfc/llcp_sock.c6
-rw-r--r--net/nfc/nci/uart.c9
-rw-r--r--net/openvswitch/Kconfig4
-rw-r--r--net/openvswitch/actions.c8
-rw-r--r--net/openvswitch/conntrack.c660
-rw-r--r--net/openvswitch/conntrack.h3
-rw-r--r--net/openvswitch/datapath.c108
-rw-r--r--net/openvswitch/datapath.h4
-rw-r--r--net/openvswitch/flow.h2
-rw-r--r--net/openvswitch/flow_netlink.c9
-rw-r--r--net/openvswitch/vport-geneve.c2
-rw-r--r--net/openvswitch/vport-internal_dev.c10
-rw-r--r--net/openvswitch/vport-netdev.c2
-rw-r--r--net/openvswitch/vport-vxlan.c4
-rw-r--r--net/openvswitch/vport.h7
-rw-r--r--net/packet/af_packet.c472
-rw-r--r--net/phonet/socket.c6
-rw-r--r--net/rds/Kconfig7
-rw-r--r--net/rds/Makefile4
-rw-r--r--net/rds/af_rds.c26
-rw-r--r--net/rds/ib.c47
-rw-r--r--net/rds/ib.h37
-rw-r--r--net/rds/ib_cm.c59
-rw-r--r--net/rds/ib_fmr.c248
-rw-r--r--net/rds/ib_frmr.c376
-rw-r--r--net/rds/ib_mr.h148
-rw-r--r--net/rds/ib_rdma.c495
-rw-r--r--net/rds/ib_send.c6
-rw-r--r--net/rds/ib_stats.c2
-rw-r--r--net/rds/iw.c312
-rw-r--r--net/rds/iw.h398
-rw-r--r--net/rds/iw_cm.c769
-rw-r--r--net/rds/iw_rdma.c837
-rw-r--r--net/rds/iw_recv.c904
-rw-r--r--net/rds/iw_ring.c169
-rw-r--r--net/rds/iw_send.c981
-rw-r--r--net/rds/iw_stats.c95
-rw-r--r--net/rds/iw_sysctl.c123
-rw-r--r--net/rds/page.c4
-rw-r--r--net/rds/rdma_transport.c21
-rw-r--r--net/rds/rdma_transport.h5
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rds/recv.c20
-rw-r--r--net/rds/tcp.c146
-rw-r--r--net/rfkill/Kconfig3
-rw-r--r--net/rfkill/core.c188
-rw-r--r--net/rfkill/rfkill-gpio.c24
-rw-r--r--net/rxrpc/af_rxrpc.c39
-rw-r--r--net/rxrpc/ar-accept.c56
-rw-r--r--net/rxrpc/ar-ack.c225
-rw-r--r--net/rxrpc/ar-call.c88
-rw-r--r--net/rxrpc/ar-connection.c85
-rw-r--r--net/rxrpc/ar-connevent.c79
-rw-r--r--net/rxrpc/ar-error.c13
-rw-r--r--net/rxrpc/ar-input.c118
-rw-r--r--net/rxrpc/ar-internal.h220
-rw-r--r--net/rxrpc/ar-key.c12
-rw-r--r--net/rxrpc/ar-local.c29
-rw-r--r--net/rxrpc/ar-output.c75
-rw-r--r--net/rxrpc/ar-peer.c2
-rw-r--r--net/rxrpc/ar-proc.c10
-rw-r--r--net/rxrpc/ar-recvmsg.c20
-rw-r--r--net/rxrpc/ar-security.c6
-rw-r--r--net/rxrpc/ar-skbuff.c7
-rw-r--r--net/rxrpc/ar-transport.c3
-rw-r--r--net/rxrpc/rxkad.c337
-rw-r--r--net/rxrpc/sysctl.c34
-rw-r--r--net/sched/Kconfig22
-rw-r--r--net/sched/Makefile3
-rw-r--r--net/sched/act_api.c137
-rw-r--r--net/sched/act_bpf.c52
-rw-r--r--net/sched/act_connmark.c54
-rw-r--r--net/sched/act_csum.c67
-rw-r--r--net/sched/act_gact.c55
-rw-r--r--net/sched/act_ife.c870
-rw-r--r--net/sched/act_ipt.c129
-rw-r--r--net/sched/act_meta_mark.c79
-rw-r--r--net/sched/act_meta_skbprio.c76
-rw-r--r--net/sched/act_mirred.c55
-rw-r--r--net/sched/act_nat.c72
-rw-r--r--net/sched/act_pedit.c54
-rw-r--r--net/sched/act_police.c52
-rw-r--r--net/sched/act_simple.c55
-rw-r--r--net/sched/act_skbedit.c54
-rw-r--r--net/sched/act_vlan.c54
-rw-r--r--net/sched/cls_bpf.c13
-rw-r--r--net/sched/cls_flower.c64
-rw-r--r--net/sched/cls_u32.c118
-rw-r--r--net/sched/sch_api.c11
-rw-r--r--net/sched/sch_cbq.c12
-rw-r--r--net/sched/sch_choke.c6
-rw-r--r--net/sched/sch_codel.c10
-rw-r--r--net/sched/sch_drr.c11
-rw-r--r--net/sched/sch_dsmark.c13
-rw-r--r--net/sched/sch_fq.c4
-rw-r--r--net/sched/sch_fq_codel.c17
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sched/sch_hfsc.c9
-rw-r--r--net/sched/sch_hhf.c10
-rw-r--r--net/sched/sch_htb.c24
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c11
-rw-r--r--net/sched/sch_multiq.c16
-rw-r--r--net/sched/sch_netem.c13
-rw-r--r--net/sched/sch_pie.c5
-rw-r--r--net/sched/sch_prio.c15
-rw-r--r--net/sched/sch_qfq.c9
-rw-r--r--net/sched/sch_red.c10
-rw-r--r--net/sched/sch_sfb.c10
-rw-r--r--net/sched/sch_sfq.c16
-rw-r--r--net/sched/sch_tbf.c15
-rw-r--r--net/sctp/associola.c7
-rw-r--r--net/sctp/auth.c36
-rw-r--r--net/sctp/bind_addr.c14
-rw-r--r--net/sctp/chunk.c19
-rw-r--r--net/sctp/endpointola.c1
-rw-r--r--net/sctp/input.c18
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/output.c6
-rw-r--r--net/sctp/outqueue.c36
-rw-r--r--net/sctp/probe.c10
-rw-r--r--net/sctp/proc.c20
-rw-r--r--net/sctp/protocol.c47
-rw-r--r--net/sctp/sm_make_chunk.c134
-rw-r--r--net/sctp/sm_sideeffect.c41
-rw-r--r--net/sctp/socket.c34
-rw-r--r--net/sctp/transport.c14
-rw-r--r--net/socket.c64
-rw-r--r--net/sunrpc/Makefile3
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c6
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c350
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c12
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c89
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c22
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c24
-rw-r--r--net/sunrpc/auth_null.c4
-rw-r--r--net/sunrpc/auth_unix.c6
-rw-r--r--net/sunrpc/cache.c8
-rw-r--r--net/sunrpc/clnt.c328
-rw-r--r--net/sunrpc/rpcb_clnt.c10
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/xprt.c42
-rw-r--r--net/sunrpc/xprtmultipath.c475
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c28
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c143
-rw-r--r--net/sunrpc/xprtrdma/physical_ops.c1
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c108
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c17
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c64
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c60
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c196
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c445
-rw-r--r--net/sunrpc/xprtrdma/verbs.c204
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h14
-rw-r--r--net/sunrpc/xprtsock.c4
-rw-r--r--net/switchdev/switchdev.c20
-rw-r--r--net/tipc/bcast.c5
-rw-r--r--net/tipc/bcast.h1
-rw-r--r--net/tipc/bearer.c18
-rw-r--r--net/tipc/link.c171
-rw-r--r--net/tipc/link.h7
-rw-r--r--net/tipc/name_table.c20
-rw-r--r--net/tipc/net.c7
-rw-r--r--net/tipc/netlink.c69
-rw-r--r--net/tipc/netlink.h11
-rw-r--r--net/tipc/netlink_compat.c2
-rw-r--r--net/tipc/node.c142
-rw-r--r--net/tipc/server.c4
-rw-r--r--net/tipc/socket.c42
-rw-r--r--net/tipc/subscr.c132
-rw-r--r--net/tipc/subscr.h11
-rw-r--r--net/tipc/udp_media.c44
-rw-r--r--net/unix/af_unix.c36
-rw-r--r--net/unix/diag.c2
-rw-r--r--net/unix/garbage.c8
-rw-r--r--net/vmw_vsock/af_vsock.c158
-rw-r--r--net/wireless/Kconfig25
-rw-r--r--net/wireless/core.c12
-rw-r--r--net/wireless/lib80211_crypt_tkip.c99
-rw-r--r--net/wireless/lib80211_crypt_wep.c46
-rw-r--r--net/wireless/mlme.c3
-rw-r--r--net/wireless/nl80211.c31
-rw-r--r--net/wireless/radiotap.c1
-rw-r--r--net/wireless/reg.c167
-rw-r--r--net/wireless/sme.c15
-rw-r--r--net/wireless/util.c277
-rw-r--r--net/wireless/wext-core.c52
-rw-r--r--net/xfrm/xfrm_algo.c7
-rw-r--r--net/xfrm/xfrm_user.c2
507 files changed, 22172 insertions, 13937 deletions
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index faf65baed617..34e44c0c0836 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -20,7 +20,7 @@
 int lowpan_register_netdevice(struct net_device *dev,
 			      enum lowpan_lltypes lltype)
 {
-	int ret;
+	int i, ret;
 
 	dev->addr_len = EUI64_ADDR_LEN;
 	dev->type = ARPHRD_6LOWPAN;
@@ -29,6 +29,10 @@ int lowpan_register_netdevice(struct net_device *dev,
 
 	lowpan_priv(dev)->lltype = lltype;
 
+	spin_lock_init(&lowpan_priv(dev)->ctx.lock);
+	for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
+		lowpan_priv(dev)->ctx.table[i].id = i;
+
 	ret = register_netdevice(dev);
 	if (ret < 0)
 		return ret;
@@ -68,6 +72,32 @@ void lowpan_unregister_netdev(struct net_device *dev)
 }
 EXPORT_SYMBOL(lowpan_unregister_netdev);
 
+static int lowpan_event(struct notifier_block *unused,
+			unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	int i;
+
+	if (dev->type != ARPHRD_6LOWPAN)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_DOWN:
+		for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++)
+			clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE,
+				  &lowpan_priv(dev)->ctx.table[i].flags);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block lowpan_notifier = {
+	.notifier_call = lowpan_event,
+};
+
 static int __init lowpan_module_init(void)
 {
 	int ret;
@@ -76,6 +106,12 @@ static int __init lowpan_module_init(void)
 	if (ret < 0)
 		return ret;
 
+	ret = register_netdevice_notifier(&lowpan_notifier);
+	if (ret < 0) {
+		lowpan_debugfs_exit();
+		return ret;
+	}
+
 	request_module_nowait("ipv6");
 
 	request_module_nowait("nhc_dest");
@@ -92,6 +128,7 @@ static int __init lowpan_module_init(void)
 static void __exit lowpan_module_exit(void)
 {
 	lowpan_debugfs_exit();
+	unregister_netdevice_notifier(&lowpan_notifier);
 }
 
 module_init(lowpan_module_init);
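
The context table initialized above is declared in the 6LoWPAN private headers (6lowpan_i.h and the lowpan_priv() structure), which fall outside this net-only diff. A rough sketch, reconstructed purely from how the fields are used in these hunks, is shown below; the real definition may differ in detail, and the table size is presumably 16 since the CID byte carries one 4-bit index per direction:

struct lowpan_iphc_ctx {
	u8 id;			/* context identifier, written back into the CID nibbles */
	struct in6_addr pfx;	/* IPv6 prefix bound to this context */
	u8 plen;		/* prefix length in bits, 0..128 */
	unsigned long flags;	/* LOWPAN_IPHC_CTX_FLAG_ACTIVE / _COMPRESSION bits */
};

struct lowpan_iphc_ctx_table {
	spinlock_t lock;	/* taken with spin_lock_bh() around every table access */
	struct lowpan_iphc_ctx table[LOWPAN_IPHC_CTX_TABLE_SIZE];
};

The notifier registered above simply clears the ACTIVE bit of every entry on NETDEV_DOWN, so stale prefixes are never used for compression once the link has gone down.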
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index 88eef84df0fc..0793a8157472 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -16,19 +16,266 @@
16 16
17#include "6lowpan_i.h" 17#include "6lowpan_i.h"
18 18
19#define LOWPAN_DEBUGFS_CTX_PFX_NUM_ARGS 8
20
19static struct dentry *lowpan_debugfs; 21static struct dentry *lowpan_debugfs;
20 22
23static int lowpan_ctx_flag_active_set(void *data, u64 val)
24{
25 struct lowpan_iphc_ctx *ctx = data;
26
27 if (val != 0 && val != 1)
28 return -EINVAL;
29
30 if (val)
31 set_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE, &ctx->flags);
32 else
33 clear_bit(LOWPAN_IPHC_CTX_FLAG_ACTIVE, &ctx->flags);
34
35 return 0;
36}
37
38static int lowpan_ctx_flag_active_get(void *data, u64 *val)
39{
40 *val = lowpan_iphc_ctx_is_active(data);
41 return 0;
42}
43
44DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_flag_active_fops,
45 lowpan_ctx_flag_active_get,
46 lowpan_ctx_flag_active_set, "%llu\n");
47
48static int lowpan_ctx_flag_c_set(void *data, u64 val)
49{
50 struct lowpan_iphc_ctx *ctx = data;
51
52 if (val != 0 && val != 1)
53 return -EINVAL;
54
55 if (val)
56 set_bit(LOWPAN_IPHC_CTX_FLAG_COMPRESSION, &ctx->flags);
57 else
58 clear_bit(LOWPAN_IPHC_CTX_FLAG_COMPRESSION, &ctx->flags);
59
60 return 0;
61}
62
63static int lowpan_ctx_flag_c_get(void *data, u64 *val)
64{
65 *val = lowpan_iphc_ctx_is_compression(data);
66 return 0;
67}
68
69DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_flag_c_fops, lowpan_ctx_flag_c_get,
70 lowpan_ctx_flag_c_set, "%llu\n");
71
72static int lowpan_ctx_plen_set(void *data, u64 val)
73{
74 struct lowpan_iphc_ctx *ctx = data;
75 struct lowpan_iphc_ctx_table *t =
76 container_of(ctx, struct lowpan_iphc_ctx_table, table[ctx->id]);
77
78 if (val > 128)
79 return -EINVAL;
80
81 spin_lock_bh(&t->lock);
82 ctx->plen = val;
83 spin_unlock_bh(&t->lock);
84
85 return 0;
86}
87
88static int lowpan_ctx_plen_get(void *data, u64 *val)
89{
90 struct lowpan_iphc_ctx *ctx = data;
91 struct lowpan_iphc_ctx_table *t =
92 container_of(ctx, struct lowpan_iphc_ctx_table, table[ctx->id]);
93
94 spin_lock_bh(&t->lock);
95 *val = ctx->plen;
96 spin_unlock_bh(&t->lock);
97 return 0;
98}
99
100DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_plen_fops, lowpan_ctx_plen_get,
101 lowpan_ctx_plen_set, "%llu\n");
102
103static int lowpan_ctx_pfx_show(struct seq_file *file, void *offset)
104{
105 struct lowpan_iphc_ctx *ctx = file->private;
106 struct lowpan_iphc_ctx_table *t =
107 container_of(ctx, struct lowpan_iphc_ctx_table, table[ctx->id]);
108
109 spin_lock_bh(&t->lock);
110 seq_printf(file, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
111 be16_to_cpu(ctx->pfx.s6_addr16[0]),
112 be16_to_cpu(ctx->pfx.s6_addr16[1]),
113 be16_to_cpu(ctx->pfx.s6_addr16[2]),
114 be16_to_cpu(ctx->pfx.s6_addr16[3]),
115 be16_to_cpu(ctx->pfx.s6_addr16[4]),
116 be16_to_cpu(ctx->pfx.s6_addr16[5]),
117 be16_to_cpu(ctx->pfx.s6_addr16[6]),
118 be16_to_cpu(ctx->pfx.s6_addr16[7]));
119 spin_unlock_bh(&t->lock);
120
121 return 0;
122}
123
124static int lowpan_ctx_pfx_open(struct inode *inode, struct file *file)
125{
126 return single_open(file, lowpan_ctx_pfx_show, inode->i_private);
127}
128
129static ssize_t lowpan_ctx_pfx_write(struct file *fp,
130 const char __user *user_buf, size_t count,
131 loff_t *ppos)
132{
133 char buf[128] = {};
134 struct seq_file *file = fp->private_data;
135 struct lowpan_iphc_ctx *ctx = file->private;
136 struct lowpan_iphc_ctx_table *t =
137 container_of(ctx, struct lowpan_iphc_ctx_table, table[ctx->id]);
138 int status = count, n, i;
139 unsigned int addr[8];
140
141 if (copy_from_user(&buf, user_buf, min_t(size_t, sizeof(buf) - 1,
142 count))) {
143 status = -EFAULT;
144 goto out;
145 }
146
147 n = sscanf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x",
148 &addr[0], &addr[1], &addr[2], &addr[3], &addr[4],
149 &addr[5], &addr[6], &addr[7]);
150 if (n != LOWPAN_DEBUGFS_CTX_PFX_NUM_ARGS) {
151 status = -EINVAL;
152 goto out;
153 }
154
155 spin_lock_bh(&t->lock);
156 for (i = 0; i < 8; i++)
157 ctx->pfx.s6_addr16[i] = cpu_to_be16(addr[i] & 0xffff);
158 spin_unlock_bh(&t->lock);
159
160out:
161 return status;
162}
163
164static const struct file_operations lowpan_ctx_pfx_fops = {
165 .open = lowpan_ctx_pfx_open,
166 .read = seq_read,
167 .write = lowpan_ctx_pfx_write,
168 .llseek = seq_lseek,
169 .release = single_release,
170};
171
172static int lowpan_dev_debugfs_ctx_init(struct net_device *dev,
173 struct dentry *ctx, u8 id)
174{
175 struct lowpan_priv *lpriv = lowpan_priv(dev);
176 struct dentry *dentry, *root;
177 char buf[32];
178
179 WARN_ON_ONCE(id > LOWPAN_IPHC_CTX_TABLE_SIZE);
180
181 sprintf(buf, "%d", id);
182
183 root = debugfs_create_dir(buf, ctx);
184 if (!root)
185 return -EINVAL;
186
187 dentry = debugfs_create_file("active", 0644, root,
188 &lpriv->ctx.table[id],
189 &lowpan_ctx_flag_active_fops);
190 if (!dentry)
191 return -EINVAL;
192
193 dentry = debugfs_create_file("compression", 0644, root,
194 &lpriv->ctx.table[id],
195 &lowpan_ctx_flag_c_fops);
196 if (!dentry)
197 return -EINVAL;
198
199 dentry = debugfs_create_file("prefix", 0644, root,
200 &lpriv->ctx.table[id],
201 &lowpan_ctx_pfx_fops);
202 if (!dentry)
203 return -EINVAL;
204
205 dentry = debugfs_create_file("prefix_len", 0644, root,
206 &lpriv->ctx.table[id],
207 &lowpan_ctx_plen_fops);
208 if (!dentry)
209 return -EINVAL;
210
211 return 0;
212}
213
214static int lowpan_context_show(struct seq_file *file, void *offset)
215{
216 struct lowpan_iphc_ctx_table *t = file->private;
217 int i;
218
219 seq_printf(file, "%3s|%-43s|%c\n", "cid", "prefix", 'C');
220 seq_puts(file, "-------------------------------------------------\n");
221
222 spin_lock_bh(&t->lock);
223 for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) {
224 if (!lowpan_iphc_ctx_is_active(&t->table[i]))
225 continue;
226
227 seq_printf(file, "%3d|%39pI6c/%-3d|%d\n", t->table[i].id,
228 &t->table[i].pfx, t->table[i].plen,
229 lowpan_iphc_ctx_is_compression(&t->table[i]));
230 }
231 spin_unlock_bh(&t->lock);
232
233 return 0;
234}
235
236static int lowpan_context_open(struct inode *inode, struct file *file)
237{
238 return single_open(file, lowpan_context_show, inode->i_private);
239}
240
241static const struct file_operations lowpan_context_fops = {
242 .open = lowpan_context_open,
243 .read = seq_read,
244 .llseek = seq_lseek,
245 .release = single_release,
246};
247
21int lowpan_dev_debugfs_init(struct net_device *dev) 248int lowpan_dev_debugfs_init(struct net_device *dev)
22{ 249{
23 struct lowpan_priv *lpriv = lowpan_priv(dev); 250 struct lowpan_priv *lpriv = lowpan_priv(dev);
251 struct dentry *contexts, *dentry;
252 int ret, i;
24 253
25 /* creating the root */ 254 /* creating the root */
26 lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs); 255 lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs);
27 if (!lpriv->iface_debugfs) 256 if (!lpriv->iface_debugfs)
28 goto fail; 257 goto fail;
29 258
259 contexts = debugfs_create_dir("contexts", lpriv->iface_debugfs);
260 if (!contexts)
261 goto remove_root;
262
263 dentry = debugfs_create_file("show", 0644, contexts,
264 &lowpan_priv(dev)->ctx,
265 &lowpan_context_fops);
266 if (!dentry)
267 goto remove_root;
268
269 for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) {
270 ret = lowpan_dev_debugfs_ctx_init(dev, contexts, i);
271 if (ret < 0)
272 goto remove_root;
273 }
274
30 return 0; 275 return 0;
31 276
277remove_root:
278 lowpan_dev_debugfs_exit(dev);
32fail: 279fail:
33 return -EINVAL; 280 return -EINVAL;
34} 281}
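
For orientation, the hooks above create a per-interface debugfs hierarchy roughly as follows. The directory and file names are taken verbatim from the debugfs_create_dir()/debugfs_create_file() calls; the /sys/kernel/debug mount point and the top-level 6lowpan directory (created elsewhere in this file) are assumptions about the usual setup:

/sys/kernel/debug/6lowpan/<ifname>/contexts/
	show		dump of all currently active contexts
	0/ ... 15/	one directory per context id (LOWPAN_IPHC_CTX_TABLE_SIZE entries)
		active		0 or 1, LOWPAN_IPHC_CTX_FLAG_ACTIVE
		compression	0 or 1, LOWPAN_IPHC_CTX_FLAG_COMPRESSION
		prefix		IPv6 prefix, read and written as xxxx:xxxx:...:xxxx
		prefix_len	prefix length in bits, 0 to 128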
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 346b5c1a9185..99bb22aea346 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -56,6 +56,7 @@
56/* special link-layer handling */ 56/* special link-layer handling */
57#include <net/mac802154.h> 57#include <net/mac802154.h>
58 58
59#include "6lowpan_i.h"
59#include "nhc.h" 60#include "nhc.h"
60 61
61/* Values of fields within the IPHC encoding first byte */ 62/* Values of fields within the IPHC encoding first byte */
@@ -147,6 +148,9 @@
147 (((a)->s6_addr16[6]) == 0) && \ 148 (((a)->s6_addr16[6]) == 0) && \
148 (((a)->s6_addr[14]) == 0)) 149 (((a)->s6_addr[14]) == 0))
149 150
151#define LOWPAN_IPHC_CID_DCI(cid) (cid & 0x0f)
152#define LOWPAN_IPHC_CID_SCI(cid) ((cid & 0xf0) >> 4)
153
150static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr, 154static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr,
151 const void *lladdr) 155 const void *lladdr)
152{ 156{
@@ -195,6 +199,98 @@ static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr,
195 } 199 }
196} 200}
197 201
202static struct lowpan_iphc_ctx *
203lowpan_iphc_ctx_get_by_id(const struct net_device *dev, u8 id)
204{
205 struct lowpan_iphc_ctx *ret = &lowpan_priv(dev)->ctx.table[id];
206
207 if (!lowpan_iphc_ctx_is_active(ret))
208 return NULL;
209
210 return ret;
211}
212
213static struct lowpan_iphc_ctx *
214lowpan_iphc_ctx_get_by_addr(const struct net_device *dev,
215 const struct in6_addr *addr)
216{
217 struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table;
218 struct lowpan_iphc_ctx *ret = NULL;
219 struct in6_addr addr_pfx;
220 u8 addr_plen;
221 int i;
222
223 for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) {
224 /* Check if context is valid. A context that is not valid
225 * MUST NOT be used for compression.
226 */
227 if (!lowpan_iphc_ctx_is_active(&table[i]) ||
228 !lowpan_iphc_ctx_is_compression(&table[i]))
229 continue;
230
231 ipv6_addr_prefix(&addr_pfx, addr, table[i].plen);
232
233 /* if prefix len < 64, the remaining bits until 64th bit is
234 * zero. Otherwise we use table[i]->plen.
235 */
236 if (table[i].plen < 64)
237 addr_plen = 64;
238 else
239 addr_plen = table[i].plen;
240
241 if (ipv6_prefix_equal(&addr_pfx, &table[i].pfx, addr_plen)) {
242 /* remember first match */
243 if (!ret) {
244 ret = &table[i];
245 continue;
246 }
247
248 /* get the context with longest prefix len */
249 if (table[i].plen > ret->plen)
250 ret = &table[i];
251 }
252 }
253
254 return ret;
255}
256
257static struct lowpan_iphc_ctx *
258lowpan_iphc_ctx_get_by_mcast_addr(const struct net_device *dev,
259 const struct in6_addr *addr)
260{
261 struct lowpan_iphc_ctx *table = lowpan_priv(dev)->ctx.table;
262 struct lowpan_iphc_ctx *ret = NULL;
263 struct in6_addr addr_mcast, network_pfx = {};
264 int i;
265
266 /* init mcast address with */
267 memcpy(&addr_mcast, addr, sizeof(*addr));
268
269 for (i = 0; i < LOWPAN_IPHC_CTX_TABLE_SIZE; i++) {
270 /* Check if context is valid. A context that is not valid
271 * MUST NOT be used for compression.
272 */
273 if (!lowpan_iphc_ctx_is_active(&table[i]) ||
274 !lowpan_iphc_ctx_is_compression(&table[i]))
275 continue;
276
277 /* setting plen */
278 addr_mcast.s6_addr[3] = table[i].plen;
279 /* get network prefix to copy into multicast address */
280 ipv6_addr_prefix(&network_pfx, &table[i].pfx,
281 table[i].plen);
282 /* setting network prefix */
283 memcpy(&addr_mcast.s6_addr[4], &network_pfx, 8);
284
285 if (ipv6_addr_equal(addr, &addr_mcast)) {
286 ret = &table[i];
287 break;
288 }
289 }
290
291 return ret;
292}
293
198/* Uncompress address function for source and 294/* Uncompress address function for source and
199 * destination address(non-multicast). 295 * destination address(non-multicast).
200 * 296 *
@@ -259,30 +355,59 @@ static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev,
259/* Uncompress address function for source context 355/* Uncompress address function for source context
260 * based address(non-multicast). 356 * based address(non-multicast).
261 */ 357 */
262static int uncompress_context_based_src_addr(struct sk_buff *skb, 358static int uncompress_ctx_addr(struct sk_buff *skb,
263 struct in6_addr *ipaddr, 359 const struct net_device *dev,
264 u8 address_mode) 360 const struct lowpan_iphc_ctx *ctx,
361 struct in6_addr *ipaddr, u8 address_mode,
362 const void *lladdr)
265{ 363{
364 bool fail;
365
266 switch (address_mode) { 366 switch (address_mode) {
267 case LOWPAN_IPHC_SAM_00: 367 /* SAM and DAM are the same here */
268 /* unspec address :: 368 case LOWPAN_IPHC_DAM_00:
369 fail = false;
370 /* SAM_00 -> unspec address ::
269 * Do nothing, address is already :: 371 * Do nothing, address is already ::
372 *
373 * DAM 00 -> reserved should never occur.
270 */ 374 */
271 break; 375 break;
272 case LOWPAN_IPHC_SAM_01: 376 case LOWPAN_IPHC_SAM_01:
273 /* TODO */ 377 case LOWPAN_IPHC_DAM_01:
378 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8);
379 ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen);
380 break;
274 case LOWPAN_IPHC_SAM_10: 381 case LOWPAN_IPHC_SAM_10:
275 /* TODO */ 382 case LOWPAN_IPHC_DAM_10:
383 ipaddr->s6_addr[11] = 0xFF;
384 ipaddr->s6_addr[12] = 0xFE;
385 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2);
386 ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen);
387 break;
276 case LOWPAN_IPHC_SAM_11: 388 case LOWPAN_IPHC_SAM_11:
277 /* TODO */ 389 case LOWPAN_IPHC_DAM_11:
278 netdev_warn(skb->dev, "SAM value 0x%x not supported\n", 390 fail = false;
279 address_mode); 391 switch (lowpan_priv(dev)->lltype) {
280 return -EINVAL; 392 case LOWPAN_LLTYPE_IEEE802154:
393 iphc_uncompress_802154_lladdr(ipaddr, lladdr);
394 break;
395 default:
396 iphc_uncompress_eui64_lladdr(ipaddr, lladdr);
397 break;
398 }
399 ipv6_addr_prefix_copy(ipaddr, &ctx->pfx, ctx->plen);
400 break;
281 default: 401 default:
282 pr_debug("Invalid sam value: 0x%x\n", address_mode); 402 pr_debug("Invalid sam value: 0x%x\n", address_mode);
283 return -EINVAL; 403 return -EINVAL;
284 } 404 }
285 405
406 if (fail) {
407 pr_debug("Failed to fetch skb data\n");
408 return -EIO;
409 }
410
286 raw_dump_inline(NULL, 411 raw_dump_inline(NULL,
287 "Reconstructed context based ipv6 src addr is", 412 "Reconstructed context based ipv6 src addr is",
288 ipaddr->s6_addr, 16); 413 ipaddr->s6_addr, 16);
@@ -346,6 +471,30 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
346 return 0; 471 return 0;
347} 472}
348 473
474static int lowpan_uncompress_multicast_ctx_daddr(struct sk_buff *skb,
475 struct lowpan_iphc_ctx *ctx,
476 struct in6_addr *ipaddr,
477 u8 address_mode)
478{
479 struct in6_addr network_pfx = {};
480 bool fail;
481
482 ipaddr->s6_addr[0] = 0xFF;
483 fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 2);
484 fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[12], 4);
485 if (fail)
486 return -EIO;
487
488 /* take prefix_len and network prefix from the context */
489 ipaddr->s6_addr[3] = ctx->plen;
490 /* get network prefix to copy into multicast address */
491 ipv6_addr_prefix(&network_pfx, &ctx->pfx, ctx->plen);
492 /* setting network prefix */
493 memcpy(&ipaddr->s6_addr[4], &network_pfx, 8);
494
495 return 0;
496}
497
349/* get the ecn values from iphc tf format and set it to ipv6hdr */ 498/* get the ecn values from iphc tf format and set it to ipv6hdr */
350static inline void lowpan_iphc_tf_set_ecn(struct ipv6hdr *hdr, const u8 *tf) 499static inline void lowpan_iphc_tf_set_ecn(struct ipv6hdr *hdr, const u8 *tf)
351{ 500{
@@ -459,7 +608,8 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
459 const void *daddr, const void *saddr) 608 const void *daddr, const void *saddr)
460{ 609{
461 struct ipv6hdr hdr = {}; 610 struct ipv6hdr hdr = {};
462 u8 iphc0, iphc1; 611 struct lowpan_iphc_ctx *ci;
612 u8 iphc0, iphc1, cid = 0;
463 int err; 613 int err;
464 614
465 raw_dump_table(__func__, "raw skb data dump uncompressed", 615 raw_dump_table(__func__, "raw skb data dump uncompressed",
@@ -469,12 +619,14 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
469 lowpan_fetch_skb(skb, &iphc1, sizeof(iphc1))) 619 lowpan_fetch_skb(skb, &iphc1, sizeof(iphc1)))
470 return -EINVAL; 620 return -EINVAL;
471 621
472 /* another if the CID flag is set */
473 if (iphc1 & LOWPAN_IPHC_CID)
474 return -ENOTSUPP;
475
476 hdr.version = 6; 622 hdr.version = 6;
477 623
624 /* default CID = 0, another if the CID flag is set */
625 if (iphc1 & LOWPAN_IPHC_CID) {
626 if (lowpan_fetch_skb(skb, &cid, sizeof(cid)))
627 return -EINVAL;
628 }
629
478 err = lowpan_iphc_tf_decompress(skb, &hdr, 630 err = lowpan_iphc_tf_decompress(skb, &hdr,
479 iphc0 & LOWPAN_IPHC_TF_MASK); 631 iphc0 & LOWPAN_IPHC_TF_MASK);
480 if (err < 0) 632 if (err < 0)
@@ -500,10 +652,17 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
500 } 652 }
501 653
502 if (iphc1 & LOWPAN_IPHC_SAC) { 654 if (iphc1 & LOWPAN_IPHC_SAC) {
503 /* Source address context based uncompression */ 655 spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
656 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_SCI(cid));
657 if (!ci) {
658 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
659 return -EINVAL;
660 }
661
504 pr_debug("SAC bit is set. Handle context based source address.\n"); 662 pr_debug("SAC bit is set. Handle context based source address.\n");
505 err = uncompress_context_based_src_addr(skb, &hdr.saddr, 663 err = uncompress_ctx_addr(skb, dev, ci, &hdr.saddr,
506 iphc1 & LOWPAN_IPHC_SAM_MASK); 664 iphc1 & LOWPAN_IPHC_SAM_MASK, saddr);
665 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
507 } else { 666 } else {
508 /* Source address uncompression */ 667 /* Source address uncompression */
509 pr_debug("source address stateless compression\n"); 668 pr_debug("source address stateless compression\n");
@@ -515,27 +674,52 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
515 if (err) 674 if (err)
516 return -EINVAL; 675 return -EINVAL;
517 676
518 /* check for Multicast Compression */ 677 switch (iphc1 & (LOWPAN_IPHC_M | LOWPAN_IPHC_DAC)) {
519 if (iphc1 & LOWPAN_IPHC_M) { 678 case LOWPAN_IPHC_M | LOWPAN_IPHC_DAC:
520 if (iphc1 & LOWPAN_IPHC_DAC) { 679 spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
521 pr_debug("dest: context-based mcast compression\n"); 680 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
522 /* TODO: implement this */ 681 if (!ci) {
523 } else { 682 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
524 err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr, 683 return -EINVAL;
525 iphc1 & LOWPAN_IPHC_DAM_MASK); 684 }
526 685
527 if (err) 686 /* multicast with context */
528 return -EINVAL; 687 pr_debug("dest: context-based mcast compression\n");
688 err = lowpan_uncompress_multicast_ctx_daddr(skb, ci,
689 &hdr.daddr,
690 iphc1 & LOWPAN_IPHC_DAM_MASK);
691 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
692 break;
693 case LOWPAN_IPHC_M:
694 /* multicast */
695 err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr,
696 iphc1 & LOWPAN_IPHC_DAM_MASK);
697 break;
698 case LOWPAN_IPHC_DAC:
699 spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
700 ci = lowpan_iphc_ctx_get_by_id(dev, LOWPAN_IPHC_CID_DCI(cid));
701 if (!ci) {
702 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
703 return -EINVAL;
529 } 704 }
530 } else { 705
706 /* Destination address context based uncompression */
707 pr_debug("DAC bit is set. Handle context based destination address.\n");
708 err = uncompress_ctx_addr(skb, dev, ci, &hdr.daddr,
709 iphc1 & LOWPAN_IPHC_DAM_MASK, daddr);
710 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
711 break;
712 default:
531 err = uncompress_addr(skb, dev, &hdr.daddr, 713 err = uncompress_addr(skb, dev, &hdr.daddr,
532 iphc1 & LOWPAN_IPHC_DAM_MASK, daddr); 714 iphc1 & LOWPAN_IPHC_DAM_MASK, daddr);
533 pr_debug("dest: stateless compression mode %d dest %pI6c\n", 715 pr_debug("dest: stateless compression mode %d dest %pI6c\n",
534 iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr); 716 iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr);
535 if (err) 717 break;
536 return -EINVAL;
537 } 718 }
538 719
720 if (err)
721 return -EINVAL;
722
539 /* Next header data uncompression */ 723 /* Next header data uncompression */
540 if (iphc0 & LOWPAN_IPHC_NH) { 724 if (iphc0 & LOWPAN_IPHC_NH) {
541 err = lowpan_nhc_do_uncompression(skb, dev, &hdr); 725 err = lowpan_nhc_do_uncompression(skb, dev, &hdr);
@@ -585,6 +769,58 @@ static const u8 lowpan_iphc_dam_to_sam_value[] = {
585 [LOWPAN_IPHC_DAM_11] = LOWPAN_IPHC_SAM_11, 769 [LOWPAN_IPHC_DAM_11] = LOWPAN_IPHC_SAM_11,
586}; 770};
587 771
772static u8 lowpan_compress_ctx_addr(u8 **hc_ptr, const struct in6_addr *ipaddr,
773 const struct lowpan_iphc_ctx *ctx,
774 const unsigned char *lladdr, bool sam)
775{
776 struct in6_addr tmp = {};
777 u8 dam;
778
779 /* check for SAM/DAM = 11 */
780 memcpy(&tmp.s6_addr[8], lladdr, 8);
781 /* second bit-flip (Universe/Local) is done according RFC2464 */
782 tmp.s6_addr[8] ^= 0x02;
783 /* context information are always used */
784 ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
785 if (ipv6_addr_equal(&tmp, ipaddr)) {
786 dam = LOWPAN_IPHC_DAM_11;
787 goto out;
788 }
789
790 memset(&tmp, 0, sizeof(tmp));
791 /* check for SAM/DAM = 10 */
792 tmp.s6_addr[11] = 0xFF;
793 tmp.s6_addr[12] = 0xFE;
794 memcpy(&tmp.s6_addr[14], &ipaddr->s6_addr[14], 2);
795 /* context information are always used */
796 ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
797 if (ipv6_addr_equal(&tmp, ipaddr)) {
798 lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[14], 2);
799 dam = LOWPAN_IPHC_DAM_10;
800 goto out;
801 }
802
803 memset(&tmp, 0, sizeof(tmp));
804 /* check for SAM/DAM = 01, should always match */
805 memcpy(&tmp.s6_addr[8], &ipaddr->s6_addr[8], 8);
806 /* context information are always used */
807 ipv6_addr_prefix_copy(&tmp, &ctx->pfx, ctx->plen);
808 if (ipv6_addr_equal(&tmp, ipaddr)) {
809 lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[8], 8);
810 dam = LOWPAN_IPHC_DAM_01;
811 goto out;
812 }
813
814 WARN_ONCE(1, "context found but no address mode matched\n");
815 return LOWPAN_IPHC_DAM_00;
816out:
817
818 if (sam)
819 return lowpan_iphc_dam_to_sam_value[dam];
820 else
821 return dam;
822}
823
588static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct in6_addr *ipaddr, 824static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct in6_addr *ipaddr,
589 const unsigned char *lladdr, bool sam) 825 const unsigned char *lladdr, bool sam)
590{ 826{
@@ -708,6 +944,21 @@ static u8 lowpan_iphc_tf_compress(u8 **hc_ptr, const struct ipv6hdr *hdr)
708 return val; 944 return val;
709} 945}
710 946
947static u8 lowpan_iphc_mcast_ctx_addr_compress(u8 **hc_ptr,
948 const struct lowpan_iphc_ctx *ctx,
949 const struct in6_addr *ipaddr)
950{
951 u8 data[6];
952
953 /* flags/scope, reserved (RIID) */
954 memcpy(data, &ipaddr->s6_addr[1], 2);
955 /* group ID */
956 memcpy(&data[1], &ipaddr->s6_addr[11], 4);
957 lowpan_push_hc_data(hc_ptr, data, 6);
958
959 return LOWPAN_IPHC_DAM_00;
960}
961
711static u8 lowpan_iphc_mcast_addr_compress(u8 **hc_ptr, 962static u8 lowpan_iphc_mcast_addr_compress(u8 **hc_ptr,
712 const struct in6_addr *ipaddr) 963 const struct in6_addr *ipaddr)
713{ 964{
@@ -742,10 +993,11 @@ static u8 lowpan_iphc_mcast_addr_compress(u8 **hc_ptr,
742int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, 993int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
743 const void *daddr, const void *saddr) 994 const void *daddr, const void *saddr)
744{ 995{
745 u8 iphc0, iphc1, *hc_ptr; 996 u8 iphc0, iphc1, *hc_ptr, cid = 0;
746 struct ipv6hdr *hdr; 997 struct ipv6hdr *hdr;
747 u8 head[LOWPAN_IPHC_MAX_HC_BUF_LEN] = {}; 998 u8 head[LOWPAN_IPHC_MAX_HC_BUF_LEN] = {};
748 int ret, addr_type; 999 struct lowpan_iphc_ctx *dci, *sci, dci_entry, sci_entry;
1000 int ret, ipv6_daddr_type, ipv6_saddr_type;
749 1001
750 if (skb->protocol != htons(ETH_P_IPV6)) 1002 if (skb->protocol != htons(ETH_P_IPV6))
751 return -EINVAL; 1003 return -EINVAL;
@@ -769,14 +1021,38 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
769 iphc0 = LOWPAN_DISPATCH_IPHC; 1021 iphc0 = LOWPAN_DISPATCH_IPHC;
770 iphc1 = 0; 1022 iphc1 = 0;
771 1023
772 /* TODO: context lookup */
773
774 raw_dump_inline(__func__, "saddr", saddr, EUI64_ADDR_LEN); 1024 raw_dump_inline(__func__, "saddr", saddr, EUI64_ADDR_LEN);
775 raw_dump_inline(__func__, "daddr", daddr, EUI64_ADDR_LEN); 1025 raw_dump_inline(__func__, "daddr", daddr, EUI64_ADDR_LEN);
776 1026
777 raw_dump_table(__func__, "sending raw skb network uncompressed packet", 1027 raw_dump_table(__func__, "sending raw skb network uncompressed packet",
778 skb->data, skb->len); 1028 skb->data, skb->len);
779 1029
1030 ipv6_daddr_type = ipv6_addr_type(&hdr->daddr);
1031 spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
1032 if (ipv6_daddr_type & IPV6_ADDR_MULTICAST)
1033 dci = lowpan_iphc_ctx_get_by_mcast_addr(dev, &hdr->daddr);
1034 else
1035 dci = lowpan_iphc_ctx_get_by_addr(dev, &hdr->daddr);
1036 if (dci) {
1037 memcpy(&dci_entry, dci, sizeof(*dci));
1038 cid |= dci->id;
1039 }
1040 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
1041
1042 spin_lock_bh(&lowpan_priv(dev)->ctx.lock);
1043 sci = lowpan_iphc_ctx_get_by_addr(dev, &hdr->saddr);
1044 if (sci) {
1045 memcpy(&sci_entry, sci, sizeof(*sci));
1046 cid |= (sci->id << 4);
1047 }
1048 spin_unlock_bh(&lowpan_priv(dev)->ctx.lock);
1049
1050 /* if cid is zero it will be compressed */
1051 if (cid) {
1052 iphc1 |= LOWPAN_IPHC_CID;
1053 lowpan_push_hc_data(&hc_ptr, &cid, sizeof(cid));
1054 }
1055
780 /* Traffic Class, Flow Label compression */ 1056 /* Traffic Class, Flow Label compression */
781 iphc0 |= lowpan_iphc_tf_compress(&hc_ptr, hdr); 1057 iphc0 |= lowpan_iphc_tf_compress(&hc_ptr, hdr);
782 1058
@@ -813,39 +1089,64 @@ int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev,
813 sizeof(hdr->hop_limit)); 1089 sizeof(hdr->hop_limit));
814 } 1090 }
815 1091
816 addr_type = ipv6_addr_type(&hdr->saddr); 1092 ipv6_saddr_type = ipv6_addr_type(&hdr->saddr);
817 /* source address compression */ 1093 /* source address compression */
818 if (addr_type == IPV6_ADDR_ANY) { 1094 if (ipv6_saddr_type == IPV6_ADDR_ANY) {
819 pr_debug("source address is unspecified, setting SAC\n"); 1095 pr_debug("source address is unspecified, setting SAC\n");
820 iphc1 |= LOWPAN_IPHC_SAC; 1096 iphc1 |= LOWPAN_IPHC_SAC;
821 } else { 1097 } else {
822 if (addr_type & IPV6_ADDR_LINKLOCAL) { 1098 if (sci) {
823 iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->saddr, 1099 iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, &hdr->saddr,
824 saddr, true); 1100 &sci_entry, saddr,
825 pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n", 1101 true);
826 &hdr->saddr, iphc1); 1102 iphc1 |= LOWPAN_IPHC_SAC;
827 } else { 1103 } else {
828 pr_debug("send the full source address\n"); 1104 if (ipv6_saddr_type & IPV6_ADDR_LINKLOCAL) {
829 lowpan_push_hc_data(&hc_ptr, hdr->saddr.s6_addr, 16); 1105 iphc1 |= lowpan_compress_addr_64(&hc_ptr,
1106 &hdr->saddr,
1107 saddr, true);
1108 pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n",
1109 &hdr->saddr, iphc1);
1110 } else {
1111 pr_debug("send the full source address\n");
1112 lowpan_push_hc_data(&hc_ptr,
1113 hdr->saddr.s6_addr, 16);
1114 }
830 } 1115 }
831 } 1116 }
832 1117
833 addr_type = ipv6_addr_type(&hdr->daddr);
834 /* destination address compression */ 1118 /* destination address compression */
835 if (addr_type & IPV6_ADDR_MULTICAST) { 1119 if (ipv6_daddr_type & IPV6_ADDR_MULTICAST) {
836 pr_debug("destination address is multicast: "); 1120 pr_debug("destination address is multicast: ");
837 iphc1 |= LOWPAN_IPHC_M; 1121 iphc1 |= LOWPAN_IPHC_M;
838 iphc1 |= lowpan_iphc_mcast_addr_compress(&hc_ptr, &hdr->daddr); 1122 if (dci) {
1123 iphc1 |= lowpan_iphc_mcast_ctx_addr_compress(&hc_ptr,
1124 &dci_entry,
1125 &hdr->daddr);
1126 iphc1 |= LOWPAN_IPHC_DAC;
1127 } else {
1128 iphc1 |= lowpan_iphc_mcast_addr_compress(&hc_ptr,
1129 &hdr->daddr);
1130 }
839 } else { 1131 } else {
840 if (addr_type & IPV6_ADDR_LINKLOCAL) { 1132 if (dci) {
841 /* TODO: context lookup */ 1133 iphc1 |= lowpan_compress_ctx_addr(&hc_ptr, &hdr->daddr,
842 iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->daddr, 1134 &dci_entry, daddr,
843 daddr, false); 1135 false);
844 pr_debug("dest address unicast link-local %pI6c " 1136 iphc1 |= LOWPAN_IPHC_DAC;
845 "iphc1 0x%02x\n", &hdr->daddr, iphc1);
846 } else { 1137 } else {
847 pr_debug("dest address unicast %pI6c\n", &hdr->daddr); 1138 if (ipv6_daddr_type & IPV6_ADDR_LINKLOCAL) {
848 lowpan_push_hc_data(&hc_ptr, hdr->daddr.s6_addr, 16); 1139 iphc1 |= lowpan_compress_addr_64(&hc_ptr,
1140 &hdr->daddr,
1141 daddr, false);
1142 pr_debug("dest address unicast link-local %pI6c iphc1 0x%02x\n",
1143 &hdr->daddr, iphc1);
1144 } else {
1145 pr_debug("dest address unicast %pI6c\n",
1146 &hdr->daddr);
1147 lowpan_push_hc_data(&hc_ptr,
1148 hdr->daddr.s6_addr, 16);
1149 }
849 } 1150 }
850 } 1151 }
851 1152
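
As a concrete, purely hypothetical example of the stateful path added above: suppose context 1 is active, has its compression flag set, and holds the prefix 2001:db8::/64 (values chosen only for illustration). For a packet sent to 2001:db8::21a:22ff:fe33:4455, lowpan_iphc_ctx_get_by_addr() matches that context, so the compressor sets LOWPAN_IPHC_DAC, ORs the context id into the CID byte (DCI in the low nibble, SCI in the high nibble, matching the LOWPAN_IPHC_CID_DCI/SCI macros), and lets lowpan_compress_ctx_addr() pick the tightest address mode: DAM_11 if the whole address can be rebuilt from the context prefix plus the link-layer address, DAM_10 if only the trailing 16 bits of a ...:00ff:fe00:xxxx style IID need to travel inline, and otherwise DAM_01 with the full 64-bit IID carried inline. On receive, uncompress_ctx_addr() reverses this by fetching the inline bytes and overlaying the context prefix with ipv6_addr_prefix_copy().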
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index d2cd9de4b724..a1e273af6fc8 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -261,7 +261,6 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	 * hope the underlying device can handle it.
 	 */
 	new_dev->mtu = real_dev->mtu;
-	new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
 
 	vlan = vlan_dev_priv(new_dev);
 	vlan->vlan_proto = htons(ETH_P_8021Q);
@@ -312,6 +311,7 @@ static void vlan_transfer_features(struct net_device *dev,
 	struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 
 	vlandev->gso_max_size = dev->gso_max_size;
+	vlandev->gso_max_segs = dev->gso_max_segs;
 
 	if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto))
 		vlandev->hard_header_len = dev->hard_header_len;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ad5e2fd1012c..e7e62570bdb8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -551,6 +551,7 @@ static int vlan_dev_init(struct net_device *dev)
 	dev->features |= real_dev->vlan_features | NETIF_F_LLTX |
 			 NETIF_F_GSO_SOFTWARE;
 	dev->gso_max_size = real_dev->gso_max_size;
+	dev->gso_max_segs = real_dev->gso_max_segs;
 	if (dev->features & NETIF_F_VLAN_FEATURES)
 		netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");
 
@@ -621,12 +622,12 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
 	return features;
 }
 
-static int vlan_ethtool_get_settings(struct net_device *dev,
-				     struct ethtool_cmd *cmd)
+static int vlan_ethtool_get_link_ksettings(struct net_device *dev,
+					   struct ethtool_link_ksettings *cmd)
 {
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
-	return __ethtool_get_settings(vlan->real_dev, cmd);
+	return __ethtool_get_link_ksettings(vlan->real_dev, cmd);
 }
 
 static void vlan_ethtool_get_drvinfo(struct net_device *dev,
@@ -741,7 +742,7 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
 }
 
 static const struct ethtool_ops vlan_ethtool_ops = {
-	.get_settings = vlan_ethtool_get_settings,
+	.get_link_ksettings = vlan_ethtool_get_link_ksettings,
 	.get_drvinfo = vlan_ethtool_get_drvinfo,
 	.get_link = ethtool_op_get_link,
 	.get_ts_info = vlan_ethtool_get_ts_info,
@@ -799,6 +800,7 @@ void vlan_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->priv_flags |= IFF_802_1Q_VLAN | IFF_NO_QUEUE;
+	dev->priv_flags |= IFF_UNICAST_FLT;
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	netif_keep_dst(dev);
 
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index ae63cf72a953..5f1446c9f098 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -184,12 +184,11 @@ int vlan_proc_add_dev(struct net_device *vlandev)
 /*
  * Delete directory entry for VLAN device.
  */
-int vlan_proc_rem_dev(struct net_device *vlandev)
+void vlan_proc_rem_dev(struct net_device *vlandev)
 {
 	/** NOTE: This will consume the memory pointed to by dent, it seems. */
 	proc_remove(vlan_dev_priv(vlandev)->dent);
 	vlan_dev_priv(vlandev)->dent = NULL;
-	return 0;
 }
 
 /****** Proc filesystem entry points ****************************************/
diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h
index 063f60a3d5cc..8838a2e92eb6 100644
--- a/net/8021q/vlanproc.h
+++ b/net/8021q/vlanproc.h
@@ -5,7 +5,7 @@
 struct net;
 
 int vlan_proc_init(struct net *net);
-int vlan_proc_rem_dev(struct net_device *vlandev);
+void vlan_proc_rem_dev(struct net_device *vlandev);
 int vlan_proc_add_dev(struct net_device *vlandev);
 void vlan_proc_cleanup(struct net *net);
 
@@ -14,7 +14,7 @@ void vlan_proc_cleanup(struct net *net);
 #define vlan_proc_init(net) (0)
 #define vlan_proc_cleanup(net) do {} while (0)
 #define vlan_proc_add_dev(dev) ({(void)(dev), 0; })
-#define vlan_proc_rem_dev(dev) ({(void)(dev), 0; })
+#define vlan_proc_rem_dev(dev) do {} while (0)
 #endif
 
 #endif /* !(__BEN_VLAN_PROC_INC__) */
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 52b4a2f993f2..1852e383afd6 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -109,14 +109,13 @@ struct p9_trans_rdma {
109/** 109/**
110 * p9_rdma_context - Keeps track of in-process WR 110 * p9_rdma_context - Keeps track of in-process WR
111 * 111 *
112 * @wc_op: The original WR op for when the CQE completes in error.
113 * @busa: Bus address to unmap when the WR completes 112 * @busa: Bus address to unmap when the WR completes
114 * @req: Keeps track of requests (send) 113 * @req: Keeps track of requests (send)
115 * @rc: Keepts track of replies (receive) 114 * @rc: Keepts track of replies (receive)
116 */ 115 */
117struct p9_rdma_req; 116struct p9_rdma_req;
118struct p9_rdma_context { 117struct p9_rdma_context {
119 enum ib_wc_opcode wc_op; 118 struct ib_cqe cqe;
120 dma_addr_t busa; 119 dma_addr_t busa;
121 union { 120 union {
122 struct p9_req_t *req; 121 struct p9_req_t *req;
@@ -284,9 +283,12 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
284} 283}
285 284
286static void 285static void
287handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, 286recv_done(struct ib_cq *cq, struct ib_wc *wc)
288 struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
289{ 287{
288 struct p9_client *client = cq->cq_context;
289 struct p9_trans_rdma *rdma = client->trans;
290 struct p9_rdma_context *c =
291 container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
290 struct p9_req_t *req; 292 struct p9_req_t *req;
291 int err = 0; 293 int err = 0;
292 int16_t tag; 294 int16_t tag;
@@ -295,7 +297,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
295 ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, 297 ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
296 DMA_FROM_DEVICE); 298 DMA_FROM_DEVICE);
297 299
298 if (status != IB_WC_SUCCESS) 300 if (wc->status != IB_WC_SUCCESS)
299 goto err_out; 301 goto err_out;
300 302
301 err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); 303 err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
@@ -316,21 +318,32 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
316 req->rc = c->rc; 318 req->rc = c->rc;
317 p9_client_cb(client, req, REQ_STATUS_RCVD); 319 p9_client_cb(client, req, REQ_STATUS_RCVD);
318 320
321 out:
322 up(&rdma->rq_sem);
323 kfree(c);
319 return; 324 return;
320 325
321 err_out: 326 err_out:
322 p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status); 327 p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n",
328 req, err, wc->status);
323 rdma->state = P9_RDMA_FLUSHING; 329 rdma->state = P9_RDMA_FLUSHING;
324 client->status = Disconnected; 330 client->status = Disconnected;
331 goto out;
325} 332}
326 333
327static void 334static void
328handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, 335send_done(struct ib_cq *cq, struct ib_wc *wc)
329 struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
330{ 336{
337 struct p9_client *client = cq->cq_context;
338 struct p9_trans_rdma *rdma = client->trans;
339 struct p9_rdma_context *c =
340 container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
341
331 ib_dma_unmap_single(rdma->cm_id->device, 342 ib_dma_unmap_single(rdma->cm_id->device,
332 c->busa, c->req->tc->size, 343 c->busa, c->req->tc->size,
333 DMA_TO_DEVICE); 344 DMA_TO_DEVICE);
345 up(&rdma->sq_sem);
346 kfree(c);
334} 347}
335 348
336static void qp_event_handler(struct ib_event *event, void *context) 349static void qp_event_handler(struct ib_event *event, void *context)
@@ -339,42 +352,6 @@ static void qp_event_handler(struct ib_event *event, void *context)
339 event->event, context); 352 event->event, context);
340} 353}
341 354
342static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
343{
344 struct p9_client *client = cq_context;
345 struct p9_trans_rdma *rdma = client->trans;
346 int ret;
347 struct ib_wc wc;
348
349 ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
350 while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
351 struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id;
352
353 switch (c->wc_op) {
354 case IB_WC_RECV:
355 handle_recv(client, rdma, c, wc.status, wc.byte_len);
356 up(&rdma->rq_sem);
357 break;
358
359 case IB_WC_SEND:
360 handle_send(client, rdma, c, wc.status, wc.byte_len);
361 up(&rdma->sq_sem);
362 break;
363
364 default:
365 pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n",
366 c->wc_op, wc.opcode, wc.status);
367 break;
368 }
369 kfree(c);
370 }
371}
372
373static void cq_event_handler(struct ib_event *e, void *v)
374{
375 p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
376}
377
378static void rdma_destroy_trans(struct p9_trans_rdma *rdma) 355static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
379{ 356{
380 if (!rdma) 357 if (!rdma)
@@ -387,7 +364,7 @@ static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
387 ib_dealloc_pd(rdma->pd); 364 ib_dealloc_pd(rdma->pd);
388 365
389 if (rdma->cq && !IS_ERR(rdma->cq)) 366 if (rdma->cq && !IS_ERR(rdma->cq))
390 ib_destroy_cq(rdma->cq); 367 ib_free_cq(rdma->cq);
391 368
392 if (rdma->cm_id && !IS_ERR(rdma->cm_id)) 369 if (rdma->cm_id && !IS_ERR(rdma->cm_id))
393 rdma_destroy_id(rdma->cm_id); 370 rdma_destroy_id(rdma->cm_id);
@@ -408,13 +385,14 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
408 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) 385 if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
409 goto error; 386 goto error;
410 387
388 c->cqe.done = recv_done;
389
411 sge.addr = c->busa; 390 sge.addr = c->busa;
412 sge.length = client->msize; 391 sge.length = client->msize;
413 sge.lkey = rdma->pd->local_dma_lkey; 392 sge.lkey = rdma->pd->local_dma_lkey;
414 393
415 wr.next = NULL; 394 wr.next = NULL;
416 c->wc_op = IB_WC_RECV; 395 wr.wr_cqe = &c->cqe;
417 wr.wr_id = (unsigned long) c;
418 wr.sg_list = &sge; 396 wr.sg_list = &sge;
419 wr.num_sge = 1; 397 wr.num_sge = 1;
420 return ib_post_recv(rdma->qp, &wr, &bad_wr); 398 return ib_post_recv(rdma->qp, &wr, &bad_wr);
@@ -499,13 +477,14 @@ dont_need_post_recv:
499 goto send_error; 477 goto send_error;
500 } 478 }
501 479
480 c->cqe.done = send_done;
481
502 sge.addr = c->busa; 482 sge.addr = c->busa;
503 sge.length = c->req->tc->size; 483 sge.length = c->req->tc->size;
504 sge.lkey = rdma->pd->local_dma_lkey; 484 sge.lkey = rdma->pd->local_dma_lkey;
505 485
506 wr.next = NULL; 486 wr.next = NULL;
507 c->wc_op = IB_WC_SEND; 487 wr.wr_cqe = &c->cqe;
508 wr.wr_id = (unsigned long) c;
509 wr.opcode = IB_WR_SEND; 488 wr.opcode = IB_WR_SEND;
510 wr.send_flags = IB_SEND_SIGNALED; 489 wr.send_flags = IB_SEND_SIGNALED;
511 wr.sg_list = &sge; 490 wr.sg_list = &sge;
@@ -642,7 +621,6 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
642 struct p9_trans_rdma *rdma; 621 struct p9_trans_rdma *rdma;
643 struct rdma_conn_param conn_param; 622 struct rdma_conn_param conn_param;
644 struct ib_qp_init_attr qp_attr; 623 struct ib_qp_init_attr qp_attr;
645 struct ib_cq_init_attr cq_attr = {};
646 624
647 /* Parse the transport specific mount options */ 625 /* Parse the transport specific mount options */
648 err = parse_opts(args, &opts); 626 err = parse_opts(args, &opts);
@@ -695,13 +673,11 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
695 goto error; 673 goto error;
696 674
697 /* Create the Completion Queue */ 675 /* Create the Completion Queue */
698 cq_attr.cqe = opts.sq_depth + opts.rq_depth + 1; 676 rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
699 rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, 677 opts.sq_depth + opts.rq_depth + 1,
700 cq_event_handler, client, 678 0, IB_POLL_SOFTIRQ);
701 &cq_attr);
702 if (IS_ERR(rdma->cq)) 679 if (IS_ERR(rdma->cq))
703 goto error; 680 goto error;
704 ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
705 681
706 /* Create the Protection Domain */ 682 /* Create the Protection Domain */
707 rdma->pd = ib_alloc_pd(rdma->cm_id->device); 683 rdma->pd = ib_alloc_pd(rdma->cm_id->device);
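The trans_rdma.c conversion above moves 9p from the old ib_create_cq()/ib_poll_cq() scheme, where the transport polled the CQ itself and recovered its context from wc.wr_id, to the newer completion-queue API: ib_alloc_cq() owns the polling (IB_POLL_SOFTIRQ here, freed with ib_free_cq()), every work request carries a struct ib_cqe, and the core invokes its ->done() callback directly, so recv_done()/send_done() replace the hand-rolled cq_comp_handler(). A minimal sketch of the pattern, with my_ctx/my_done as illustrative names only:

    struct my_ctx {
            struct ib_cqe cqe;              /* embedded completion entry */
            dma_addr_t busa;
    };

    static void my_done(struct ib_cq *cq, struct ib_wc *wc)
    {
            struct my_ctx *c = container_of(wc->wr_cqe, struct my_ctx, cqe);

            if (wc->status != IB_WC_SUCCESS)
                    pr_err("WR failed, status %d\n", wc->status);
            kfree(c);
    }

    /* when posting the work request: */
    c->cqe.done = my_done;
    wr.wr_cqe = &c->cqe;                    /* replaces wr.wr_id = (unsigned long)c */
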
diff --git a/net/Kconfig b/net/Kconfig
index 174354618f8a..a8934d8c8fda 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -253,6 +253,9 @@ config XPS
253 depends on SMP 253 depends on SMP
254 default y 254 default y
255 255
256config HWBM
257 bool
258
256config SOCK_CGROUP_DATA 259config SOCK_CGROUP_DATA
257 bool 260 bool
258 default n 261 default n
@@ -360,6 +363,7 @@ source "net/can/Kconfig"
360source "net/irda/Kconfig" 363source "net/irda/Kconfig"
361source "net/bluetooth/Kconfig" 364source "net/bluetooth/Kconfig"
362source "net/rxrpc/Kconfig" 365source "net/rxrpc/Kconfig"
366source "net/kcm/Kconfig"
363 367
364config FIB_RULES 368config FIB_RULES
365 bool 369 bool
@@ -392,6 +396,26 @@ config LWTUNNEL
392 weight tunnel endpoint. Tunnel encapsulation parameters are stored 396 weight tunnel endpoint. Tunnel encapsulation parameters are stored
393 with light weight tunnel state associated with fib routes. 397 with light weight tunnel state associated with fib routes.
394 398
399config DST_CACHE
400 bool
401 default n
402
403config NET_DEVLINK
404 tristate "Network physical/parent device Netlink interface"
405 help
406 Network physical/parent device Netlink interface provides
407 infrastructure to support access to physical chip-wide config and
408 monitoring.
409
410config MAY_USE_DEVLINK
411 tristate
412 default m if NET_DEVLINK=m
413 default y if NET_DEVLINK=y || NET_DEVLINK=n
414 help
415 Drivers using the devlink infrastructure should have a dependency
416 on MAY_USE_DEVLINK to ensure they do not cause link errors when
417 devlink is a loadable module and the driver using it is built-in.
418
395endif # if NET 419endif # if NET
396 420
397# Used by archs to tell that they support BPF_JIT 421# Used by archs to tell that they support BPF_JIT
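Of the net/Kconfig additions above, MAY_USE_DEVLINK deserves a closer look: it is a promptless tristate that tracks NET_DEVLINK, so a driver that merely depends on it can still be built when devlink is disabled, but can no longer be built-in while devlink itself is a module (which would otherwise leave unresolved symbols at link time). A driver's Kconfig entry would typically look like this (FOO is a placeholder symbol, not from this patch):

    config FOO
            tristate "Foo NIC driver"
            depends on MAY_USE_DEVLINK
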
diff --git a/net/Makefile b/net/Makefile
index a5d04098dfce..81d14119eab5 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_IRDA) += irda/
34obj-$(CONFIG_BT) += bluetooth/ 34obj-$(CONFIG_BT) += bluetooth/
35obj-$(CONFIG_SUNRPC) += sunrpc/ 35obj-$(CONFIG_SUNRPC) += sunrpc/
36obj-$(CONFIG_AF_RXRPC) += rxrpc/ 36obj-$(CONFIG_AF_RXRPC) += rxrpc/
37obj-$(CONFIG_AF_KCM) += kcm/
37obj-$(CONFIG_ATM) += atm/ 38obj-$(CONFIG_ATM) += atm/
38obj-$(CONFIG_L2TP) += l2tp/ 39obj-$(CONFIG_L2TP) += l2tp/
39obj-$(CONFIG_DECNET) += decnet/ 40obj-$(CONFIG_DECNET) += decnet/
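The net/Makefile hunk pairs with the new source "net/kcm/Kconfig" line above: KCM (Kernel Connection Multiplexor) adds the AF_KCM socket family, which multiplexes application messages over TCP connections and uses a BPF program to find message boundaries. Roughly following the accompanying Documentation/networking/kcm.txt, user space attaches an established TCP socket to a KCM socket like this (sketch only, error handling omitted):

    int kcm_fd = socket(AF_KCM, SOCK_DGRAM, KCMPROTO_CONNECTED);
    struct kcm_attach attach = {
            .fd     = tcp_fd,       /* connected TCP socket */
            .bpf_fd = bpf_fd,       /* BPF program returning the message length */
    };
    ioctl(kcm_fd, SIOCKCMATTACH, &attach);
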
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index d5871ac493eb..f066781be3c8 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1625,7 +1625,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1625 1625
1626 rt = atrtr_find(&at_hint); 1626 rt = atrtr_find(&at_hint);
1627 } 1627 }
1628 err = ENETUNREACH; 1628 err = -ENETUNREACH;
1629 if (!rt) 1629 if (!rt)
1630 goto out; 1630 goto out;
1631 1631
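The one-character appletalk fix above restores the kernel convention that error paths carry negative errno values; with a positive ENETUNREACH the "no route" case in atalk_sendmsg() would have been reported as a bogus positive return rather than an error. In sketch form:

    if (!rt)
            return -ENETUNREACH;    /* not ENETUNREACH */
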
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index b563a3f5f2a8..2fa3be965101 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -228,8 +228,23 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
228} 228}
229#endif 229#endif
230 230
231static bool ax25_validate_header(const char *header, unsigned int len)
232{
233 ax25_digi digi;
234
235 if (!len)
236 return false;
237
238 if (header[0])
239 return true;
240
241 return ax25_addr_parse(header + 1, len - 1, NULL, NULL, &digi, NULL,
242 NULL);
243}
244
231const struct header_ops ax25_header_ops = { 245const struct header_ops ax25_header_ops = {
232 .create = ax25_hard_header, 246 .create = ax25_hard_header,
247 .validate = ax25_validate_header,
233}; 248};
234 249
235EXPORT_SYMBOL(ax25_header_ops); 250EXPORT_SYMBOL(ax25_header_ops);
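The new ax25_validate_header() above wires AX.25 into the header_ops->validate hook, so the core can check a caller-supplied link-layer header (for instance one injected through an AF_PACKET socket) before it is parsed and transmitted. The consuming side looks roughly like this (simplified from dev_validate_header(); not part of this hunk):

    if (dev->header_ops && dev->header_ops->validate)
            return dev->header_ops->validate(ll_header, len);
    return false;
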
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index c6fc8f756c9a..f66930ee3c0b 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -12,9 +12,23 @@ config BATMAN_ADV
12 B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is 12 B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
13 a routing protocol for multi-hop ad-hoc mesh networks. The 13 a routing protocol for multi-hop ad-hoc mesh networks. The
14 networks may be wired or wireless. See 14 networks may be wired or wireless. See
15 http://www.open-mesh.org/ for more information and user space 15 https://www.open-mesh.org/ for more information and user space
16 tools. 16 tools.
17 17
18config BATMAN_ADV_BATMAN_V
19 bool "B.A.T.M.A.N. V protocol (experimental)"
20 depends on BATMAN_ADV && CFG80211=y || (CFG80211=m && BATMAN_ADV=m)
21 default n
22 help
23 This option enables the B.A.T.M.A.N. V protocol, the successor
24 of the currently used B.A.T.M.A.N. IV protocol. The main
25 changes include splitting of the OGM protocol into a neighbor
26 discovery protocol (Echo Location Protocol, ELP) and a new OGM
27 Protocol OGMv2 for flooding protocol information through the
28 network, as well as a throughput based metric.
29 B.A.T.M.A.N. V is currently considered experimental and not
 30 compatible with B.A.T.M.A.N. IV networks.
31
18config BATMAN_ADV_BLA 32config BATMAN_ADV_BLA
19 bool "Bridge Loop Avoidance" 33 bool "Bridge Loop Avoidance"
20 depends on BATMAN_ADV && INET 34 depends on BATMAN_ADV && INET
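In the BATMAN_ADV_BATMAN_V dependency above, && binds tighter than ||, so the expression reads (BATMAN_ADV && CFG80211=y) || (CFG80211=m && BATMAN_ADV=m). That keeps the cfg80211 calls used by the new throughput metric linkable: either cfg80211 is built in, or both batman-adv and cfg80211 are modules; a built-in batman-adv can never end up calling into a modular cfg80211.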
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 21434ab79d2c..797cf2fc88c1 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
1# 1#
2# Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 2# Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
3# 3#
4# Marek Lindner, Simon Wunderlich 4# Marek Lindner, Simon Wunderlich
5# 5#
@@ -18,6 +18,9 @@
18 18
19obj-$(CONFIG_BATMAN_ADV) += batman-adv.o 19obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
20batman-adv-y += bat_iv_ogm.o 20batman-adv-y += bat_iv_ogm.o
21batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v.o
22batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o
23batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_ogm.o
21batman-adv-y += bitarray.o 24batman-adv-y += bitarray.o
22batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o 25batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
23batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o 26batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o
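The Makefile additions rely on the usual kbuild idiom: "batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += ..." expands to "batman-adv-y += ..." only when the (boolean) option is enabled, so bat_v.o, bat_v_elp.o and bat_v_ogm.o are linked into batman-adv only for B.A.T.M.A.N. V builds and cost nothing otherwise.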
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 4e59cf3eb079..03dafd33d23b 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,6 +1,6 @@
1/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner, Linus Lüssing
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public 6 * modify it under the terms of version 2 of the GNU General Public
@@ -18,6 +18,32 @@
18#ifndef _NET_BATMAN_ADV_BAT_ALGO_H_ 18#ifndef _NET_BATMAN_ADV_BAT_ALGO_H_
19#define _NET_BATMAN_ADV_BAT_ALGO_H_ 19#define _NET_BATMAN_ADV_BAT_ALGO_H_
20 20
21struct batadv_priv;
22
21int batadv_iv_init(void); 23int batadv_iv_init(void);
22 24
25#ifdef CONFIG_BATMAN_ADV_BATMAN_V
26
27int batadv_v_init(void);
28int batadv_v_mesh_init(struct batadv_priv *bat_priv);
29void batadv_v_mesh_free(struct batadv_priv *bat_priv);
30
31#else
32
33static inline int batadv_v_init(void)
34{
35 return 0;
36}
37
38static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv)
39{
40 return 0;
41}
42
43static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv)
44{
45}
46
47#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
48
23#endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */ 49#endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */
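The #ifdef block added to bat_algo.h follows the common pattern of compiling an optional algorithm out behind static inline no-ops: call sites such as the mesh init/teardown paths can invoke batadv_v_mesh_init()/batadv_v_mesh_free() unconditionally and stay free of #ifdefs. An illustrative caller (not from this hunk):

    ret = batadv_v_mesh_init(bat_priv);     /* returns 0 when BATMAN_V is compiled out */
    if (ret < 0)
            goto err;
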
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index df625de55ef2..cb2d1b9b0340 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -31,6 +31,7 @@
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/jiffies.h> 32#include <linux/jiffies.h>
33#include <linux/list.h> 33#include <linux/list.h>
34#include <linux/kref.h>
34#include <linux/netdevice.h> 35#include <linux/netdevice.h>
35#include <linux/pkt_sched.h> 36#include <linux/pkt_sched.h>
36#include <linux/printk.h> 37#include <linux/printk.h>
@@ -88,7 +89,7 @@ static void batadv_ring_buffer_set(u8 lq_recv[], u8 *lq_index, u8 value)
88 * in the given ring buffer 89 * in the given ring buffer
89 * @lq_recv: pointer to the ring buffer 90 * @lq_recv: pointer to the ring buffer
90 * 91 *
91 * Returns computed average value. 92 * Return: computed average value.
92 */ 93 */
93static u8 batadv_ring_buffer_avg(const u8 lq_recv[]) 94static u8 batadv_ring_buffer_avg(const u8 lq_recv[])
94{ 95{
@@ -132,7 +133,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
132 * @orig_node: the orig_node that has to be changed 133 * @orig_node: the orig_node that has to be changed
133 * @max_if_num: the current amount of interfaces 134 * @max_if_num: the current amount of interfaces
134 * 135 *
135 * Returns 0 on success, a negative error code otherwise. 136 * Return: 0 on success, a negative error code otherwise.
136 */ 137 */
137static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, 138static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
138 int max_if_num) 139 int max_if_num)
@@ -180,7 +181,7 @@ unlock:
180 * @max_if_num: the current amount of interfaces 181 * @max_if_num: the current amount of interfaces
181 * @del_if_num: the index of the interface being removed 182 * @del_if_num: the index of the interface being removed
182 * 183 *
183 * Returns 0 on success, a negative error code otherwise. 184 * Return: 0 on success, a negative error code otherwise.
184 */ 185 */
185static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, 186static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
186 int max_if_num, int del_if_num) 187 int max_if_num, int del_if_num)
@@ -246,7 +247,7 @@ unlock:
246 * @bat_priv: the bat priv with all the soft interface information 247 * @bat_priv: the bat priv with all the soft interface information
247 * @addr: mac address of the originator 248 * @addr: mac address of the originator
248 * 249 *
249 * Returns the originator object corresponding to the passed mac address or NULL 250 * Return: the originator object corresponding to the passed mac address or NULL
250 * on failure. 251 * on failure.
251 * If the object does not exist, it is created and initialised. 252 * If the object does not exist, it is created and initialised.
252 */ 253 */
@@ -286,8 +287,8 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr)
286 287
287free_orig_node: 288free_orig_node:
288 /* free twice, as batadv_orig_node_new sets refcount to 2 */ 289 /* free twice, as batadv_orig_node_new sets refcount to 2 */
289 batadv_orig_node_free_ref(orig_node); 290 batadv_orig_node_put(orig_node);
290 batadv_orig_node_free_ref(orig_node); 291 batadv_orig_node_put(orig_node);
291 292
292 return NULL; 293 return NULL;
293} 294}
@@ -396,7 +397,14 @@ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv)
396 return new_tq; 397 return new_tq;
397} 398}
398 399
399/* is there another aggregated packet here? */ 400/**
401 * batadv_iv_ogm_aggr_packet - checks if there is another OGM attached
402 * @buff_pos: current position in the skb
403 * @packet_len: total length of the skb
404 * @tvlv_len: tvlv length of the previously considered OGM
405 *
406 * Return: true if there is enough space for another OGM, false otherwise.
407 */
400static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, 408static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
401 __be16 tvlv_len) 409 __be16 tvlv_len)
402{ 410{
@@ -470,7 +478,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
470 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX); 478 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
471 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES, 479 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
472 skb->len + ETH_HLEN); 480 skb->len + ETH_HLEN);
473 batadv_send_skb_packet(skb, hard_iface, batadv_broadcast_addr); 481 batadv_send_broadcast_skb(skb, hard_iface);
474 } 482 }
475} 483}
476 484
@@ -507,7 +515,7 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
507 515
508out: 516out:
509 if (primary_if) 517 if (primary_if)
510 batadv_hardif_free_ref(primary_if); 518 batadv_hardif_put(primary_if);
511} 519}
512 520
513/** 521/**
@@ -522,7 +530,7 @@ out:
522 * @if_outgoing: interface for which the retransmission should be considered 530 * @if_outgoing: interface for which the retransmission should be considered
523 * @forw_packet: the forwarded packet which should be checked 531 * @forw_packet: the forwarded packet which should be checked
524 * 532 *
525 * Returns true if new_packet can be aggregated with forw_packet 533 * Return: true if new_packet can be aggregated with forw_packet
526 */ 534 */
527static bool 535static bool
528batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, 536batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
@@ -609,7 +617,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
609 617
610out: 618out:
611 if (primary_if) 619 if (primary_if)
612 batadv_hardif_free_ref(primary_if); 620 batadv_hardif_put(primary_if);
613 return res; 621 return res;
614} 622}
615 623
@@ -636,10 +644,10 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
636 unsigned char *skb_buff; 644 unsigned char *skb_buff;
637 unsigned int skb_size; 645 unsigned int skb_size;
638 646
639 if (!atomic_inc_not_zero(&if_incoming->refcount)) 647 if (!kref_get_unless_zero(&if_incoming->refcount))
640 return; 648 return;
641 649
642 if (!atomic_inc_not_zero(&if_outgoing->refcount)) 650 if (!kref_get_unless_zero(&if_outgoing->refcount))
643 goto out_free_incoming; 651 goto out_free_incoming;
644 652
645 /* own packet should always be scheduled */ 653 /* own packet should always be scheduled */
@@ -703,9 +711,9 @@ out_nomem:
703 if (!own_packet) 711 if (!own_packet)
704 atomic_inc(&bat_priv->batman_queue_left); 712 atomic_inc(&bat_priv->batman_queue_left);
705out_free_outgoing: 713out_free_outgoing:
706 batadv_hardif_free_ref(if_outgoing); 714 batadv_hardif_put(if_outgoing);
707out_free_incoming: 715out_free_incoming:
708 batadv_hardif_free_ref(if_incoming); 716 batadv_hardif_put(if_incoming);
709} 717}
710 718
711/* aggregate a new packet into the existing ogm packet */ 719/* aggregate a new packet into the existing ogm packet */
@@ -950,7 +958,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
950 958
951out: 959out:
952 if (primary_if) 960 if (primary_if)
953 batadv_hardif_free_ref(primary_if); 961 batadv_hardif_put(primary_if);
954} 962}
955 963
956/** 964/**
@@ -995,9 +1003,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
995 neigh_addr = tmp_neigh_node->addr; 1003 neigh_addr = tmp_neigh_node->addr;
996 if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && 1004 if (batadv_compare_eth(neigh_addr, ethhdr->h_source) &&
997 tmp_neigh_node->if_incoming == if_incoming && 1005 tmp_neigh_node->if_incoming == if_incoming &&
998 atomic_inc_not_zero(&tmp_neigh_node->refcount)) { 1006 kref_get_unless_zero(&tmp_neigh_node->refcount)) {
999 if (WARN(neigh_node, "too many matching neigh_nodes")) 1007 if (WARN(neigh_node, "too many matching neigh_nodes"))
1000 batadv_neigh_node_free_ref(neigh_node); 1008 batadv_neigh_node_put(neigh_node);
1001 neigh_node = tmp_neigh_node; 1009 neigh_node = tmp_neigh_node;
1002 continue; 1010 continue;
1003 } 1011 }
@@ -1018,7 +1026,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
1018 neigh_ifinfo->bat_iv.tq_avg = tq_avg; 1026 neigh_ifinfo->bat_iv.tq_avg = tq_avg;
1019 spin_unlock_bh(&tmp_neigh_node->ifinfo_lock); 1027 spin_unlock_bh(&tmp_neigh_node->ifinfo_lock);
1020 1028
1021 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 1029 batadv_neigh_ifinfo_put(neigh_ifinfo);
1022 neigh_ifinfo = NULL; 1030 neigh_ifinfo = NULL;
1023 } 1031 }
1024 1032
@@ -1033,7 +1041,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
1033 ethhdr->h_source, 1041 ethhdr->h_source,
1034 orig_node, orig_tmp); 1042 orig_node, orig_tmp);
1035 1043
1036 batadv_orig_node_free_ref(orig_tmp); 1044 batadv_orig_node_put(orig_tmp);
1037 if (!neigh_node) 1045 if (!neigh_node)
1038 goto unlock; 1046 goto unlock;
1039 } else { 1047 } else {
@@ -1108,13 +1116,13 @@ unlock:
1108 rcu_read_unlock(); 1116 rcu_read_unlock();
1109out: 1117out:
1110 if (neigh_node) 1118 if (neigh_node)
1111 batadv_neigh_node_free_ref(neigh_node); 1119 batadv_neigh_node_put(neigh_node);
1112 if (router) 1120 if (router)
1113 batadv_neigh_node_free_ref(router); 1121 batadv_neigh_node_put(router);
1114 if (neigh_ifinfo) 1122 if (neigh_ifinfo)
1115 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 1123 batadv_neigh_ifinfo_put(neigh_ifinfo);
1116 if (router_ifinfo) 1124 if (router_ifinfo)
1117 batadv_neigh_ifinfo_free_ref(router_ifinfo); 1125 batadv_neigh_ifinfo_put(router_ifinfo);
1118} 1126}
1119 1127
1120/** 1128/**
@@ -1125,7 +1133,7 @@ out:
1125 * @if_incoming: interface where the packet was received 1133 * @if_incoming: interface where the packet was received
1126 * @if_outgoing: interface for which the retransmission should be considered 1134 * @if_outgoing: interface for which the retransmission should be considered
1127 * 1135 *
1128 * Returns 1 if the link can be considered bidirectional, 0 otherwise 1136 * Return: 1 if the link can be considered bidirectional, 0 otherwise
1129 */ 1137 */
1130static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, 1138static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1131 struct batadv_orig_node *orig_neigh_node, 1139 struct batadv_orig_node *orig_neigh_node,
@@ -1154,7 +1162,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1154 if (tmp_neigh_node->if_incoming != if_incoming) 1162 if (tmp_neigh_node->if_incoming != if_incoming)
1155 continue; 1163 continue;
1156 1164
1157 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) 1165 if (!kref_get_unless_zero(&tmp_neigh_node->refcount))
1158 continue; 1166 continue;
1159 1167
1160 neigh_node = tmp_neigh_node; 1168 neigh_node = tmp_neigh_node;
@@ -1184,7 +1192,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1184 neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); 1192 neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
1185 if (neigh_ifinfo) { 1193 if (neigh_ifinfo) {
1186 neigh_rq_count = neigh_ifinfo->bat_iv.real_packet_count; 1194 neigh_rq_count = neigh_ifinfo->bat_iv.real_packet_count;
1187 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 1195 batadv_neigh_ifinfo_put(neigh_ifinfo);
1188 } else { 1196 } else {
1189 neigh_rq_count = 0; 1197 neigh_rq_count = 0;
1190 } 1198 }
@@ -1257,7 +1265,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
1257 1265
1258out: 1266out:
1259 if (neigh_node) 1267 if (neigh_node)
1260 batadv_neigh_node_free_ref(neigh_node); 1268 batadv_neigh_node_put(neigh_node);
1261 return ret; 1269 return ret;
1262} 1270}
1263 1271
@@ -1269,7 +1277,7 @@ out:
1269 * @if_incoming: interface on which the OGM packet was received 1277 * @if_incoming: interface on which the OGM packet was received
1270 * @if_outgoing: interface for which the retransmission should be considered 1278 * @if_outgoing: interface for which the retransmission should be considered
1271 * 1279 *
1272 * Returns duplicate status as enum batadv_dup_status 1280 * Return: duplicate status as enum batadv_dup_status
1273 */ 1281 */
1274static enum batadv_dup_status 1282static enum batadv_dup_status
1275batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, 1283batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
@@ -1298,7 +1306,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
1298 1306
1299 orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); 1307 orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
1300 if (WARN_ON(!orig_ifinfo)) { 1308 if (WARN_ON(!orig_ifinfo)) {
1301 batadv_orig_node_free_ref(orig_node); 1309 batadv_orig_node_put(orig_node);
1302 return 0; 1310 return 0;
1303 } 1311 }
1304 1312
@@ -1308,7 +1316,8 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
1308 /* signalize caller that the packet is to be dropped. */ 1316 /* signalize caller that the packet is to be dropped. */
1309 if (!hlist_empty(&orig_node->neigh_list) && 1317 if (!hlist_empty(&orig_node->neigh_list) &&
1310 batadv_window_protected(bat_priv, seq_diff, 1318 batadv_window_protected(bat_priv, seq_diff,
1311 &orig_ifinfo->batman_seqno_reset)) { 1319 BATADV_TQ_LOCAL_WINDOW_SIZE,
1320 &orig_ifinfo->batman_seqno_reset, NULL)) {
1312 ret = BATADV_PROTECTED; 1321 ret = BATADV_PROTECTED;
1313 goto out; 1322 goto out;
1314 } 1323 }
@@ -1344,7 +1353,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
1344 packet_count = bitmap_weight(bitmap, 1353 packet_count = bitmap_weight(bitmap,
1345 BATADV_TQ_LOCAL_WINDOW_SIZE); 1354 BATADV_TQ_LOCAL_WINDOW_SIZE);
1346 neigh_ifinfo->bat_iv.real_packet_count = packet_count; 1355 neigh_ifinfo->bat_iv.real_packet_count = packet_count;
1347 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 1356 batadv_neigh_ifinfo_put(neigh_ifinfo);
1348 } 1357 }
1349 rcu_read_unlock(); 1358 rcu_read_unlock();
1350 1359
@@ -1358,8 +1367,8 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
1358 1367
1359out: 1368out:
1360 spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); 1369 spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
1361 batadv_orig_node_free_ref(orig_node); 1370 batadv_orig_node_put(orig_node);
1362 batadv_orig_ifinfo_free_ref(orig_ifinfo); 1371 batadv_orig_ifinfo_put(orig_ifinfo);
1363 return ret; 1372 return ret;
1364} 1373}
1365 1374
@@ -1505,7 +1514,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
1505 ogm_packet, if_incoming, 1514 ogm_packet, if_incoming,
1506 if_outgoing, dup_status); 1515 if_outgoing, dup_status);
1507 } 1516 }
1508 batadv_orig_ifinfo_free_ref(orig_ifinfo); 1517 batadv_orig_ifinfo_put(orig_ifinfo);
1509 1518
1510 /* only forward for specific interface, not for the default one. */ 1519 /* only forward for specific interface, not for the default one. */
1511 if (if_outgoing == BATADV_IF_DEFAULT) 1520 if (if_outgoing == BATADV_IF_DEFAULT)
@@ -1554,18 +1563,18 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
1554 1563
1555out_neigh: 1564out_neigh:
1556 if ((orig_neigh_node) && (!is_single_hop_neigh)) 1565 if ((orig_neigh_node) && (!is_single_hop_neigh))
1557 batadv_orig_node_free_ref(orig_neigh_node); 1566 batadv_orig_node_put(orig_neigh_node);
1558out: 1567out:
1559 if (router_ifinfo) 1568 if (router_ifinfo)
1560 batadv_neigh_ifinfo_free_ref(router_ifinfo); 1569 batadv_neigh_ifinfo_put(router_ifinfo);
1561 if (router) 1570 if (router)
1562 batadv_neigh_node_free_ref(router); 1571 batadv_neigh_node_put(router);
1563 if (router_router) 1572 if (router_router)
1564 batadv_neigh_node_free_ref(router_router); 1573 batadv_neigh_node_put(router_router);
1565 if (orig_neigh_router) 1574 if (orig_neigh_router)
1566 batadv_neigh_node_free_ref(orig_neigh_router); 1575 batadv_neigh_node_put(orig_neigh_router);
1567 if (hardif_neigh) 1576 if (hardif_neigh)
1568 batadv_hardif_neigh_free_ref(hardif_neigh); 1577 batadv_hardif_neigh_put(hardif_neigh);
1569 1578
1570 kfree_skb(skb_priv); 1579 kfree_skb(skb_priv);
1571} 1580}
@@ -1688,7 +1697,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
1688 1697
1689 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 1698 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
1690 "Drop packet: originator packet from myself (via neighbor)\n"); 1699 "Drop packet: originator packet from myself (via neighbor)\n");
1691 batadv_orig_node_free_ref(orig_neigh_node); 1700 batadv_orig_node_put(orig_neigh_node);
1692 return; 1701 return;
1693 } 1702 }
1694 1703
@@ -1726,7 +1735,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
1726 } 1735 }
1727 rcu_read_unlock(); 1736 rcu_read_unlock();
1728 1737
1729 batadv_orig_node_free_ref(orig_node); 1738 batadv_orig_node_put(orig_node);
1730} 1739}
1731 1740
1732static int batadv_iv_ogm_receive(struct sk_buff *skb, 1741static int batadv_iv_ogm_receive(struct sk_buff *skb,
@@ -1796,7 +1805,7 @@ batadv_iv_ogm_orig_print_neigh(struct batadv_orig_node *orig_node,
1796 neigh_node->addr, 1805 neigh_node->addr,
1797 n_ifinfo->bat_iv.tq_avg); 1806 n_ifinfo->bat_iv.tq_avg);
1798 1807
1799 batadv_neigh_ifinfo_free_ref(n_ifinfo); 1808 batadv_neigh_ifinfo_put(n_ifinfo);
1800 } 1809 }
1801} 1810}
1802 1811
@@ -1859,9 +1868,9 @@ static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
1859 batman_count++; 1868 batman_count++;
1860 1869
1861next: 1870next:
1862 batadv_neigh_node_free_ref(neigh_node); 1871 batadv_neigh_node_put(neigh_node);
1863 if (n_ifinfo) 1872 if (n_ifinfo)
1864 batadv_neigh_ifinfo_free_ref(n_ifinfo); 1873 batadv_neigh_ifinfo_put(n_ifinfo);
1865 } 1874 }
1866 rcu_read_unlock(); 1875 rcu_read_unlock();
1867 } 1876 }
@@ -1929,7 +1938,7 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv,
1929 * @neigh2: the second neighbor object of the comparison 1938 * @neigh2: the second neighbor object of the comparison
1930 * @if_outgoing2: outgoing interface for the second neighbor 1939 * @if_outgoing2: outgoing interface for the second neighbor
1931 * 1940 *
1932 * Returns a value less, equal to or greater than 0 if the metric via neigh1 is 1941 * Return: a value less, equal to or greater than 0 if the metric via neigh1 is
1933 * lower, the same as or higher than the metric via neigh2 1942 * lower, the same as or higher than the metric via neigh2
1934 */ 1943 */
1935static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, 1944static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
@@ -1955,9 +1964,9 @@ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
1955 1964
1956out: 1965out:
1957 if (neigh1_ifinfo) 1966 if (neigh1_ifinfo)
1958 batadv_neigh_ifinfo_free_ref(neigh1_ifinfo); 1967 batadv_neigh_ifinfo_put(neigh1_ifinfo);
1959 if (neigh2_ifinfo) 1968 if (neigh2_ifinfo)
1960 batadv_neigh_ifinfo_free_ref(neigh2_ifinfo); 1969 batadv_neigh_ifinfo_put(neigh2_ifinfo);
1961 1970
1962 return diff; 1971 return diff;
1963} 1972}
@@ -1970,7 +1979,7 @@ out:
1970 * @neigh2: the second neighbor object of the comparison 1979 * @neigh2: the second neighbor object of the comparison
1971 * @if_outgoing2: outgoing interface for the second neighbor 1980 * @if_outgoing2: outgoing interface for the second neighbor
1972 * 1981 *
1973 * Returns true if the metric via neigh1 is equally good or better than 1982 * Return: true if the metric via neigh1 is equally good or better than
1974 * the metric via neigh2, false otherwise. 1983 * the metric via neigh2, false otherwise.
1975 */ 1984 */
1976static bool 1985static bool
@@ -1998,9 +2007,9 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1,
1998 2007
1999out: 2008out:
2000 if (neigh1_ifinfo) 2009 if (neigh1_ifinfo)
2001 batadv_neigh_ifinfo_free_ref(neigh1_ifinfo); 2010 batadv_neigh_ifinfo_put(neigh1_ifinfo);
2002 if (neigh2_ifinfo) 2011 if (neigh2_ifinfo)
2003 batadv_neigh_ifinfo_free_ref(neigh2_ifinfo); 2012 batadv_neigh_ifinfo_put(neigh2_ifinfo);
2004 2013
2005 return ret; 2014 return ret;
2006} 2015}
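Two mechanical conversions dominate the bat_iv_ogm.c diff above: kernel-doc comments switch from "Returns" to the "Return:" section header that the kernel-doc tool actually recognizes, and the reference-counting helpers move from atomic_inc_not_zero()/*_free_ref() naming to kref_get_unless_zero()/*_put() on a struct kref. A minimal sketch of the underlying pattern, with foo/foo_release as illustrative names:

    struct foo {
            struct kref refcount;
    };

    static void foo_release(struct kref *ref)
    {
            struct foo *f = container_of(ref, struct foo, refcount);

            kfree(f);
    }

    if (!kref_get_unless_zero(&f->refcount))        /* object already dying */
            return;
    /* ... use f ... */
    kref_put(&f->refcount, foo_release);            /* what the new *_put() helpers wrap */
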
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
new file mode 100644
index 000000000000..3315b9a598af
--- /dev/null
+++ b/net/batman-adv/bat_v.c
@@ -0,0 +1,347 @@
1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 *
3 * Linus Lüssing, Marek Lindner
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "bat_algo.h"
19#include "main.h"
20
21#include <linux/atomic.h>
22#include <linux/bug.h>
23#include <linux/cache.h>
24#include <linux/init.h>
25#include <linux/jiffies.h>
26#include <linux/netdevice.h>
27#include <linux/rculist.h>
28#include <linux/rcupdate.h>
29#include <linux/seq_file.h>
30#include <linux/types.h>
31#include <linux/workqueue.h>
32
33#include "bat_v_elp.h"
34#include "bat_v_ogm.h"
35#include "hash.h"
36#include "originator.h"
37#include "packet.h"
38
39static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface)
40{
41 int ret;
42
43 ret = batadv_v_elp_iface_enable(hard_iface);
44 if (ret < 0)
45 return ret;
46
47 ret = batadv_v_ogm_iface_enable(hard_iface);
48 if (ret < 0)
49 batadv_v_elp_iface_disable(hard_iface);
50
51 /* enable link throughput auto-detection by setting the throughput
52 * override to zero
53 */
54 atomic_set(&hard_iface->bat_v.throughput_override, 0);
55
56 return ret;
57}
58
59static void batadv_v_iface_disable(struct batadv_hard_iface *hard_iface)
60{
61 batadv_v_elp_iface_disable(hard_iface);
62}
63
64static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface)
65{
66}
67
68static void batadv_v_primary_iface_set(struct batadv_hard_iface *hard_iface)
69{
70 batadv_v_elp_primary_iface_set(hard_iface);
71 batadv_v_ogm_primary_iface_set(hard_iface);
72}
73
74static void
75batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
76{
77 ewma_throughput_init(&hardif_neigh->bat_v.throughput);
78 INIT_WORK(&hardif_neigh->bat_v.metric_work,
79 batadv_v_elp_throughput_metric_update);
80}
81
82static void batadv_v_ogm_schedule(struct batadv_hard_iface *hard_iface)
83{
84}
85
86static void batadv_v_ogm_emit(struct batadv_forw_packet *forw_packet)
87{
88}
89
90/**
91 * batadv_v_orig_print_neigh - print neighbors for the originator table
92 * @orig_node: the orig_node for which the neighbors are printed
93 * @if_outgoing: outgoing interface for these entries
94 * @seq: debugfs table seq_file struct
95 *
96 * Must be called while holding an rcu lock.
97 */
98static void
99batadv_v_orig_print_neigh(struct batadv_orig_node *orig_node,
100 struct batadv_hard_iface *if_outgoing,
101 struct seq_file *seq)
102{
103 struct batadv_neigh_node *neigh_node;
104 struct batadv_neigh_ifinfo *n_ifinfo;
105
106 hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
107 n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
108 if (!n_ifinfo)
109 continue;
110
111 seq_printf(seq, " %pM (%9u.%1u)",
112 neigh_node->addr,
113 n_ifinfo->bat_v.throughput / 10,
114 n_ifinfo->bat_v.throughput % 10);
115
116 batadv_neigh_ifinfo_put(n_ifinfo);
117 }
118}
119
120/**
121 * batadv_v_hardif_neigh_print - print a single ELP neighbour node
122 * @seq: neighbour table seq_file struct
123 * @hardif_neigh: hardif neighbour information
124 */
125static void
126batadv_v_hardif_neigh_print(struct seq_file *seq,
127 struct batadv_hardif_neigh_node *hardif_neigh)
128{
129 int last_secs, last_msecs;
130 u32 throughput;
131
132 last_secs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) / 1000;
133 last_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen) % 1000;
134 throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput);
135
136 seq_printf(seq, "%pM %4i.%03is (%9u.%1u) [%10s]\n",
137 hardif_neigh->addr, last_secs, last_msecs, throughput / 10,
138 throughput % 10, hardif_neigh->if_incoming->net_dev->name);
139}
140
141/**
142 * batadv_v_neigh_print - print the single hop neighbour list
143 * @bat_priv: the bat priv with all the soft interface information
144 * @seq: neighbour table seq_file struct
145 */
146static void batadv_v_neigh_print(struct batadv_priv *bat_priv,
147 struct seq_file *seq)
148{
149 struct net_device *net_dev = (struct net_device *)seq->private;
150 struct batadv_hardif_neigh_node *hardif_neigh;
151 struct batadv_hard_iface *hard_iface;
152 int batman_count = 0;
153
154 seq_printf(seq, " %-15s %s (%11s) [%10s]\n", "Neighbor",
155 "last-seen", "throughput", "IF");
156
157 rcu_read_lock();
158 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
159 if (hard_iface->soft_iface != net_dev)
160 continue;
161
162 hlist_for_each_entry_rcu(hardif_neigh,
163 &hard_iface->neigh_list, list) {
164 batadv_v_hardif_neigh_print(seq, hardif_neigh);
165 batman_count++;
166 }
167 }
168 rcu_read_unlock();
169
170 if (batman_count == 0)
171 seq_puts(seq, "No batman nodes in range ...\n");
172}
173
174/**
175 * batadv_v_orig_print - print the originator table
176 * @bat_priv: the bat priv with all the soft interface information
177 * @seq: debugfs table seq_file struct
178 * @if_outgoing: the outgoing interface for which this should be printed
179 */
180static void batadv_v_orig_print(struct batadv_priv *bat_priv,
181 struct seq_file *seq,
182 struct batadv_hard_iface *if_outgoing)
183{
184 struct batadv_neigh_node *neigh_node;
185 struct batadv_hashtable *hash = bat_priv->orig_hash;
186 int last_seen_msecs, last_seen_secs;
187 struct batadv_orig_node *orig_node;
188 struct batadv_neigh_ifinfo *n_ifinfo;
189 unsigned long last_seen_jiffies;
190 struct hlist_head *head;
191 int batman_count = 0;
192 u32 i;
193
194 seq_printf(seq, " %-15s %s (%11s) %17s [%10s]: %20s ...\n",
195 "Originator", "last-seen", "throughput", "Nexthop",
196 "outgoingIF", "Potential nexthops");
197
198 for (i = 0; i < hash->size; i++) {
199 head = &hash->table[i];
200
201 rcu_read_lock();
202 hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
203 neigh_node = batadv_orig_router_get(orig_node,
204 if_outgoing);
205 if (!neigh_node)
206 continue;
207
208 n_ifinfo = batadv_neigh_ifinfo_get(neigh_node,
209 if_outgoing);
210 if (!n_ifinfo)
211 goto next;
212
213 last_seen_jiffies = jiffies - orig_node->last_seen;
214 last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
215 last_seen_secs = last_seen_msecs / 1000;
216 last_seen_msecs = last_seen_msecs % 1000;
217
218 seq_printf(seq, "%pM %4i.%03is (%9u.%1u) %pM [%10s]:",
219 orig_node->orig, last_seen_secs,
220 last_seen_msecs,
221 n_ifinfo->bat_v.throughput / 10,
222 n_ifinfo->bat_v.throughput % 10,
223 neigh_node->addr,
224 neigh_node->if_incoming->net_dev->name);
225
226 batadv_v_orig_print_neigh(orig_node, if_outgoing, seq);
227 seq_puts(seq, "\n");
228 batman_count++;
229
230next:
231 batadv_neigh_node_put(neigh_node);
232 if (n_ifinfo)
233 batadv_neigh_ifinfo_put(n_ifinfo);
234 }
235 rcu_read_unlock();
236 }
237
238 if (batman_count == 0)
239 seq_puts(seq, "No batman nodes in range ...\n");
240}
241
242static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
243 struct batadv_hard_iface *if_outgoing1,
244 struct batadv_neigh_node *neigh2,
245 struct batadv_hard_iface *if_outgoing2)
246{
247 struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2;
248
249 ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
250 ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
251
252 if (WARN_ON(!ifinfo1 || !ifinfo2))
253 return 0;
254
255 return ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput;
256}
257
258static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1,
259 struct batadv_hard_iface *if_outgoing1,
260 struct batadv_neigh_node *neigh2,
261 struct batadv_hard_iface *if_outgoing2)
262{
263 struct batadv_neigh_ifinfo *ifinfo1, *ifinfo2;
264 u32 threshold;
265
266 ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
267 ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
268
269 threshold = ifinfo1->bat_v.throughput / 4;
270 threshold = ifinfo1->bat_v.throughput - threshold;
271
272 return ifinfo2->bat_v.throughput > threshold;
273}
274
275static struct batadv_algo_ops batadv_batman_v __read_mostly = {
276 .name = "BATMAN_V",
277 .bat_iface_enable = batadv_v_iface_enable,
278 .bat_iface_disable = batadv_v_iface_disable,
279 .bat_iface_update_mac = batadv_v_iface_update_mac,
280 .bat_primary_iface_set = batadv_v_primary_iface_set,
281 .bat_hardif_neigh_init = batadv_v_hardif_neigh_init,
282 .bat_ogm_emit = batadv_v_ogm_emit,
283 .bat_ogm_schedule = batadv_v_ogm_schedule,
284 .bat_orig_print = batadv_v_orig_print,
285 .bat_neigh_cmp = batadv_v_neigh_cmp,
286 .bat_neigh_is_similar_or_better = batadv_v_neigh_is_sob,
287 .bat_neigh_print = batadv_v_neigh_print,
288};
289
290/**
291 * batadv_v_mesh_init - initialize the B.A.T.M.A.N. V private resources for a
292 * mesh
293 * @bat_priv: the object representing the mesh interface to initialise
294 *
295 * Return: 0 on success or a negative error code otherwise
296 */
297int batadv_v_mesh_init(struct batadv_priv *bat_priv)
298{
299 return batadv_v_ogm_init(bat_priv);
300}
301
302/**
303 * batadv_v_mesh_free - free the B.A.T.M.A.N. V private resources for a mesh
304 * @bat_priv: the object representing the mesh interface to free
305 */
306void batadv_v_mesh_free(struct batadv_priv *bat_priv)
307{
308 batadv_v_ogm_free(bat_priv);
309}
310
311/**
312 * batadv_v_init - B.A.T.M.A.N. V initialization function
313 *
314 * Description: Takes care of initializing all the subcomponents.
315 * It is invoked upon module load only.
316 *
317 * Return: 0 on success or a negative error code otherwise
318 */
319int __init batadv_v_init(void)
320{
321 int ret;
322
323 /* B.A.T.M.A.N. V echo location protocol packet */
324 ret = batadv_recv_handler_register(BATADV_ELP,
325 batadv_v_elp_packet_recv);
326 if (ret < 0)
327 return ret;
328
329 ret = batadv_recv_handler_register(BATADV_OGM2,
330 batadv_v_ogm_packet_recv);
331 if (ret < 0)
332 goto elp_unregister;
333
334 ret = batadv_algo_register(&batadv_batman_v);
335 if (ret < 0)
336 goto ogm_unregister;
337
338 return ret;
339
340ogm_unregister:
341 batadv_recv_handler_unregister(BATADV_OGM2);
342
343elp_unregister:
344 batadv_recv_handler_unregister(BATADV_ELP);
345
346 return ret;
347}
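In the new bat_v.c above, batadv_v_neigh_cmp() simply returns the signed throughput difference, and batadv_v_neigh_is_sob() declares neigh2 "similar or better" when its throughput exceeds threshold = throughput1 - throughput1/4, i.e. 75% of neigh1's value. For example, with neigh1 at 100 (10.0 Mbps in the 100 kbps units used here) the threshold is 75, so a neigh2 at 80 (8.0 Mbps) still counts as good enough, while one at 70 does not.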
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
new file mode 100644
index 000000000000..3844e7efd0b0
--- /dev/null
+++ b/net/batman-adv/bat_v_elp.c
@@ -0,0 +1,515 @@
1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 *
3 * Linus Lüssing, Marek Lindner
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "bat_v_elp.h"
19#include "main.h"
20
21#include <linux/atomic.h>
22#include <linux/byteorder/generic.h>
23#include <linux/errno.h>
24#include <linux/etherdevice.h>
25#include <linux/ethtool.h>
26#include <linux/fs.h>
27#include <linux/if_ether.h>
28#include <linux/jiffies.h>
29#include <linux/kernel.h>
30#include <linux/kref.h>
31#include <linux/netdevice.h>
32#include <linux/random.h>
33#include <linux/rculist.h>
34#include <linux/rcupdate.h>
35#include <linux/rtnetlink.h>
36#include <linux/skbuff.h>
37#include <linux/stddef.h>
38#include <linux/string.h>
39#include <linux/types.h>
40#include <linux/workqueue.h>
41#include <net/cfg80211.h>
42
43#include "bat_algo.h"
44#include "bat_v_ogm.h"
45#include "hard-interface.h"
46#include "originator.h"
47#include "packet.h"
48#include "routing.h"
49#include "send.h"
50
51/**
52 * batadv_v_elp_start_timer - restart timer for ELP periodic work
53 * @hard_iface: the interface for which the timer has to be reset
54 */
55static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface)
56{
57 unsigned int msecs;
58
59 msecs = atomic_read(&hard_iface->bat_v.elp_interval) - BATADV_JITTER;
60 msecs += prandom_u32() % (2 * BATADV_JITTER);
61
62 queue_delayed_work(batadv_event_workqueue, &hard_iface->bat_v.elp_wq,
63 msecs_to_jiffies(msecs));
64}
65
66/**
67 * batadv_v_elp_get_throughput - get the throughput towards a neighbour
68 * @neigh: the neighbour for which the throughput has to be obtained
69 *
 70 * Return: The throughput towards the given neighbour in multiples of 100kbps
 71 * (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc).
72 */
73static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
74{
75 struct batadv_hard_iface *hard_iface = neigh->if_incoming;
76 struct ethtool_link_ksettings link_settings;
77 struct station_info sinfo;
78 u32 throughput;
79 int ret;
80
81 /* if the user specified a customised value for this interface, then
82 * return it directly
83 */
84 throughput = atomic_read(&hard_iface->bat_v.throughput_override);
85 if (throughput != 0)
86 return throughput;
87
88 /* if this is a wireless device, then ask its throughput through
89 * cfg80211 API
90 */
91 if (batadv_is_wifi_netdev(hard_iface->net_dev)) {
92 if (hard_iface->net_dev->ieee80211_ptr) {
93 ret = cfg80211_get_station(hard_iface->net_dev,
94 neigh->addr, &sinfo);
95 if (ret == -ENOENT) {
96 /* Node is not associated anymore! It would be
97 * possible to delete this neighbor. For now set
98 * the throughput metric to 0.
99 */
100 return 0;
101 }
102 if (!ret)
103 return sinfo.expected_throughput / 100;
104 }
105
106 /* unsupported WiFi driver version */
107 goto default_throughput;
108 }
109
110 /* if not a wifi interface, check if this device provides data via
111 * ethtool (e.g. an Ethernet adapter)
112 */
113 memset(&link_settings, 0, sizeof(link_settings));
114 rtnl_lock();
115 ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
116 rtnl_unlock();
117 if (ret == 0) {
118 /* link characteristics might change over time */
119 if (link_settings.base.duplex == DUPLEX_FULL)
120 hard_iface->bat_v.flags |= BATADV_FULL_DUPLEX;
121 else
122 hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
123
124 throughput = link_settings.base.speed;
125 if (throughput && (throughput != SPEED_UNKNOWN))
126 return throughput * 10;
127 }
128
129default_throughput:
130 if (!(hard_iface->bat_v.flags & BATADV_WARNING_DEFAULT)) {
131 batadv_info(hard_iface->soft_iface,
132 "WiFi driver or ethtool info does not provide information about link speeds on interface %s, therefore defaulting to hardcoded throughput values of %u.%1u Mbps. Consider overriding the throughput manually or checking your driver.\n",
133 hard_iface->net_dev->name,
134 BATADV_THROUGHPUT_DEFAULT_VALUE / 10,
135 BATADV_THROUGHPUT_DEFAULT_VALUE % 10);
136 hard_iface->bat_v.flags |= BATADV_WARNING_DEFAULT;
137 }
138
139 /* if none of the above cases apply, return the base_throughput */
140 return BATADV_THROUGHPUT_DEFAULT_VALUE;
141}
142
143/**
144 * batadv_v_elp_throughput_metric_update - worker updating the throughput metric
145 * of a single hop neighbour
146 * @work: the work queue item
147 */
148void batadv_v_elp_throughput_metric_update(struct work_struct *work)
149{
150 struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
151 struct batadv_hardif_neigh_node *neigh;
152
153 neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
154 metric_work);
155 neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
156 bat_v);
157
158 ewma_throughput_add(&neigh->bat_v.throughput,
159 batadv_v_elp_get_throughput(neigh));
160
161 /* decrement refcounter to balance increment performed before scheduling
162 * this task
163 */
164 batadv_hardif_neigh_put(neigh);
165}
166
167/**
168 * batadv_v_elp_wifi_neigh_probe - send link probing packets to a neighbour
169 * @neigh: the neighbour to probe
170 *
171 * Sends a predefined number of unicast wifi packets to a given neighbour in
172 * order to trigger the throughput estimation on this link by the RC algorithm.
173 * Packets are sent only if there is not enough payload unicast traffic
174 * towards this neighbour.
175 *
176 * Return: True on success and false in case of error during skb preparation.
177 */
178static bool
179batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
180{
181 struct batadv_hard_iface *hard_iface = neigh->if_incoming;
182 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
183 unsigned long last_tx_diff;
184 struct sk_buff *skb;
185 int probe_len, i;
186 int elp_skb_len;
187
188 /* this probing routine is for Wifi neighbours only */
189 if (!batadv_is_wifi_netdev(hard_iface->net_dev))
190 return true;
191
192 /* probe the neighbor only if no unicast packets have been sent
193 * to it in the last 100 milliseconds: this is the rate control
194 * algorithm sampling interval (minstrel). In this way, if not
195 * enough traffic has been sent to the neighbor, batman-adv can
196 * generate 2 probe packets and push the RC algorithm to perform
197 * the sampling
198 */
199 last_tx_diff = jiffies_to_msecs(jiffies - neigh->bat_v.last_unicast_tx);
200 if (last_tx_diff <= BATADV_ELP_PROBE_MAX_TX_DIFF)
201 return true;
202
203 probe_len = max_t(int, sizeof(struct batadv_elp_packet),
204 BATADV_ELP_MIN_PROBE_SIZE);
205
206 for (i = 0; i < BATADV_ELP_PROBES_PER_NODE; i++) {
207 elp_skb_len = hard_iface->bat_v.elp_skb->len;
208 skb = skb_copy_expand(hard_iface->bat_v.elp_skb, 0,
209 probe_len - elp_skb_len,
210 GFP_ATOMIC);
211 if (!skb)
212 return false;
213
214 /* Tell the skb to get as big as the allocated space (we want
215 * the packet to be exactly of that size to make the link
216 * throughput estimation effective.)
217 */
218 skb_put(skb, probe_len - hard_iface->bat_v.elp_skb->len);
219
220 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
221 "Sending unicast (probe) ELP packet on interface %s to %pM\n",
222 hard_iface->net_dev->name, neigh->addr);
223
224 batadv_send_skb_packet(skb, hard_iface, neigh->addr);
225 }
226
227 return true;
228}
229
230/**
231 * batadv_v_elp_periodic_work - ELP periodic task per interface
232 * @work: work queue item
233 *
234 * Emits broadcast ELP message in regular intervals.
235 */
236static void batadv_v_elp_periodic_work(struct work_struct *work)
237{
238 struct batadv_hardif_neigh_node *hardif_neigh;
239 struct batadv_hard_iface *hard_iface;
240 struct batadv_hard_iface_bat_v *bat_v;
241 struct batadv_elp_packet *elp_packet;
242 struct batadv_priv *bat_priv;
243 struct sk_buff *skb;
244 u32 elp_interval;
245
246 bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work);
247 hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v);
248 bat_priv = netdev_priv(hard_iface->soft_iface);
249
250 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
251 goto out;
252
253 /* we are in the process of shutting this interface down */
254 if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
255 (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
256 goto out;
257
258 /* the interface was enabled but may not be ready yet */
259 if (hard_iface->if_status != BATADV_IF_ACTIVE)
260 goto restart_timer;
261
262 skb = skb_copy(hard_iface->bat_v.elp_skb, GFP_ATOMIC);
263 if (!skb)
264 goto restart_timer;
265
266 elp_packet = (struct batadv_elp_packet *)skb->data;
267 elp_packet->seqno = htonl(atomic_read(&hard_iface->bat_v.elp_seqno));
268 elp_interval = atomic_read(&hard_iface->bat_v.elp_interval);
269 elp_packet->elp_interval = htonl(elp_interval);
270
271 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
272 "Sending broadcast ELP packet on interface %s, seqno %u\n",
273 hard_iface->net_dev->name,
274 atomic_read(&hard_iface->bat_v.elp_seqno));
275
276 batadv_send_broadcast_skb(skb, hard_iface);
277
278 atomic_inc(&hard_iface->bat_v.elp_seqno);
279
280 /* The throughput metric is updated on each sent packet. This way, if a
281 * node is dead and no longer sends packets, batman-adv is still able to
282 * react timely to its death.
283 *
284 * The throughput metric is updated by following these steps:
285 * 1) if the hard_iface is wifi => send a number of unicast ELPs for
286 * probing/sampling to each neighbor
287 * 2) update the throughput metric value of each neighbor (note that the
288 * value retrieved in this step might be 100ms old because the
289 * probing packets at point 1) could still be in the HW queue)
290 */
291 rcu_read_lock();
292 hlist_for_each_entry_rcu(hardif_neigh, &hard_iface->neigh_list, list) {
293 if (!batadv_v_elp_wifi_neigh_probe(hardif_neigh))
294 /* if something goes wrong while probing, better to stop
295 * sending packets immediately and reschedule the task
296 */
297 break;
298
299 if (!kref_get_unless_zero(&hardif_neigh->refcount))
300 continue;
301
302 /* Reading the estimated throughput from cfg80211 is a task that
303 * may sleep and that is not allowed in an rcu protected
304 * context. Therefore schedule a task for that.
305 */
306 queue_work(batadv_event_workqueue,
307 &hardif_neigh->bat_v.metric_work);
308 }
309 rcu_read_unlock();
310
311restart_timer:
312 batadv_v_elp_start_timer(hard_iface);
313out:
314 return;
315}
316
317/**
318 * batadv_v_elp_iface_enable - setup the ELP interface private resources
319 * @hard_iface: interface for which the data has to be prepared
320 *
321 * Return: 0 on success or -ENOMEM in case of failure.
322 */
323int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
324{
325 struct batadv_elp_packet *elp_packet;
326 unsigned char *elp_buff;
327 u32 random_seqno;
328 size_t size;
329 int res = -ENOMEM;
330
331 size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN;
332 hard_iface->bat_v.elp_skb = dev_alloc_skb(size);
333 if (!hard_iface->bat_v.elp_skb)
334 goto out;
335
336 skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
337 elp_buff = skb_push(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
338 elp_packet = (struct batadv_elp_packet *)elp_buff;
339 memset(elp_packet, 0, BATADV_ELP_HLEN);
340
341 elp_packet->packet_type = BATADV_ELP;
342 elp_packet->version = BATADV_COMPAT_VERSION;
343
344 /* randomize initial seqno to avoid collision */
345 get_random_bytes(&random_seqno, sizeof(random_seqno));
346 atomic_set(&hard_iface->bat_v.elp_seqno, random_seqno);
347 atomic_set(&hard_iface->bat_v.elp_interval, 500);
348
349 /* assume full-duplex by default */
350 hard_iface->bat_v.flags |= BATADV_FULL_DUPLEX;
351
352 /* warn the user (again) if no throughput data is available */
353 hard_iface->bat_v.flags &= ~BATADV_WARNING_DEFAULT;
354
355 if (batadv_is_wifi_netdev(hard_iface->net_dev))
356 hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
357
358 INIT_DELAYED_WORK(&hard_iface->bat_v.elp_wq,
359 batadv_v_elp_periodic_work);
360 batadv_v_elp_start_timer(hard_iface);
361 res = 0;
362
363out:
364 return res;
365}
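The skb allocated here is only a template: it is never handed to the send path itself. Every broadcast in the periodic worker (and every unicast probe) operates on a copy, so the template survives for the lifetime of the interface and only the live fields need patching per transmission. A minimal usage sketch, with hard_iface taken from the calling context:

/* usage sketch only, mirroring batadv_v_elp_periodic_work() */
struct batadv_elp_packet *elp_packet;
struct sk_buff *skb;

skb = skb_copy(hard_iface->bat_v.elp_skb, GFP_ATOMIC);
if (skb) {
        elp_packet = (struct batadv_elp_packet *)skb->data;
        elp_packet->seqno = htonl(atomic_read(&hard_iface->bat_v.elp_seqno));
        batadv_send_broadcast_skb(skb, hard_iface);
}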
366
367/**
368 * batadv_v_elp_iface_disable - release ELP interface private resources
369 * @hard_iface: interface for which the resources have to be released
370 */
371void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface)
372{
373 cancel_delayed_work_sync(&hard_iface->bat_v.elp_wq);
374
375 dev_kfree_skb(hard_iface->bat_v.elp_skb);
376 hard_iface->bat_v.elp_skb = NULL;
377}
378
379/**
380 * batadv_v_elp_primary_iface_set - change internal data to reflect the new
381 * primary interface
382 * @primary_iface: the new primary interface
383 */
384void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface)
385{
386 struct batadv_hard_iface *hard_iface;
387 struct batadv_elp_packet *elp_packet;
388 struct sk_buff *skb;
389
390 /* update orig field of every elp iface belonging to this mesh */
391 rcu_read_lock();
392 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
393 if (primary_iface->soft_iface != hard_iface->soft_iface)
394 continue;
395
396 if (!hard_iface->bat_v.elp_skb)
397 continue;
398
399 skb = hard_iface->bat_v.elp_skb;
400 elp_packet = (struct batadv_elp_packet *)skb->data;
401 ether_addr_copy(elp_packet->orig,
402 primary_iface->net_dev->dev_addr);
403 }
404 rcu_read_unlock();
405}
406
407/**
408 * batadv_v_elp_neigh_update - update an ELP neighbour node
409 * @bat_priv: the bat priv with all the soft interface information
410 * @neigh_addr: the neighbour interface address
411 * @if_incoming: the interface the packet was received through
412 * @elp_packet: the received ELP packet
413 *
414 * Updates the ELP neighbour node state with the data received within the new
415 * ELP packet.
416 */
417static void batadv_v_elp_neigh_update(struct batadv_priv *bat_priv,
418 u8 *neigh_addr,
419 struct batadv_hard_iface *if_incoming,
420 struct batadv_elp_packet *elp_packet)
421
422{
423 struct batadv_neigh_node *neigh;
424 struct batadv_orig_node *orig_neigh;
425 struct batadv_hardif_neigh_node *hardif_neigh;
426 s32 seqno_diff;
427 s32 elp_latest_seqno;
428
429 orig_neigh = batadv_v_ogm_orig_get(bat_priv, elp_packet->orig);
430 if (!orig_neigh)
431 return;
432
433 neigh = batadv_neigh_node_new(orig_neigh, if_incoming, neigh_addr);
434 if (!neigh)
435 goto orig_free;
436
437 hardif_neigh = batadv_hardif_neigh_get(if_incoming, neigh_addr);
438 if (!hardif_neigh)
439 goto neigh_free;
440
441 elp_latest_seqno = hardif_neigh->bat_v.elp_latest_seqno;
442 seqno_diff = ntohl(elp_packet->seqno) - elp_latest_seqno;
443
444 /* known or older sequence numbers are ignored. However always adopt
445 * if the router seems to have been restarted.
446 */
447 if (seqno_diff < 1 && seqno_diff > -BATADV_ELP_MAX_AGE)
448 goto hardif_free;
449
450 neigh->last_seen = jiffies;
451 hardif_neigh->last_seen = jiffies;
452 hardif_neigh->bat_v.elp_latest_seqno = ntohl(elp_packet->seqno);
453 hardif_neigh->bat_v.elp_interval = ntohl(elp_packet->elp_interval);
454
455hardif_free:
456 if (hardif_neigh)
457 batadv_hardif_neigh_put(hardif_neigh);
458neigh_free:
459 if (neigh)
460 batadv_neigh_node_put(neigh);
461orig_free:
462 if (orig_neigh)
463 batadv_orig_node_put(orig_neigh);
464}
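As a rough illustration of the acceptance window checked above (assuming a BATADV_ELP_MAX_AGE of 64, the value used elsewhere in batman-adv): with elp_latest_seqno == 100, a received seqno of 101 is adopted, 100 or 95 is ignored, and 20 is treated as a neighbour restart and adopted. A tiny helper mirroring that condition, for illustration only:

/* illustrative helper, not part of this patch */
static bool example_elp_seqno_accepted(u32 latest_seqno, u32 recv_seqno)
{
        s32 diff = recv_seqno - latest_seqno;

        /* adopt strictly newer seqnos, or very old ones indicating a
         * restarted neighbour
         */
        return diff >= 1 || diff <= -BATADV_ELP_MAX_AGE;
}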
465
466/**
467 * batadv_v_elp_packet_recv - main ELP packet handler
468 * @skb: the received packet
469 * @if_incoming: the interface this packet was received through
470 *
471 * Return: NET_RX_SUCCESS and consumes the skb if the packet was properly
472 * processed or NET_RX_DROP in case of failure.
473 */
474int batadv_v_elp_packet_recv(struct sk_buff *skb,
475 struct batadv_hard_iface *if_incoming)
476{
477 struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
478 struct batadv_elp_packet *elp_packet;
479 struct batadv_hard_iface *primary_if;
480 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
481 bool ret;
482
483 ret = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN);
484 if (!ret)
485 return NET_RX_DROP;
486
487 if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
488 return NET_RX_DROP;
489
490 /* did we receive a B.A.T.M.A.N. V ELP packet on an interface
491 * that does not have B.A.T.M.A.N. V ELP enabled?
492 */
493 if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0)
494 return NET_RX_DROP;
495
496 elp_packet = (struct batadv_elp_packet *)skb->data;
497
498 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
499 "Received ELP packet from %pM seqno %u ORIG: %pM\n",
500 ethhdr->h_source, ntohl(elp_packet->seqno),
501 elp_packet->orig);
502
503 primary_if = batadv_primary_if_get_selected(bat_priv);
504 if (!primary_if)
505 goto out;
506
507 batadv_v_elp_neigh_update(bat_priv, ethhdr->h_source, if_incoming,
508 elp_packet);
509
510out:
511 if (primary_if)
512 batadv_hardif_put(primary_if);
513 consume_skb(skb);
514 return NET_RX_SUCCESS;
515}
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
new file mode 100644
index 000000000000..e95f1bca0785
--- /dev/null
+++ b/net/batman-adv/bat_v_elp.h
@@ -0,0 +1,33 @@
1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 *
3 * Linus Lüssing, Marek Lindner
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "main.h"
19
20#ifndef _NET_BATMAN_ADV_BAT_V_ELP_H_
21#define _NET_BATMAN_ADV_BAT_V_ELP_H_
22
23struct sk_buff;
24struct work_struct;
25
26int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
27void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
28void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface);
29int batadv_v_elp_packet_recv(struct sk_buff *skb,
30 struct batadv_hard_iface *if_incoming);
31void batadv_v_elp_throughput_metric_update(struct work_struct *work);
32
33#endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
new file mode 100644
index 000000000000..d9bcbe6e7d65
--- /dev/null
+++ b/net/batman-adv/bat_v_ogm.c
@@ -0,0 +1,833 @@
1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 *
3 * Antonio Quartulli
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "bat_v_ogm.h"
19#include "main.h"
20
21#include <linux/atomic.h>
22#include <linux/byteorder/generic.h>
23#include <linux/errno.h>
24#include <linux/etherdevice.h>
25#include <linux/fs.h>
26#include <linux/if_ether.h>
27#include <linux/jiffies.h>
28#include <linux/kernel.h>
29#include <linux/list.h>
30#include <linux/netdevice.h>
31#include <linux/random.h>
32#include <linux/rculist.h>
33#include <linux/rcupdate.h>
34#include <linux/skbuff.h>
35#include <linux/slab.h>
36#include <linux/stddef.h>
37#include <linux/string.h>
38#include <linux/types.h>
39#include <linux/workqueue.h>
40
41#include "hard-interface.h"
42#include "hash.h"
43#include "originator.h"
44#include "packet.h"
45#include "routing.h"
46#include "send.h"
47#include "translation-table.h"
48
49/**
50 * batadv_v_ogm_orig_get - retrieve and possibly create an originator node
51 * @bat_priv: the bat priv with all the soft interface information
52 * @addr: the address of the originator
53 *
54 * Return: the orig_node corresponding to the specified address. If such an
55 * object does not exist, it is allocated here. Returns NULL in case of
56 * allocation failure.
57 */
58struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
59 const u8 *addr)
60{
61 struct batadv_orig_node *orig_node;
62 int hash_added;
63
64 orig_node = batadv_orig_hash_find(bat_priv, addr);
65 if (orig_node)
66 return orig_node;
67
68 orig_node = batadv_orig_node_new(bat_priv, addr);
69 if (!orig_node)
70 return NULL;
71
72 hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
73 batadv_choose_orig, orig_node,
74 &orig_node->hash_entry);
75 if (hash_added != 0) {
76 /* orig_node->refcounter is initialised to 2 by
77 * batadv_orig_node_new()
78 */
79 batadv_orig_node_put(orig_node);
80 batadv_orig_node_put(orig_node);
81 orig_node = NULL;
82 }
83
84 return orig_node;
85}
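The reference returned above belongs to the caller and must eventually be dropped. A minimal usage sketch, with bat_priv and addr coming from the calling context:

/* usage sketch only */
struct batadv_orig_node *orig_node;

orig_node = batadv_v_ogm_orig_get(bat_priv, addr);
if (orig_node) {
        /* ... use orig_node ... */
        batadv_orig_node_put(orig_node);
}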
86
87/**
88 * batadv_v_ogm_start_timer - restart the OGM sending timer
89 * @bat_priv: the bat priv with all the soft interface information
90 */
91static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv)
92{
93 unsigned long msecs;
94 /* this function may be invoked in different contexts (ogm rescheduling
95 * or hard_iface activation), but the work timer should not be reset
96 */
97 if (delayed_work_pending(&bat_priv->bat_v.ogm_wq))
98 return;
99
100 msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER;
101 msecs += prandom_u32() % (2 * BATADV_JITTER);
102 queue_delayed_work(batadv_event_workqueue, &bat_priv->bat_v.ogm_wq,
103 msecs_to_jiffies(msecs));
104}
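As a rough worked example of the scheduling above, assuming the default orig_interval of 1000 ms and a BATADV_JITTER of 20 ms (both values hedged; the real ones come from main.h and the per-mesh sysfs setting):

/* worked example, illustrative only:
 *
 *   msecs = 1000 - 20 + (prandom_u32() % 40)
 *
 * i.e. the next OGM is scheduled after a uniformly random delay between
 * 980 and 1019 ms, so neighbouring nodes do not stay synchronised
 */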
105
106/**
107 * batadv_v_ogm_send_to_if - send a batman ogm using a given interface
108 * @skb: the OGM to send
109 * @hard_iface: the interface to use to send the OGM
110 */
111static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
112 struct batadv_hard_iface *hard_iface)
113{
114 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
115
116 if (hard_iface->if_status != BATADV_IF_ACTIVE)
117 return;
118
119 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
120 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
121 skb->len + ETH_HLEN);
122
123 batadv_send_broadcast_skb(skb, hard_iface);
124}
125
126/**
127 * batadv_v_ogm_send - periodic worker broadcasting the own OGM
128 * @work: work queue item
129 */
130static void batadv_v_ogm_send(struct work_struct *work)
131{
132 struct batadv_hard_iface *hard_iface;
133 struct batadv_priv_bat_v *bat_v;
134 struct batadv_priv *bat_priv;
135 struct batadv_ogm2_packet *ogm_packet;
136 struct sk_buff *skb, *skb_tmp;
137 unsigned char *ogm_buff, *pkt_buff;
138 int ogm_buff_len;
139 u16 tvlv_len = 0;
140
141 bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work);
142 bat_priv = container_of(bat_v, struct batadv_priv, bat_v);
143
144 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
145 goto out;
146
147 ogm_buff = bat_priv->bat_v.ogm_buff;
148 ogm_buff_len = bat_priv->bat_v.ogm_buff_len;
149 /* tt changes have to be committed before the tvlv data is
150 * appended as it may alter the tt tvlv container
151 */
152 batadv_tt_local_commit_changes(bat_priv);
153 tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, &ogm_buff,
154 &ogm_buff_len,
155 BATADV_OGM2_HLEN);
156
157 bat_priv->bat_v.ogm_buff = ogm_buff;
158 bat_priv->bat_v.ogm_buff_len = ogm_buff_len;
159
160 skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + ogm_buff_len);
161 if (!skb)
162 goto reschedule;
163
164 skb_reserve(skb, ETH_HLEN);
165 pkt_buff = skb_put(skb, ogm_buff_len);
166 memcpy(pkt_buff, ogm_buff, ogm_buff_len);
167
168 ogm_packet = (struct batadv_ogm2_packet *)skb->data;
169 ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno));
170 atomic_inc(&bat_priv->bat_v.ogm_seqno);
171 ogm_packet->tvlv_len = htons(tvlv_len);
172
173 /* broadcast on every interface */
174 rcu_read_lock();
175 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
176 if (hard_iface->soft_iface != bat_priv->soft_iface)
177 continue;
178
179 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
180 "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n",
181 ogm_packet->orig, ntohl(ogm_packet->seqno),
182 ntohl(ogm_packet->throughput), ogm_packet->ttl,
183 hard_iface->net_dev->name,
184 hard_iface->net_dev->dev_addr);
185
186 /* this skb gets consumed by batadv_v_ogm_send_to_if() */
187 skb_tmp = skb_clone(skb, GFP_ATOMIC);
188 if (!skb_tmp)
189 break;
190
191 batadv_v_ogm_send_to_if(skb_tmp, hard_iface);
192 }
193 rcu_read_unlock();
194
195 consume_skb(skb);
196
197reschedule:
198 batadv_v_ogm_start_timer(bat_priv);
199out:
200 return;
201}
202
203/**
204 * batadv_v_ogm_iface_enable - prepare an interface for B.A.T.M.A.N. V
205 * @hard_iface: the interface to prepare
206 *
207 * Takes care of scheduling own OGM sending routine for this interface.
208 *
209 * Return: 0 on success or a negative error code otherwise
210 */
211int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
212{
213 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
214
215 batadv_v_ogm_start_timer(bat_priv);
216
217 return 0;
218}
219
220/**
221 * batadv_v_ogm_primary_iface_set - set a new primary interface
222 * @primary_iface: the new primary interface
223 */
224void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface)
225{
226 struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface);
227 struct batadv_ogm2_packet *ogm_packet;
228
229 if (!bat_priv->bat_v.ogm_buff)
230 return;
231
232 ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff;
233 ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr);
234}
235
236/**
237 * batadv_v_ogm_orig_update - update the originator status based on the received
238 * OGM
239 * @bat_priv: the bat priv with all the soft interface information
240 * @orig_node: the originator to update
241 * @neigh_node: the neighbour the OGM has been received from (to update)
242 * @ogm2: the received OGM
243 * @if_outgoing: the interface where this OGM is going to be forwarded through
244 */
245static void
246batadv_v_ogm_orig_update(struct batadv_priv *bat_priv,
247 struct batadv_orig_node *orig_node,
248 struct batadv_neigh_node *neigh_node,
249 const struct batadv_ogm2_packet *ogm2,
250 struct batadv_hard_iface *if_outgoing)
251{
252 struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL;
253 struct batadv_neigh_node *router = NULL;
254 s32 neigh_seq_diff;
255 u32 neigh_last_seqno;
256 u32 router_last_seqno;
257 u32 router_throughput, neigh_throughput;
258
259 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
260 "Searching and updating originator entry of received packet\n");
261
262 /* if this neighbor already is our next hop there is nothing
263 * to change
264 */
265 router = batadv_orig_router_get(orig_node, if_outgoing);
266 if (router == neigh_node)
267 goto out;
268
269 /* don't consider neighbours with worse throughput.
270 * also switch route if this seqno is BATADV_OGM_MAX_ORIGDIFF newer than
271 * the last received seqno from our best next hop.
272 */
273 if (router) {
274 router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing);
275 neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
276
277 /* if these are not allocated, something is wrong. */
278 if (!router_ifinfo || !neigh_ifinfo)
279 goto out;
280
281 neigh_last_seqno = neigh_ifinfo->bat_v.last_seqno;
282 router_last_seqno = router_ifinfo->bat_v.last_seqno;
283 neigh_seq_diff = neigh_last_seqno - router_last_seqno;
284 router_throughput = router_ifinfo->bat_v.throughput;
285 neigh_throughput = neigh_ifinfo->bat_v.throughput;
286
287 if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
288 (router_throughput >= neigh_throughput))
289 goto out;
290 }
291
292 batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node);
293
294out:
295 if (router_ifinfo)
296 batadv_neigh_ifinfo_put(router_ifinfo);
297 if (neigh_ifinfo)
298 batadv_neigh_ifinfo_put(neigh_ifinfo);
299 if (router)
300 batadv_neigh_node_put(router);
301}
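A rough illustration of the switch condition above, assuming BATADV_OGM_MAX_ORIGDIFF is 5 and throughput is expressed in 100 kbit/s units:

/* worked example, illustrative only: with router_last_seqno == 100 and
 * router_throughput == 400 (40 Mbit/s), a neighbour advertising
 * seqno 103 / throughput 300 keeps the current route (diff 3 < 5 and
 * 400 >= 300), while seqno 106 / throughput 300 triggers
 * batadv_update_route() because the best next hop fell too far behind
 */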
302
303/**
304 * batadv_v_forward_penalty - apply a penalty to the throughput metric forwarded
305 * with B.A.T.M.A.N. V OGMs
306 * @bat_priv: the bat priv with all the soft interface information
307 * @if_incoming: the interface where the OGM has been received
308 * @if_outgoing: the interface where the OGM has to be forwarded to
309 * @throughput: the current throughput
310 *
311 * Apply a penalty on the current throughput metric value based on the
312 * characteristic of the interface where the OGM has been received. The return
313 * value is computed as follows:
314 * - throughput * 50% if the incoming and outgoing interface are the
315 * same WiFi interface and the throughput is above
316 * 1MBit/s
317 * - throughput if the outgoing interface is the default
318 * interface (i.e. this OGM is processed for the
319 * internal table and not forwarded)
320 * - throughput * hop penalty otherwise
321 *
322 * Return: the penalised throughput metric.
323 */
324static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
325 struct batadv_hard_iface *if_incoming,
326 struct batadv_hard_iface *if_outgoing,
327 u32 throughput)
328{
329 int hop_penalty = atomic_read(&bat_priv->hop_penalty);
330 int hop_penalty_max = BATADV_TQ_MAX_VALUE;
331
332 /* Don't apply hop penalty in default originator table. */
333 if (if_outgoing == BATADV_IF_DEFAULT)
334 return throughput;
335
336 /* Forwarding on the same WiFi interface cuts the throughput in half
337 * due to the store & forward characteristics of wifi.
338 * Very low throughput values are the exception.
339 */
340 if ((throughput > 10) &&
341 (if_incoming == if_outgoing) &&
342 !(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX))
343 return throughput / 2;
344
345 /* hop penalty of 255 equals 100% */
346 return throughput * (hop_penalty_max - hop_penalty) / hop_penalty_max;
347}
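A short worked example of the penalty above, assuming the default hop_penalty of 30 and BATADV_TQ_MAX_VALUE of 255 (both hedged):

/* worked example, illustrative only: a received throughput of 100
 * (10 Mbit/s) is re-announced as
 *
 *   100 * (255 - 30) / 255 = 88
 *
 * on a different outgoing interface, and as 100 / 2 = 50 when forwarded
 * on the same half-duplex wifi interface it arrived on
 */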
348
349/**
350 * batadv_v_ogm_forward - forward an OGM to the given outgoing interface
351 * @bat_priv: the bat priv with all the soft interface information
352 * @ogm_received: previously received OGM to be forwarded
353 * @throughput: throughput to announce, may vary per outgoing interface
354 * @if_incoming: the interface on which this OGM was received
355 * @if_outgoing: the interface to which the OGM has to be forwarded
356 *
357 * Forward an OGM to an interface after having altered the throughput metric and
358 * the TTL value contained in it. The original OGM isn't modified.
359 */
360static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
361 const struct batadv_ogm2_packet *ogm_received,
362 u32 throughput,
363 struct batadv_hard_iface *if_incoming,
364 struct batadv_hard_iface *if_outgoing)
365{
366 struct batadv_ogm2_packet *ogm_forward;
367 unsigned char *skb_buff;
368 struct sk_buff *skb;
369 size_t packet_len;
370 u16 tvlv_len;
371
372 if (ogm_received->ttl <= 1) {
373 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n");
374 return;
375 }
376
377 tvlv_len = ntohs(ogm_received->tvlv_len);
378
379 packet_len = BATADV_OGM2_HLEN + tvlv_len;
380 skb = netdev_alloc_skb_ip_align(if_outgoing->net_dev,
381 ETH_HLEN + packet_len);
382 if (!skb)
383 return;
384
385 skb_reserve(skb, ETH_HLEN);
386 skb_buff = skb_put(skb, packet_len);
387 memcpy(skb_buff, ogm_received, packet_len);
388
389 /* apply forward penalty */
390 ogm_forward = (struct batadv_ogm2_packet *)skb_buff;
391 ogm_forward->throughput = htonl(throughput);
392 ogm_forward->ttl--;
393
394 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
395 "Forwarding OGM2 packet on %s: throughput %u, ttl %u, received via %s\n",
396 if_outgoing->net_dev->name, throughput, ogm_forward->ttl,
397 if_incoming->net_dev->name);
398
399 batadv_v_ogm_send_to_if(skb, if_outgoing);
400}
401
402/**
403 * batadv_v_ogm_metric_update - update route metric based on OGM
404 * @bat_priv: the bat priv with all the soft interface information
405 * @ogm2: OGM2 structure
406 * @orig_node: Originator structure for which the OGM has been received
407 * @neigh_node: the neigh_node through which the OGM has been received
408 * @if_incoming: the interface where this packet was received
409 * @if_outgoing: the interface for which the packet should be considered
410 *
411 * Return:
412 * 1 if the OGM is new,
413 * 0 if it is not new but valid,
414 * <0 on error (e.g. old OGM)
415 */
416static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
417 const struct batadv_ogm2_packet *ogm2,
418 struct batadv_orig_node *orig_node,
419 struct batadv_neigh_node *neigh_node,
420 struct batadv_hard_iface *if_incoming,
421 struct batadv_hard_iface *if_outgoing)
422{
423 struct batadv_orig_ifinfo *orig_ifinfo = NULL;
424 struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
425 bool protection_started = false;
426 int ret = -EINVAL;
427 u32 path_throughput;
428 s32 seq_diff;
429
430 orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
431 if (!orig_ifinfo)
432 goto out;
433
434 seq_diff = ntohl(ogm2->seqno) - orig_ifinfo->last_real_seqno;
435
436 if (!hlist_empty(&orig_node->neigh_list) &&
437 batadv_window_protected(bat_priv, seq_diff,
438 BATADV_OGM_MAX_AGE,
439 &orig_ifinfo->batman_seqno_reset,
440 &protection_started)) {
441 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
442 "Drop packet: packet within window protection time from %pM\n",
443 ogm2->orig);
444 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
445 "Last reset: %ld, %ld\n",
446 orig_ifinfo->batman_seqno_reset, jiffies);
447 goto out;
448 }
449
450 /* drop packets with old seqnos; however, accept the first packet after
451 * a host has been rebooted.
452 */
453 if ((seq_diff < 0) && !protection_started)
454 goto out;
455
456 neigh_node->last_seen = jiffies;
457
458 orig_node->last_seen = jiffies;
459
460 orig_ifinfo->last_real_seqno = ntohl(ogm2->seqno);
461 orig_ifinfo->last_ttl = ogm2->ttl;
462
463 neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing);
464 if (!neigh_ifinfo)
465 goto out;
466
467 path_throughput = batadv_v_forward_penalty(bat_priv, if_incoming,
468 if_outgoing,
469 ntohl(ogm2->throughput));
470 neigh_ifinfo->bat_v.throughput = path_throughput;
471 neigh_ifinfo->bat_v.last_seqno = ntohl(ogm2->seqno);
472 neigh_ifinfo->last_ttl = ogm2->ttl;
473
474 if (seq_diff > 0 || protection_started)
475 ret = 1;
476 else
477 ret = 0;
478out:
479 if (orig_ifinfo)
480 batadv_orig_ifinfo_put(orig_ifinfo);
481 if (neigh_ifinfo)
482 batadv_neigh_ifinfo_put(neigh_ifinfo);
483
484 return ret;
485}
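A rough illustration of the return values above, relative to orig_ifinfo->last_real_seqno:

/* illustrative only: a strictly newer seqno yields 1, an equal seqno
 * yields 0 (valid but not new), a slightly older one yields -EINVAL, and
 * a very old seqno (suspected reboot) is only accepted once the
 * reset-protection interval checked by batadv_window_protected() has
 * expired
 */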
486
487/**
488 * batadv_v_ogm_route_update - update routes based on OGM
489 * @bat_priv: the bat priv with all the soft interface information
490 * @ethhdr: the Ethernet header of the OGM2
491 * @ogm2: OGM2 structure
492 * @orig_node: Originator structure for which the OGM has been received
493 * @neigh_node: the neigh_node through which the OGM has been received
494 * @if_incoming: the interface where this packet was received
495 * @if_outgoing: the interface for which the packet should be considered
496 */
497static void batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
498 const struct ethhdr *ethhdr,
499 const struct batadv_ogm2_packet *ogm2,
500 struct batadv_orig_node *orig_node,
501 struct batadv_neigh_node *neigh_node,
502 struct batadv_hard_iface *if_incoming,
503 struct batadv_hard_iface *if_outgoing)
504{
505 struct batadv_neigh_node *router = NULL;
506 struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
507 struct batadv_orig_node *orig_neigh_node = NULL;
508 struct batadv_orig_ifinfo *orig_ifinfo = NULL;
509 struct batadv_neigh_node *orig_neigh_router = NULL;
510
511 neigh_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
512 if (!neigh_ifinfo)
513 goto out;
514
515 orig_neigh_node = batadv_v_ogm_orig_get(bat_priv, ethhdr->h_source);
516 if (!orig_neigh_node)
517 goto out;
518
519 orig_neigh_router = batadv_orig_router_get(orig_neigh_node,
520 if_outgoing);
521
522 /* drop packet if sender is not a direct neighbor and if we
523 * don't route towards it
524 */
525 router = batadv_orig_router_get(orig_node, if_outgoing);
526 if (router && router->orig_node != orig_node && !orig_neigh_router) {
527 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
528 "Drop packet: OGM via unknown neighbor!\n");
529 goto out;
530 }
531
532 if (router)
533 batadv_neigh_node_put(router);
534
535 /* Update routes, and check if the OGM is from the best next hop */
536 batadv_v_ogm_orig_update(bat_priv, orig_node, neigh_node, ogm2,
537 if_outgoing);
538
539 orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing);
540 if (!orig_ifinfo)
541 goto out;
542
543 /* don't forward the same seqno twice on one interface */
544 if (orig_ifinfo->last_seqno_forwarded == ntohl(ogm2->seqno))
545 goto out;
546
547 /* acquire possibly updated router */
548 router = batadv_orig_router_get(orig_node, if_outgoing);
549
550 /* strict rule: forward packets coming from the best next hop only */
551 if (neigh_node != router)
552 goto out;
553
554 /* only forward for specific interface, not for the default one. */
555 if (if_outgoing != BATADV_IF_DEFAULT) {
556 orig_ifinfo->last_seqno_forwarded = ntohl(ogm2->seqno);
557 batadv_v_ogm_forward(bat_priv, ogm2,
558 neigh_ifinfo->bat_v.throughput,
559 if_incoming, if_outgoing);
560 }
561
562out:
563 if (orig_ifinfo)
564 batadv_orig_ifinfo_put(orig_ifinfo);
565 if (router)
566 batadv_neigh_node_put(router);
567 if (orig_neigh_router)
568 batadv_neigh_node_put(orig_neigh_router);
569 if (orig_neigh_node)
570 batadv_orig_node_put(orig_neigh_node);
571 if (neigh_ifinfo)
572 batadv_neigh_ifinfo_put(neigh_ifinfo);
573}
574
575/**
576 * batadv_v_ogm_process_per_outif - process a batman v OGM for an outgoing if
577 * @bat_priv: the bat priv with all the soft interface information
578 * @ethhdr: the Ethernet header of the OGM2
579 * @ogm2: OGM2 structure
580 * @orig_node: Originator structure for which the OGM has been received
581 * @neigh_node: the neigh_node through which the OGM has been received
582 * @if_incoming: the interface where this packet was received
583 * @if_outgoing: the interface for which the packet should be considered
584 */
585static void
586batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
587 const struct ethhdr *ethhdr,
588 const struct batadv_ogm2_packet *ogm2,
589 struct batadv_orig_node *orig_node,
590 struct batadv_neigh_node *neigh_node,
591 struct batadv_hard_iface *if_incoming,
592 struct batadv_hard_iface *if_outgoing)
593{
594 int seqno_age;
595
596 /* first, update the metric with according sanity checks */
597 seqno_age = batadv_v_ogm_metric_update(bat_priv, ogm2, orig_node,
598 neigh_node, if_incoming,
599 if_outgoing);
600
601 /* outdated sequence numbers are to be discarded */
602 if (seqno_age < 0)
603 return;
604
605 /* only unknown & newer OGMs contain TVLVs we are interested in */
606 if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT))
607 batadv_tvlv_containers_process(bat_priv, true, orig_node,
608 NULL, NULL,
609 (unsigned char *)(ogm2 + 1),
610 ntohs(ogm2->tvlv_len));
611
612 /* if the metric update went through, update routes if needed */
613 batadv_v_ogm_route_update(bat_priv, ethhdr, ogm2, orig_node,
614 neigh_node, if_incoming, if_outgoing);
615}
616
617/**
618 * batadv_v_ogm_aggr_packet - checks if there is another OGM aggregated
619 * @buff_pos: current position in the skb
620 * @packet_len: total length of the skb
621 * @tvlv_len: tvlv length of the previously considered OGM
622 *
623 * Return: true if there is enough space for another OGM, false otherwise.
624 */
625static bool batadv_v_ogm_aggr_packet(int buff_pos, int packet_len,
626 __be16 tvlv_len)
627{
628 int next_buff_pos = 0;
629
630 next_buff_pos += buff_pos + BATADV_OGM2_HLEN;
631 next_buff_pos += ntohs(tvlv_len);
632
633 return (next_buff_pos <= packet_len) &&
634 (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES);
635}
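A rough worked example of how the receive loop further below walks an aggregate, assuming an OGM2 header of 20 bytes (hedged; the real value is BATADV_OGM2_HLEN):

/* worked example, illustrative only: an skb with skb_headlen() == 70
 * carrying two OGMs with 30 and 0 tvlv bytes is processed at offsets
 * 0 and 50 (20 + 30 = 50, then 50 + 20 + 0 = 70 still fits), after which
 * the next candidate would exceed the packet length and the loop stops
 */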
636
637/**
638 * batadv_v_ogm_process - process an incoming batman v OGM
639 * @skb: the skb containing the OGM
640 * @ogm_offset: offset to the OGM which should be processed (for aggregates)
641 * @if_incoming: the interface where this packet was received
642 */
643static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
644 struct batadv_hard_iface *if_incoming)
645{
646 struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
647 struct ethhdr *ethhdr;
648 struct batadv_orig_node *orig_node = NULL;
649 struct batadv_hardif_neigh_node *hardif_neigh = NULL;
650 struct batadv_neigh_node *neigh_node = NULL;
651 struct batadv_hard_iface *hard_iface;
652 struct batadv_ogm2_packet *ogm_packet;
653 u32 ogm_throughput, link_throughput, path_throughput;
654
655 ethhdr = eth_hdr(skb);
656 ogm_packet = (struct batadv_ogm2_packet *)(skb->data + ogm_offset);
657
658 ogm_throughput = ntohl(ogm_packet->throughput);
659
660 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
661 "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, troughput %u, TTL %u, V %u, tvlv_len %u)\n",
662 ethhdr->h_source, if_incoming->net_dev->name,
663 if_incoming->net_dev->dev_addr, ogm_packet->orig,
664 ntohl(ogm_packet->seqno), ogm_throughput, ogm_packet->ttl,
665 ogm_packet->version, ntohs(ogm_packet->tvlv_len));
666
667 /* If the throughput metric is 0, immediately drop the packet. No need to
668 * create orig_node / neigh_node for an unusable route.
669 */
670 if (ogm_throughput == 0) {
671 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
672 "Drop packet: originator packet with troughput metric of 0\n");
673 return;
674 }
675
676 /* require ELP packets to be received from this neighbor first */
677 hardif_neigh = batadv_hardif_neigh_get(if_incoming, ethhdr->h_source);
678 if (!hardif_neigh) {
679 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
680 "Drop packet: OGM via unknown neighbor!\n");
681 goto out;
682 }
683
684 orig_node = batadv_v_ogm_orig_get(bat_priv, ogm_packet->orig);
685 if (!orig_node)
686 goto out;
687
688 neigh_node = batadv_neigh_node_new(orig_node, if_incoming,
689 ethhdr->h_source);
690 if (!neigh_node)
691 goto out;
692
693 /* Update the received throughput metric to match the link
694 * characteristic:
695 * - If this OGM traveled one hop so far (emitted by single hop
696 * neighbor) the path throughput metric equals the link throughput.
697 * - For OGMs traversing more than one hop the path throughput metric is
698 * the smaller of the path throughput and the link throughput.
699 */
700 link_throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput);
701 path_throughput = min_t(u32, link_throughput, ogm_throughput);
702 ogm_packet->throughput = htonl(path_throughput);
703
704 batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet, orig_node,
705 neigh_node, if_incoming,
706 BATADV_IF_DEFAULT);
707
708 rcu_read_lock();
709 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
710 if (hard_iface->if_status != BATADV_IF_ACTIVE)
711 continue;
712
713 if (hard_iface->soft_iface != bat_priv->soft_iface)
714 continue;
715
716 batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet,
717 orig_node, neigh_node,
718 if_incoming, hard_iface);
719 }
720 rcu_read_unlock();
721out:
722 if (orig_node)
723 batadv_orig_node_put(orig_node);
724 if (neigh_node)
725 batadv_neigh_node_put(neigh_node);
726 if (hardif_neigh)
727 batadv_hardif_neigh_put(hardif_neigh);
728}
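A short worked example of the metric capping above, with throughput expressed in 100 kbit/s units:

/* worked example, illustrative only: an OGM received from a single-hop
 * neighbour announces BATADV_THROUGHPUT_MAX_VALUE, so the path metric
 * becomes the measured link throughput, say 250 (25 Mbit/s); an OGM that
 * already travelled several hops and announces 120 over that same 250
 * link is forwarded with min(250, 120) = 120
 */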
729
730/**
731 * batadv_v_ogm_packet_recv - OGM2 receiving handler
732 * @skb: the received OGM
733 * @if_incoming: the interface where this OGM has been received
734 *
735 * Return: NET_RX_SUCCESS and consumes the skb on success, or NET_RX_DROP
736 * (without freeing the skb) on failure
737 */
738int batadv_v_ogm_packet_recv(struct sk_buff *skb,
739 struct batadv_hard_iface *if_incoming)
740{
741 struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
742 struct batadv_ogm2_packet *ogm_packet;
743 struct ethhdr *ethhdr = eth_hdr(skb);
744 int ogm_offset;
745 u8 *packet_pos;
746 int ret = NET_RX_DROP;
747
748 /* did we receive an OGM2 packet on an interface that does not have
749 * B.A.T.M.A.N. V enabled?
750 */
751 if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0)
752 return NET_RX_DROP;
753
754 if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
755 return NET_RX_DROP;
756
757 if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
758 return NET_RX_DROP;
759
760 ogm_packet = (struct batadv_ogm2_packet *)skb->data;
761
762 if (batadv_is_my_mac(bat_priv, ogm_packet->orig))
763 return NET_RX_DROP;
764
765 batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
766 batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES,
767 skb->len + ETH_HLEN);
768
769 ogm_offset = 0;
770 ogm_packet = (struct batadv_ogm2_packet *)skb->data;
771
772 while (batadv_v_ogm_aggr_packet(ogm_offset, skb_headlen(skb),
773 ogm_packet->tvlv_len)) {
774 batadv_v_ogm_process(skb, ogm_offset, if_incoming);
775
776 ogm_offset += BATADV_OGM2_HLEN;
777 ogm_offset += ntohs(ogm_packet->tvlv_len);
778
779 packet_pos = skb->data + ogm_offset;
780 ogm_packet = (struct batadv_ogm2_packet *)packet_pos;
781 }
782
783 ret = NET_RX_SUCCESS;
784 consume_skb(skb);
785
786 return ret;
787}
788
789/**
790 * batadv_v_ogm_init - initialise the OGM2 engine
791 * @bat_priv: the bat priv with all the soft interface information
792 *
793 * Return: 0 on success or a negative error code in case of failure
794 */
795int batadv_v_ogm_init(struct batadv_priv *bat_priv)
796{
797 struct batadv_ogm2_packet *ogm_packet;
798 unsigned char *ogm_buff;
799 u32 random_seqno;
800
801 bat_priv->bat_v.ogm_buff_len = BATADV_OGM2_HLEN;
802 ogm_buff = kzalloc(bat_priv->bat_v.ogm_buff_len, GFP_ATOMIC);
803 if (!ogm_buff)
804 return -ENOMEM;
805
806 bat_priv->bat_v.ogm_buff = ogm_buff;
807 ogm_packet = (struct batadv_ogm2_packet *)ogm_buff;
808 ogm_packet->packet_type = BATADV_OGM2;
809 ogm_packet->version = BATADV_COMPAT_VERSION;
810 ogm_packet->ttl = BATADV_TTL;
811 ogm_packet->flags = BATADV_NO_FLAGS;
812 ogm_packet->throughput = htonl(BATADV_THROUGHPUT_MAX_VALUE);
813
814 /* randomize initial seqno to avoid collision */
815 get_random_bytes(&random_seqno, sizeof(random_seqno));
816 atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno);
817 INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send);
818
819 return 0;
820}
821
822/**
823 * batadv_v_ogm_free - free OGM private resources
824 * @bat_priv: the bat priv with all the soft interface information
825 */
826void batadv_v_ogm_free(struct batadv_priv *bat_priv)
827{
828 cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq);
829
830 kfree(bat_priv->bat_v.ogm_buff);
831 bat_priv->bat_v.ogm_buff = NULL;
832 bat_priv->bat_v.ogm_buff_len = 0;
833}
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
new file mode 100644
index 000000000000..d849c75ada0e
--- /dev/null
+++ b/net/batman-adv/bat_v_ogm.h
@@ -0,0 +1,36 @@
1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 *
3 * Antonio Quartulli
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef _BATMAN_ADV_BATADV_V_OGM_H_
19#define _BATMAN_ADV_BATADV_V_OGM_H_
20
21#include <linux/types.h>
22
23struct batadv_hard_iface;
24struct batadv_priv;
25struct sk_buff;
26
27int batadv_v_ogm_init(struct batadv_priv *bat_priv);
28void batadv_v_ogm_free(struct batadv_priv *bat_priv);
29int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface);
30struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv,
31 const u8 *addr);
32void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface);
33int batadv_v_ogm_packet_recv(struct sk_buff *skb,
34 struct batadv_hard_iface *if_incoming);
35
36#endif /* _BATMAN_ADV_BATADV_V_OGM_H_ */
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index 25cbc36e997a..b56bb000a0ab 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich, Marek Lindner 3 * Simon Wunderlich, Marek Lindner
4 * 4 *
@@ -29,10 +29,16 @@ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n)
29 bitmap_shift_left(seq_bits, seq_bits, n, BATADV_TQ_LOCAL_WINDOW_SIZE); 29 bitmap_shift_left(seq_bits, seq_bits, n, BATADV_TQ_LOCAL_WINDOW_SIZE);
30} 30}
31 31
32/* receive and process one packet within the sequence number window. 32/**
33 * batadv_bit_get_packet - receive and process one packet within the sequence
34 * number window
35 * @priv: the bat priv with all the soft interface information
36 * @seq_bits: pointer to the sequence number receive packet
37 * @seq_num_diff: difference between the current/received sequence number and
38 * the last sequence number
39 * @set_mark: whether this packet should be marked in seq_bits
33 * 40 *
34 * returns: 41 * Return: 1 if the window was moved (either new or very old),
35 * 1 if the window was moved (either new or very old)
36 * 0 if the window was not moved/shifted. 42 * 0 if the window was not moved/shifted.
37 */ 43 */
38int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, 44int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 0226b220fe5b..3e41bb80eb81 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich, Marek Lindner 3 * Simon Wunderlich, Marek Lindner
4 * 4 *
@@ -24,7 +24,14 @@
24#include <linux/compiler.h> 24#include <linux/compiler.h>
25#include <linux/types.h> 25#include <linux/types.h>
26 26
27/* Returns 1 if the corresponding bit in the given seq_bits indicates true 27/**
28 * batadv_test_bit - check if bit is set in the current window
29 *
30 * @seq_bits: pointer to the sequence number receive packet
31 * @last_seqno: latest sequence number in seq_bits
32 * @curr_seqno: sequence number to test for
33 *
34 * Return: 1 if the corresponding bit in the given seq_bits indicates true
28 * and curr_seqno is within range of last_seqno. Otherwise returns 0. 35 * and curr_seqno is within range of last_seqno. Otherwise returns 0.
29 */ 36 */
30static inline int batadv_test_bit(const unsigned long *seq_bits, 37static inline int batadv_test_bit(const unsigned long *seq_bits,
@@ -48,9 +55,6 @@ static inline void batadv_set_bit(unsigned long *seq_bits, s32 n)
48 set_bit(n, seq_bits); /* turn the position on */ 55 set_bit(n, seq_bits); /* turn the position on */
49} 56}
50 57
51/* receive and process one packet, returns 1 if received seq_num is considered
52 * new, 0 if old
53 */
54int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff, 58int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, s32 seq_num_diff,
55 int set_mark); 59 int set_mark);
56 60
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index c24c481b666f..0a6c8b824a00 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich 3 * Simon Wunderlich
4 * 4 *
@@ -31,6 +31,7 @@
31#include <linux/jhash.h> 31#include <linux/jhash.h>
32#include <linux/jiffies.h> 32#include <linux/jiffies.h>
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/kref.h>
34#include <linux/list.h> 35#include <linux/list.h>
35#include <linux/lockdep.h> 36#include <linux/lockdep.h>
36#include <linux/netdevice.h> 37#include <linux/netdevice.h>
@@ -58,7 +59,13 @@ static void
58batadv_bla_send_announce(struct batadv_priv *bat_priv, 59batadv_bla_send_announce(struct batadv_priv *bat_priv,
59 struct batadv_bla_backbone_gw *backbone_gw); 60 struct batadv_bla_backbone_gw *backbone_gw);
60 61
61/* return the index of the claim */ 62/**
63 * batadv_choose_claim - choose the right bucket for a claim.
64 * @data: data to hash
65 * @size: size of the hash table
66 *
67 * Return: the hash index of the claim
68 */
62static inline u32 batadv_choose_claim(const void *data, u32 size) 69static inline u32 batadv_choose_claim(const void *data, u32 size)
63{ 70{
64 struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; 71 struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
@@ -70,7 +77,13 @@ static inline u32 batadv_choose_claim(const void *data, u32 size)
70 return hash % size; 77 return hash % size;
71} 78}
72 79
73/* return the index of the backbone gateway */ 80/**
81 * batadv_choose_backbone_gw - choose the right bucket for a backbone gateway.
82 * @data: data to hash
83 * @size: size of the hash table
84 *
85 * Return: the hash index of the backbone gateway
86 */
74static inline u32 batadv_choose_backbone_gw(const void *data, u32 size) 87static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
75{ 88{
76 const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data; 89 const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
@@ -82,7 +95,13 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
82 return hash % size; 95 return hash % size;
83} 96}
84 97
85/* compares address and vid of two backbone gws */ 98/**
99 * batadv_compare_backbone_gw - compare address and vid of two backbone gws
100 * @node: list node of the first entry to compare
101 * @data2: pointer to the second backbone gateway
102 *
103 * Return: 1 if the backbones have the same data, 0 otherwise
104 */
86static int batadv_compare_backbone_gw(const struct hlist_node *node, 105static int batadv_compare_backbone_gw(const struct hlist_node *node,
87 const void *data2) 106 const void *data2)
88{ 107{
@@ -100,7 +119,13 @@ static int batadv_compare_backbone_gw(const struct hlist_node *node,
100 return 1; 119 return 1;
101} 120}
102 121
103/* compares address and vid of two claims */ 122/**
 123 * batadv_compare_claim - compare address and vid of two claims
 124 * @node: list node of the first entry to compare
 125 * @data2: pointer to the second claim
 126 *
 127 * Return: 1 if the claims have the same data, 0 otherwise
128 */
104static int batadv_compare_claim(const struct hlist_node *node, 129static int batadv_compare_claim(const struct hlist_node *node,
105 const void *data2) 130 const void *data2)
106{ 131{
@@ -118,35 +143,62 @@ static int batadv_compare_claim(const struct hlist_node *node,
118 return 1; 143 return 1;
119} 144}
120 145
121/* free a backbone gw */ 146/**
122static void 147 * batadv_backbone_gw_release - release backbone gw from lists and queue for
123batadv_backbone_gw_free_ref(struct batadv_bla_backbone_gw *backbone_gw) 148 * free after rcu grace period
149 * @ref: kref pointer of the backbone gw
150 */
151static void batadv_backbone_gw_release(struct kref *ref)
124{ 152{
125 if (atomic_dec_and_test(&backbone_gw->refcount)) 153 struct batadv_bla_backbone_gw *backbone_gw;
126 kfree_rcu(backbone_gw, rcu); 154
155 backbone_gw = container_of(ref, struct batadv_bla_backbone_gw,
156 refcount);
157
158 kfree_rcu(backbone_gw, rcu);
127} 159}
128 160
129/* finally deinitialize the claim */ 161/**
130static void batadv_claim_release(struct batadv_bla_claim *claim) 162 * batadv_backbone_gw_put - decrement the backbone gw refcounter and possibly
163 * release it
164 * @backbone_gw: backbone gateway to be free'd
165 */
166static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw)
131{ 167{
132 batadv_backbone_gw_free_ref(claim->backbone_gw); 168 kref_put(&backbone_gw->refcount, batadv_backbone_gw_release);
169}
170
171/**
172 * batadv_claim_release - release claim from lists and queue for free after rcu
173 * grace period
174 * @ref: kref pointer of the claim
175 */
176static void batadv_claim_release(struct kref *ref)
177{
178 struct batadv_bla_claim *claim;
179
180 claim = container_of(ref, struct batadv_bla_claim, refcount);
181
182 batadv_backbone_gw_put(claim->backbone_gw);
133 kfree_rcu(claim, rcu); 183 kfree_rcu(claim, rcu);
134} 184}
135 185
136/* free a claim, call claim_free_rcu if its the last reference */ 186/**
137static void batadv_claim_free_ref(struct batadv_bla_claim *claim) 187 * batadv_claim_put - decrement the claim refcounter and possibly
188 * release it
189 * @claim: claim to be free'd
190 */
191static void batadv_claim_put(struct batadv_bla_claim *claim)
138{ 192{
139 if (atomic_dec_and_test(&claim->refcount)) 193 kref_put(&claim->refcount, batadv_claim_release);
140 batadv_claim_release(claim);
141} 194}
142 195
143/** 196/**
144 * batadv_claim_hash_find 197 * batadv_claim_hash_find - looks for a claim in the claim hash
145 * @bat_priv: the bat priv with all the soft interface information 198 * @bat_priv: the bat priv with all the soft interface information
146 * @data: search data (may be local/static data) 199 * @data: search data (may be local/static data)
147 * 200 *
148 * looks for a claim in the hash, and returns it if found 201 * Return: claim if found or NULL otherwise.
149 * or NULL otherwise.
150 */ 202 */
151static struct batadv_bla_claim 203static struct batadv_bla_claim
152*batadv_claim_hash_find(struct batadv_priv *bat_priv, 204*batadv_claim_hash_find(struct batadv_priv *bat_priv,
@@ -169,7 +221,7 @@ static struct batadv_bla_claim
169 if (!batadv_compare_claim(&claim->hash_entry, data)) 221 if (!batadv_compare_claim(&claim->hash_entry, data))
170 continue; 222 continue;
171 223
172 if (!atomic_inc_not_zero(&claim->refcount)) 224 if (!kref_get_unless_zero(&claim->refcount))
173 continue; 225 continue;
174 226
175 claim_tmp = claim; 227 claim_tmp = claim;
@@ -181,12 +233,12 @@ static struct batadv_bla_claim
181} 233}
182 234
183/** 235/**
184 * batadv_backbone_hash_find - looks for a claim in the hash 236 * batadv_backbone_hash_find - looks for a backbone gateway in the hash
185 * @bat_priv: the bat priv with all the soft interface information 237 * @bat_priv: the bat priv with all the soft interface information
186 * @addr: the address of the originator 238 * @addr: the address of the originator
187 * @vid: the VLAN ID 239 * @vid: the VLAN ID
188 * 240 *
189 * Returns claim if found or NULL otherwise. 241 * Return: backbone gateway if found or NULL otherwise
190 */ 242 */
191static struct batadv_bla_backbone_gw * 243static struct batadv_bla_backbone_gw *
192batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr, 244batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
@@ -213,7 +265,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
213 &search_entry)) 265 &search_entry))
214 continue; 266 continue;
215 267
216 if (!atomic_inc_not_zero(&backbone_gw->refcount)) 268 if (!kref_get_unless_zero(&backbone_gw->refcount))
217 continue; 269 continue;
218 270
219 backbone_gw_tmp = backbone_gw; 271 backbone_gw_tmp = backbone_gw;
@@ -224,7 +276,10 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv, u8 *addr,
224 return backbone_gw_tmp; 276 return backbone_gw_tmp;
225} 277}
226 278
227/* delete all claims for a backbone */ 279/**
280 * batadv_bla_del_backbone_claims - delete all claims for a backbone
281 * @backbone_gw: backbone gateway where the claims should be removed
282 */
228static void 283static void
229batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) 284batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
230{ 285{
@@ -249,7 +304,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
249 if (claim->backbone_gw != backbone_gw) 304 if (claim->backbone_gw != backbone_gw)
250 continue; 305 continue;
251 306
252 batadv_claim_free_ref(claim); 307 batadv_claim_put(claim);
253 hlist_del_rcu(&claim->hash_entry); 308 hlist_del_rcu(&claim->hash_entry);
254 } 309 }
255 spin_unlock_bh(list_lock); 310 spin_unlock_bh(list_lock);
@@ -368,18 +423,17 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
368 netif_rx(skb); 423 netif_rx(skb);
369out: 424out:
370 if (primary_if) 425 if (primary_if)
371 batadv_hardif_free_ref(primary_if); 426 batadv_hardif_put(primary_if);
372} 427}
373 428
374/** 429/**
375 * batadv_bla_get_backbone_gw 430 * batadv_bla_get_backbone_gw - finds or creates a backbone gateway
376 * @bat_priv: the bat priv with all the soft interface information 431 * @bat_priv: the bat priv with all the soft interface information
377 * @orig: the mac address of the originator 432 * @orig: the mac address of the originator
378 * @vid: the VLAN ID 433 * @vid: the VLAN ID
379 * @own_backbone: set if the requested backbone is local 434 * @own_backbone: set if the requested backbone is local
380 * 435 *
381 * searches for the backbone gw or creates a new one if it could not 436 * Return: the (possibly created) backbone gateway or NULL on error
382 * be found.
383 */ 437 */
384static struct batadv_bla_backbone_gw * 438static struct batadv_bla_backbone_gw *
385batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, 439batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
@@ -412,7 +466,8 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
412 ether_addr_copy(entry->orig, orig); 466 ether_addr_copy(entry->orig, orig);
413 467
414 /* one for the hash, one for returning */ 468 /* one for the hash, one for returning */
415 atomic_set(&entry->refcount, 2); 469 kref_init(&entry->refcount);
470 kref_get(&entry->refcount);
416 471
417 hash_added = batadv_hash_add(bat_priv->bla.backbone_hash, 472 hash_added = batadv_hash_add(bat_priv->bla.backbone_hash,
418 batadv_compare_backbone_gw, 473 batadv_compare_backbone_gw,
@@ -430,7 +485,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
430 if (orig_node) { 485 if (orig_node) {
431 batadv_tt_global_del_orig(bat_priv, orig_node, vid, 486 batadv_tt_global_del_orig(bat_priv, orig_node, vid,
432 "became a backbone gateway"); 487 "became a backbone gateway");
433 batadv_orig_node_free_ref(orig_node); 488 batadv_orig_node_put(orig_node);
434 } 489 }
435 490
436 if (own_backbone) { 491 if (own_backbone) {
@@ -445,7 +500,13 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
445 return entry; 500 return entry;
446} 501}
447 502
448/* update or add the own backbone gw to make sure we announce 503/**
504 * batadv_bla_update_own_backbone_gw - updates the own backbone gw for a VLAN
505 * @bat_priv: the bat priv with all the soft interface information
506 * @primary_if: the selected primary interface
507 * @vid: VLAN identifier
508 *
509 * update or add the own backbone gw to make sure we announce
449 * where we receive other backbone gws 510 * where we receive other backbone gws
450 */ 511 */
451static void 512static void
@@ -462,7 +523,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv,
462 return; 523 return;
463 524
464 backbone_gw->lasttime = jiffies; 525 backbone_gw->lasttime = jiffies;
465 batadv_backbone_gw_free_ref(backbone_gw); 526 batadv_backbone_gw_put(backbone_gw);
466} 527}
467 528
468/** 529/**
@@ -511,7 +572,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
511 572
512 /* finally, send an announcement frame */ 573 /* finally, send an announcement frame */
513 batadv_bla_send_announce(bat_priv, backbone_gw); 574 batadv_bla_send_announce(bat_priv, backbone_gw);
514 batadv_backbone_gw_free_ref(backbone_gw); 575 batadv_backbone_gw_put(backbone_gw);
515} 576}
516 577
517/** 578/**
@@ -542,12 +603,9 @@ static void batadv_bla_send_request(struct batadv_bla_backbone_gw *backbone_gw)
542} 603}
543 604
544/** 605/**
545 * batadv_bla_send_announce 606 * batadv_bla_send_announce - Send an announcement frame
546 * @bat_priv: the bat priv with all the soft interface information 607 * @bat_priv: the bat priv with all the soft interface information
547 * @backbone_gw: our backbone gateway which should be announced 608 * @backbone_gw: our backbone gateway which should be announced
548 *
549 * This function sends an announcement. It is called from multiple
550 * places.
551 */ 609 */
552static void batadv_bla_send_announce(struct batadv_priv *bat_priv, 610static void batadv_bla_send_announce(struct batadv_priv *bat_priv,
553 struct batadv_bla_backbone_gw *backbone_gw) 611 struct batadv_bla_backbone_gw *backbone_gw)
@@ -595,7 +653,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
595 claim->lasttime = jiffies; 653 claim->lasttime = jiffies;
596 claim->backbone_gw = backbone_gw; 654 claim->backbone_gw = backbone_gw;
597 655
598 atomic_set(&claim->refcount, 2); 656 kref_init(&claim->refcount);
657 kref_get(&claim->refcount);
599 batadv_dbg(BATADV_DBG_BLA, bat_priv, 658 batadv_dbg(BATADV_DBG_BLA, bat_priv,
600 "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", 659 "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
601 mac, BATADV_PRINT_VID(vid)); 660 mac, BATADV_PRINT_VID(vid));
@@ -622,10 +681,10 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
622 spin_lock_bh(&claim->backbone_gw->crc_lock); 681 spin_lock_bh(&claim->backbone_gw->crc_lock);
623 claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); 682 claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
624 spin_unlock_bh(&claim->backbone_gw->crc_lock); 683 spin_unlock_bh(&claim->backbone_gw->crc_lock);
625 batadv_backbone_gw_free_ref(claim->backbone_gw); 684 batadv_backbone_gw_put(claim->backbone_gw);
626 } 685 }
627 /* set (new) backbone gw */ 686 /* set (new) backbone gw */
628 atomic_inc(&backbone_gw->refcount); 687 kref_get(&backbone_gw->refcount);
629 claim->backbone_gw = backbone_gw; 688 claim->backbone_gw = backbone_gw;
630 689
631 spin_lock_bh(&backbone_gw->crc_lock); 690 spin_lock_bh(&backbone_gw->crc_lock);
@@ -634,11 +693,14 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
634 backbone_gw->lasttime = jiffies; 693 backbone_gw->lasttime = jiffies;
635 694
636claim_free_ref: 695claim_free_ref:
637 batadv_claim_free_ref(claim); 696 batadv_claim_put(claim);
638} 697}
639 698
640/* Delete a claim from the claim hash which has the 699/**
641 * given mac address and vid. 700 * batadv_bla_del_claim - delete a claim from the claim hash
701 * @bat_priv: the bat priv with all the soft interface information
702 * @mac: mac address of the claim to be removed
703 * @vid: VLAN id for the claim to be removed
642 */ 704 */
643static void batadv_bla_del_claim(struct batadv_priv *bat_priv, 705static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
644 const u8 *mac, const unsigned short vid) 706 const u8 *mac, const unsigned short vid)
@@ -656,17 +718,25 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
656 718
657 batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, 719 batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
658 batadv_choose_claim, claim); 720 batadv_choose_claim, claim);
659 batadv_claim_free_ref(claim); /* reference from the hash is gone */ 721 batadv_claim_put(claim); /* reference from the hash is gone */
660 722
661 spin_lock_bh(&claim->backbone_gw->crc_lock); 723 spin_lock_bh(&claim->backbone_gw->crc_lock);
662 claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); 724 claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
663 spin_unlock_bh(&claim->backbone_gw->crc_lock); 725 spin_unlock_bh(&claim->backbone_gw->crc_lock);
664 726
665 /* don't need the reference from hash_find() anymore */ 727 /* don't need the reference from hash_find() anymore */
666 batadv_claim_free_ref(claim); 728 batadv_claim_put(claim);
667} 729}
668 730
669/* check for ANNOUNCE frame, return 1 if handled */ 731/**
732 * batadv_handle_announce - check for ANNOUNCE frame
733 * @bat_priv: the bat priv with all the soft interface information
734 * @an_addr: announcement mac address (ARP Sender HW address)
735 * @backbone_addr: originator address of the sender (Ethernet source MAC)
736 * @vid: the VLAN ID of the frame
737 *
738 * Return: 1 if handled
739 */
670static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, 740static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
671 u8 *backbone_addr, unsigned short vid) 741 u8 *backbone_addr, unsigned short vid)
672{ 742{
@@ -712,11 +782,20 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
712 } 782 }
713 } 783 }
714 784
715 batadv_backbone_gw_free_ref(backbone_gw); 785 batadv_backbone_gw_put(backbone_gw);
716 return 1; 786 return 1;
717} 787}
718 788
719/* check for REQUEST frame, return 1 if handled */ 789/**
790 * batadv_handle_request - check for REQUEST frame
791 * @bat_priv: the bat priv with all the soft interface information
792 * @primary_if: the primary hard interface of this batman soft interface
793 * @backbone_addr: backbone address to be requested (ARP sender HW MAC)
794 * @ethhdr: ethernet header of a packet
795 * @vid: the VLAN ID of the frame
796 *
797 * Return: 1 if handled
798 */
720static int batadv_handle_request(struct batadv_priv *bat_priv, 799static int batadv_handle_request(struct batadv_priv *bat_priv,
721 struct batadv_hard_iface *primary_if, 800 struct batadv_hard_iface *primary_if,
722 u8 *backbone_addr, struct ethhdr *ethhdr, 801 u8 *backbone_addr, struct ethhdr *ethhdr,
@@ -740,7 +819,16 @@ static int batadv_handle_request(struct batadv_priv *bat_priv,
740 return 1; 819 return 1;
741} 820}
742 821
743/* check for UNCLAIM frame, return 1 if handled */ 822/**
823 * batadv_handle_unclaim - check for UNCLAIM frame
824 * @bat_priv: the bat priv with all the soft interface information
825 * @primary_if: the primary hard interface of this batman soft interface
826 * @backbone_addr: originator address of the backbone (Ethernet source)
827 * @claim_addr: Client to be unclaimed (ARP sender HW MAC)
828 * @vid: the VLAN ID of the frame
829 *
830 * Return: 1 if handled
831 */
744static int batadv_handle_unclaim(struct batadv_priv *bat_priv, 832static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
745 struct batadv_hard_iface *primary_if, 833 struct batadv_hard_iface *primary_if,
746 u8 *backbone_addr, u8 *claim_addr, 834 u8 *backbone_addr, u8 *claim_addr,
@@ -765,11 +853,20 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
765 claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig); 853 claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig);
766 854
767 batadv_bla_del_claim(bat_priv, claim_addr, vid); 855 batadv_bla_del_claim(bat_priv, claim_addr, vid);
768 batadv_backbone_gw_free_ref(backbone_gw); 856 batadv_backbone_gw_put(backbone_gw);
769 return 1; 857 return 1;
770} 858}
771 859
772/* check for CLAIM frame, return 1 if handled */ 860/**
861 * batadv_handle_claim - check for CLAIM frame
862 * @bat_priv: the bat priv with all the soft interface information
863 * @primary_if: the primary hard interface of this batman soft interface
864 * @backbone_addr: originator address of the backbone (Ethernet Source)
865 * @claim_addr: client mac address to be claimed (ARP sender HW MAC)
866 * @vid: the VLAN ID of the frame
867 *
868 * Return: 1 if handled
869 */
773static int batadv_handle_claim(struct batadv_priv *bat_priv, 870static int batadv_handle_claim(struct batadv_priv *bat_priv,
774 struct batadv_hard_iface *primary_if, 871 struct batadv_hard_iface *primary_if,
775 u8 *backbone_addr, u8 *claim_addr, 872 u8 *backbone_addr, u8 *claim_addr,
@@ -793,12 +890,12 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
793 890
794 /* TODO: we could call something like tt_local_del() here. */ 891 /* TODO: we could call something like tt_local_del() here. */
795 892
796 batadv_backbone_gw_free_ref(backbone_gw); 893 batadv_backbone_gw_put(backbone_gw);
797 return 1; 894 return 1;
798} 895}
799 896
800/** 897/**
801 * batadv_check_claim_group 898 * batadv_check_claim_group - check for claim group membership
802 * @bat_priv: the bat priv with all the soft interface information 899 * @bat_priv: the bat priv with all the soft interface information
803 * @primary_if: the primary interface of this batman interface 900 * @primary_if: the primary interface of this batman interface
804 * @hw_src: the Hardware source in the ARP Header 901 * @hw_src: the Hardware source in the ARP Header
@@ -809,7 +906,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
809 * This function also applies the group ID of the sender 906 * This function also applies the group ID of the sender
810 * if it is in the same mesh. 907 * if it is in the same mesh.
811 * 908 *
812 * returns: 909 * Return:
813 * 2 - if it is a claim packet and on the same group 910 * 2 - if it is a claim packet and on the same group
814 * 1 - if is a claim packet from another group 911 * 1 - if is a claim packet from another group
815 * 0 - if it is not a claim packet 912 * 0 - if it is not a claim packet
@@ -867,20 +964,18 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
867 bla_dst_own->group = bla_dst->group; 964 bla_dst_own->group = bla_dst->group;
868 } 965 }
869 966
870 batadv_orig_node_free_ref(orig_node); 967 batadv_orig_node_put(orig_node);
871 968
872 return 2; 969 return 2;
873} 970}
874 971
875/** 972/**
876 * batadv_bla_process_claim 973 * batadv_bla_process_claim - Check if this is a claim frame, and process it
877 * @bat_priv: the bat priv with all the soft interface information 974 * @bat_priv: the bat priv with all the soft interface information
878 * @primary_if: the primary hard interface of this batman soft interface 975 * @primary_if: the primary hard interface of this batman soft interface
879 * @skb: the frame to be checked 976 * @skb: the frame to be checked
880 * 977 *
881 * Check if this is a claim frame, and process it accordingly. 978 * Return: 1 if it was a claim frame, otherwise return 0 to
882 *
883 * returns 1 if it was a claim frame, otherwise return 0 to
884 * tell the callee that it can use the frame on its own. 979 * tell the callee that it can use the frame on its own.
885 */ 980 */
886static int batadv_bla_process_claim(struct batadv_priv *bat_priv, 981static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
@@ -1011,7 +1106,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
1011 return 1; 1106 return 1;
1012} 1107}
1013 1108
1014/* Check when we last heard from other nodes, and remove them in case of 1109/**
1110 * batadv_bla_purge_backbone_gw - Remove backbone gateways after a timeout or
1111 * immediately
1112 * @bat_priv: the bat priv with all the soft interface information
1113 * @now: whether the whole hash shall be wiped now
1114 *
1115 * Check when we last heard from other nodes, and remove them in case of
1015 * a time out, or clean all backbone gws if now is set. 1116 * a time out, or clean all backbone gws if now is set.
1016 */ 1117 */
1017static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now) 1118static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
@@ -1052,14 +1153,14 @@ purge_now:
1052 batadv_bla_del_backbone_claims(backbone_gw); 1153 batadv_bla_del_backbone_claims(backbone_gw);
1053 1154
1054 hlist_del_rcu(&backbone_gw->hash_entry); 1155 hlist_del_rcu(&backbone_gw->hash_entry);
1055 batadv_backbone_gw_free_ref(backbone_gw); 1156 batadv_backbone_gw_put(backbone_gw);
1056 } 1157 }
1057 spin_unlock_bh(list_lock); 1158 spin_unlock_bh(list_lock);
1058 } 1159 }
1059} 1160}
1060 1161
1061/** 1162/**
1062 * batadv_bla_purge_claims 1163 * batadv_bla_purge_claims - Remove claims after a timeout or immediately
1063 * @bat_priv: the bat priv with all the soft interface information 1164 * @bat_priv: the bat priv with all the soft interface information
1064 * @primary_if: the selected primary interface, may be NULL if now is set 1165 * @primary_if: the selected primary interface, may be NULL if now is set
1065 * @now: whether the whole hash shall be wiped now 1166 * @now: whether the whole hash shall be wiped now
@@ -1108,12 +1209,11 @@ purge_now:
1108} 1209}
1109 1210
1110/** 1211/**
1111 * batadv_bla_update_orig_address 1212 * batadv_bla_update_orig_address - Update the backbone gateways when the own
1213 * originator address changes
1112 * @bat_priv: the bat priv with all the soft interface information 1214 * @bat_priv: the bat priv with all the soft interface information
1113 * @primary_if: the new selected primary_if 1215 * @primary_if: the new selected primary_if
1114 * @oldif: the old primary interface, may be NULL 1216 * @oldif: the old primary interface, may be NULL
1115 *
1116 * Update the backbone gateways when the own orig address changes.
1117 */ 1217 */
1118void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, 1218void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
1119 struct batadv_hard_iface *primary_if, 1219 struct batadv_hard_iface *primary_if,
@@ -1181,10 +1281,14 @@ void batadv_bla_status_update(struct net_device *net_dev)
1181 * so just call that one. 1281 * so just call that one.
1182 */ 1282 */
1183 batadv_bla_update_orig_address(bat_priv, primary_if, primary_if); 1283 batadv_bla_update_orig_address(bat_priv, primary_if, primary_if);
1184 batadv_hardif_free_ref(primary_if); 1284 batadv_hardif_put(primary_if);
1185} 1285}
1186 1286
1187/* periodic work to do: 1287/**
1288 * batadv_bla_periodic_work - performs periodic bla work
1289 * @work: kernel work struct
1290 *
1291 * periodic work to do:
1188 * * purge structures when they are too old 1292 * * purge structures when they are too old
1189 * * send announcements 1293 * * send announcements
1190 */ 1294 */
@@ -1251,7 +1355,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
1251 } 1355 }
1252out: 1356out:
1253 if (primary_if) 1357 if (primary_if)
1254 batadv_hardif_free_ref(primary_if); 1358 batadv_hardif_put(primary_if);
1255 1359
1256 queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, 1360 queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work,
1257 msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); 1361 msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH));
@@ -1265,7 +1369,12 @@ out:
1265static struct lock_class_key batadv_claim_hash_lock_class_key; 1369static struct lock_class_key batadv_claim_hash_lock_class_key;
1266static struct lock_class_key batadv_backbone_hash_lock_class_key; 1370static struct lock_class_key batadv_backbone_hash_lock_class_key;
1267 1371
1268/* initialize all bla structures */ 1372/**
1373 * batadv_bla_init - initialize all bla structures
1374 * @bat_priv: the bat priv with all the soft interface information
1375 *
1376 * Return: 0 on success, < 0 on error.
1377 */
1269int batadv_bla_init(struct batadv_priv *bat_priv) 1378int batadv_bla_init(struct batadv_priv *bat_priv)
1270{ 1379{
1271 int i; 1380 int i;
@@ -1285,7 +1394,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
1285 if (primary_if) { 1394 if (primary_if) {
1286 crc = crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN); 1395 crc = crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN);
1287 bat_priv->bla.claim_dest.group = htons(crc); 1396 bat_priv->bla.claim_dest.group = htons(crc);
1288 batadv_hardif_free_ref(primary_if); 1397 batadv_hardif_put(primary_if);
1289 } else { 1398 } else {
1290 bat_priv->bla.claim_dest.group = 0; /* will be set later */ 1399 bat_priv->bla.claim_dest.group = 0; /* will be set later */
1291 } 1400 }
@@ -1320,7 +1429,7 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
1320} 1429}
1321 1430
1322/** 1431/**
1323 * batadv_bla_check_bcast_duplist 1432 * batadv_bla_check_bcast_duplist - Check if a frame is in the broadcast dup.
1324 * @bat_priv: the bat priv with all the soft interface information 1433 * @bat_priv: the bat priv with all the soft interface information
1325 * @skb: contains the bcast_packet to be checked 1434 * @skb: contains the bcast_packet to be checked
1326 * 1435 *
@@ -1332,6 +1441,8 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
1332 * with a good chance that it is the same packet. If it is furthermore 1441 * with a good chance that it is the same packet. If it is furthermore
1333 * sent by another host, drop it. We allow equal packets from 1442 * sent by another host, drop it. We allow equal packets from
1334 * the same host however as this might be intended. 1443 * the same host however as this might be intended.
1444 *
1445 * Return: 1 if a packet is in the duplicate list, 0 otherwise.
1335 */ 1446 */
1336int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, 1447int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
1337 struct sk_buff *skb) 1448 struct sk_buff *skb)
@@ -1390,14 +1501,13 @@ out:
1390} 1501}
1391 1502
1392/** 1503/**
1393 * batadv_bla_is_backbone_gw_orig 1504 * batadv_bla_is_backbone_gw_orig - Check if the originator is a gateway for
1505 * the VLAN identified by vid.
1394 * @bat_priv: the bat priv with all the soft interface information 1506 * @bat_priv: the bat priv with all the soft interface information
1395 * @orig: originator mac address 1507 * @orig: originator mac address
1396 * @vid: VLAN identifier 1508 * @vid: VLAN identifier
1397 * 1509 *
1398 * Check if the originator is a gateway for the VLAN identified by vid. 1510 * Return: true if orig is a backbone for this vid, false otherwise.
1399 *
1400 * Returns true if orig is a backbone for this vid, false otherwise.
1401 */ 1511 */
1402bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, 1512bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
1403 unsigned short vid) 1513 unsigned short vid)
@@ -1431,14 +1541,13 @@ bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
1431} 1541}
1432 1542
1433/** 1543/**
1434 * batadv_bla_is_backbone_gw 1544 * batadv_bla_is_backbone_gw - check if originator is a backbone gw for a VLAN.
1435 * @skb: the frame to be checked 1545 * @skb: the frame to be checked
1436 * @orig_node: the orig_node of the frame 1546 * @orig_node: the orig_node of the frame
1437 * @hdr_size: maximum length of the frame 1547 * @hdr_size: maximum length of the frame
1438 * 1548 *
1439 * bla_is_backbone_gw inspects the skb for the VLAN ID and returns 1 1549 * Return: 1 if the orig_node is also a gateway on the soft interface, otherwise
1440 * if the orig_node is also a gateway on the soft interface, otherwise it 1550 * it returns 0.
1441 * returns 0.
1442 */ 1551 */
1443int batadv_bla_is_backbone_gw(struct sk_buff *skb, 1552int batadv_bla_is_backbone_gw(struct sk_buff *skb,
1444 struct batadv_orig_node *orig_node, int hdr_size) 1553 struct batadv_orig_node *orig_node, int hdr_size)
@@ -1461,11 +1570,16 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
1461 if (!backbone_gw) 1570 if (!backbone_gw)
1462 return 0; 1571 return 0;
1463 1572
1464 batadv_backbone_gw_free_ref(backbone_gw); 1573 batadv_backbone_gw_put(backbone_gw);
1465 return 1; 1574 return 1;
1466} 1575}
1467 1576
1468/* free all bla structures (for softinterface free or module unload) */ 1577/**
 1578 * batadv_bla_free - free all bla structures
1579 * @bat_priv: the bat priv with all the soft interface information
1580 *
1581 * for softinterface free or module unload
1582 */
1469void batadv_bla_free(struct batadv_priv *bat_priv) 1583void batadv_bla_free(struct batadv_priv *bat_priv)
1470{ 1584{
1471 struct batadv_hard_iface *primary_if; 1585 struct batadv_hard_iface *primary_if;
@@ -1484,22 +1598,23 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
1484 bat_priv->bla.backbone_hash = NULL; 1598 bat_priv->bla.backbone_hash = NULL;
1485 } 1599 }
1486 if (primary_if) 1600 if (primary_if)
1487 batadv_hardif_free_ref(primary_if); 1601 batadv_hardif_put(primary_if);
1488} 1602}
1489 1603
1490/** 1604/**
1491 * batadv_bla_rx 1605 * batadv_bla_rx - check packets coming from the mesh.
1492 * @bat_priv: the bat priv with all the soft interface information 1606 * @bat_priv: the bat priv with all the soft interface information
1493 * @skb: the frame to be checked 1607 * @skb: the frame to be checked
1494 * @vid: the VLAN ID of the frame 1608 * @vid: the VLAN ID of the frame
1495 * @is_bcast: the packet came in a broadcast packet type. 1609 * @is_bcast: the packet came in a broadcast packet type.
1496 * 1610 *
1497 * bla_rx avoidance checks if: 1611 * batadv_bla_rx avoidance checks if:
1498 * * we have to race for a claim 1612 * * we have to race for a claim
1499 * * if the frame is allowed on the LAN 1613 * * if the frame is allowed on the LAN
1500 * 1614 *
1501 * in these cases, the skb is further handled by this function and 1615 * in these cases, the skb is further handled by this function
1502 * returns 1, otherwise it returns 0 and the caller shall further 1616 *
1617 * Return: 1 if handled, otherwise it returns 0 and the caller shall further
1503 * process the skb. 1618 * process the skb.
1504 */ 1619 */
1505int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, 1620int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
@@ -1576,27 +1691,28 @@ handled:
1576 1691
1577out: 1692out:
1578 if (primary_if) 1693 if (primary_if)
1579 batadv_hardif_free_ref(primary_if); 1694 batadv_hardif_put(primary_if);
1580 if (claim) 1695 if (claim)
1581 batadv_claim_free_ref(claim); 1696 batadv_claim_put(claim);
1582 return ret; 1697 return ret;
1583} 1698}
1584 1699
1585/** 1700/**
1586 * batadv_bla_tx 1701 * batadv_bla_tx - check packets going into the mesh
1587 * @bat_priv: the bat priv with all the soft interface information 1702 * @bat_priv: the bat priv with all the soft interface information
1588 * @skb: the frame to be checked 1703 * @skb: the frame to be checked
1589 * @vid: the VLAN ID of the frame 1704 * @vid: the VLAN ID of the frame
1590 * 1705 *
1591 * bla_tx checks if: 1706 * batadv_bla_tx checks if:
1592 * * a claim was received which has to be processed 1707 * * a claim was received which has to be processed
1593 * * the frame is allowed on the mesh 1708 * * the frame is allowed on the mesh
1594 * 1709 *
1595 * in these cases, the skb is further handled by this function and 1710 * in these cases, the skb is further handled by this function.
1596 * returns 1, otherwise it returns 0 and the caller shall further
1597 * process the skb.
1598 * 1711 *
1599 * This call might reallocate skb data. 1712 * This call might reallocate skb data.
1713 *
1714 * Return: 1 if handled, otherwise it returns 0 and the caller shall further
1715 * process the skb.
1600 */ 1716 */
1601int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, 1717int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
1602 unsigned short vid) 1718 unsigned short vid)
@@ -1664,12 +1780,19 @@ handled:
1664 ret = 1; 1780 ret = 1;
1665out: 1781out:
1666 if (primary_if) 1782 if (primary_if)
1667 batadv_hardif_free_ref(primary_if); 1783 batadv_hardif_put(primary_if);
1668 if (claim) 1784 if (claim)
1669 batadv_claim_free_ref(claim); 1785 batadv_claim_put(claim);
1670 return ret; 1786 return ret;
1671} 1787}
1672 1788
1789/**
1790 * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file
1791 * @seq: seq file to print on
1792 * @offset: not used
1793 *
1794 * Return: always 0
1795 */
1673int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) 1796int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
1674{ 1797{
1675 struct net_device *net_dev = (struct net_device *)seq->private; 1798 struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1715,10 +1838,18 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
1715 } 1838 }
1716out: 1839out:
1717 if (primary_if) 1840 if (primary_if)
1718 batadv_hardif_free_ref(primary_if); 1841 batadv_hardif_put(primary_if);
1719 return 0; 1842 return 0;
1720} 1843}
1721 1844
1845/**
1846 * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq
1847 * file
1848 * @seq: seq file to print on
1849 * @offset: not used
1850 *
1851 * Return: always 0
1852 */
1722int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) 1853int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
1723{ 1854{
1724 struct net_device *net_dev = (struct net_device *)seq->private; 1855 struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1772,6 +1903,6 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
1772 } 1903 }
1773out: 1904out:
1774 if (primary_if) 1905 if (primary_if)
1775 batadv_hardif_free_ref(primary_if); 1906 batadv_hardif_put(primary_if);
1776 return 0; 1907 return 0;
1777} 1908}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 7ea199b8b5ab..579f0fa6fe6a 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich 3 * Simon Wunderlich
4 * 4 *
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 037ad0a5f485..48253cf8341b 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -281,6 +281,8 @@ static int batadv_originators_open(struct inode *inode, struct file *file)
281 * originator table of a hard interface 281 * originator table of a hard interface
282 * @inode: inode pointer to debugfs file 282 * @inode: inode pointer to debugfs file
283 * @file: pointer to the seq_file 283 * @file: pointer to the seq_file
284 *
285 * Return: 0 on success or negative error number in case of failure
284 */ 286 */
285static int batadv_originators_hardif_open(struct inode *inode, 287static int batadv_originators_hardif_open(struct inode *inode,
286 struct file *file) 288 struct file *file)
@@ -329,6 +331,8 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
329 * batadv_dat_cache_open - Prepare file handler for reads from dat_cache 331 * batadv_dat_cache_open - Prepare file handler for reads from dat_cache
330 * @inode: inode which was opened 332 * @inode: inode which was opened
331 * @file: file handle to be initialized 333 * @file: file handle to be initialized
334 *
335 * Return: 0 on success or negative error number in case of failure
332 */ 336 */
333static int batadv_dat_cache_open(struct inode *inode, struct file *file) 337static int batadv_dat_cache_open(struct inode *inode, struct file *file)
334{ 338{
@@ -483,6 +487,8 @@ void batadv_debugfs_destroy(void)
483 * batadv_debugfs_add_hardif - creates the base directory for a hard interface 487 * batadv_debugfs_add_hardif - creates the base directory for a hard interface
484 * in debugfs. 488 * in debugfs.
485 * @hard_iface: hard interface which should be added. 489 * @hard_iface: hard interface which should be added.
490 *
491 * Return: 0 on success or negative error number in case of failure
486 */ 492 */
487int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) 493int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
488{ 494{
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 80ab8d6f0ab3..1ab4e2e63afc 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index a49c705fb86b..e96d7c745b4a 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Antonio Quartulli 3 * Antonio Quartulli
4 * 4 *
@@ -30,6 +30,7 @@
30#include <linux/in.h> 30#include <linux/in.h>
31#include <linux/jiffies.h> 31#include <linux/jiffies.h>
32#include <linux/kernel.h> 32#include <linux/kernel.h>
33#include <linux/kref.h>
33#include <linux/list.h> 34#include <linux/list.h>
34#include <linux/rculist.h> 35#include <linux/rculist.h>
35#include <linux/rcupdate.h> 36#include <linux/rcupdate.h>
@@ -62,21 +63,34 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
62} 63}
63 64
64/** 65/**
65 * batadv_dat_entry_free_ref - decrement the dat_entry refcounter and possibly 66 * batadv_dat_entry_release - release dat_entry from lists and queue for free
66 * free it 67 * after rcu grace period
67 * @dat_entry: the entry to free 68 * @ref: kref pointer of the dat_entry
68 */ 69 */
69static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) 70static void batadv_dat_entry_release(struct kref *ref)
70{ 71{
71 if (atomic_dec_and_test(&dat_entry->refcount)) 72 struct batadv_dat_entry *dat_entry;
72 kfree_rcu(dat_entry, rcu); 73
74 dat_entry = container_of(ref, struct batadv_dat_entry, refcount);
75
76 kfree_rcu(dat_entry, rcu);
77}
78
79/**
80 * batadv_dat_entry_put - decrement the dat_entry refcounter and possibly
81 * release it
82 * @dat_entry: dat_entry to be free'd
83 */
84static void batadv_dat_entry_put(struct batadv_dat_entry *dat_entry)
85{
86 kref_put(&dat_entry->refcount, batadv_dat_entry_release);
73} 87}
74 88
75/** 89/**
76 * batadv_dat_to_purge - check whether a dat_entry has to be purged or not 90 * batadv_dat_to_purge - check whether a dat_entry has to be purged or not
77 * @dat_entry: the entry to check 91 * @dat_entry: the entry to check
78 * 92 *
79 * Returns true if the entry has to be purged now, false otherwise. 93 * Return: true if the entry has to be purged now, false otherwise.
80 */ 94 */
81static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) 95static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry)
82{ 96{
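Note: batadv_dat_entry_release()/batadv_dat_entry_put() above follow the standard kref release pattern: the final kref_put() invokes a release callback that recovers the containing object with container_of() and frees it after an RCU grace period. A self-contained sketch of the same pattern with hypothetical names:

	#include <linux/kref.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo_entry {
		struct kref refcount;
		struct rcu_head rcu;
	};

	/* called only when the last reference is dropped */
	static void foo_entry_release(struct kref *ref)
	{
		struct foo_entry *entry = container_of(ref, struct foo_entry, refcount);

		kfree_rcu(entry, rcu);	/* wait for RCU readers before freeing */
	}

	static void foo_entry_put(struct foo_entry *entry)
	{
		kref_put(&entry->refcount, foo_entry_release);
	}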
@@ -121,7 +135,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv,
121 continue; 135 continue;
122 136
123 hlist_del_rcu(&dat_entry->hash_entry); 137 hlist_del_rcu(&dat_entry->hash_entry);
124 batadv_dat_entry_free_ref(dat_entry); 138 batadv_dat_entry_put(dat_entry);
125 } 139 }
126 spin_unlock_bh(list_lock); 140 spin_unlock_bh(list_lock);
127 } 141 }
@@ -151,7 +165,7 @@ static void batadv_dat_purge(struct work_struct *work)
151 * @node: node in the local table 165 * @node: node in the local table
152 * @data2: second object to compare the node to 166 * @data2: second object to compare the node to
153 * 167 *
154 * Returns 1 if the two entries are the same, 0 otherwise. 168 * Return: 1 if the two entries are the same, 0 otherwise.
155 */ 169 */
156static int batadv_compare_dat(const struct hlist_node *node, const void *data2) 170static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
157{ 171{
@@ -166,7 +180,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2)
166 * @skb: ARP packet 180 * @skb: ARP packet
167 * @hdr_size: size of the possible header before the ARP packet 181 * @hdr_size: size of the possible header before the ARP packet
168 * 182 *
169 * Returns the value of the hw_src field in the ARP packet. 183 * Return: the value of the hw_src field in the ARP packet.
170 */ 184 */
171static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) 185static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
172{ 186{
@@ -183,7 +197,7 @@ static u8 *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size)
183 * @skb: ARP packet 197 * @skb: ARP packet
184 * @hdr_size: size of the possible header before the ARP packet 198 * @hdr_size: size of the possible header before the ARP packet
185 * 199 *
186 * Returns the value of the ip_src field in the ARP packet. 200 * Return: the value of the ip_src field in the ARP packet.
187 */ 201 */
188static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) 202static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
189{ 203{
@@ -195,7 +209,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size)
195 * @skb: ARP packet 209 * @skb: ARP packet
196 * @hdr_size: size of the possible header before the ARP packet 210 * @hdr_size: size of the possible header before the ARP packet
197 * 211 *
198 * Returns the value of the hw_dst field in the ARP packet. 212 * Return: the value of the hw_dst field in the ARP packet.
199 */ 213 */
200static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) 214static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
201{ 215{
@@ -207,7 +221,7 @@ static u8 *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size)
207 * @skb: ARP packet 221 * @skb: ARP packet
208 * @hdr_size: size of the possible header before the ARP packet 222 * @hdr_size: size of the possible header before the ARP packet
209 * 223 *
210 * Returns the value of the ip_dst field in the ARP packet. 224 * Return: the value of the ip_dst field in the ARP packet.
211 */ 225 */
212static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) 226static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
213{ 227{
@@ -219,7 +233,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size)
219 * @data: data to hash 233 * @data: data to hash
220 * @size: size of the hash table 234 * @size: size of the hash table
221 * 235 *
222 * Returns the selected index in the hash table for the given data. 236 * Return: the selected index in the hash table for the given data.
223 */ 237 */
224static u32 batadv_hash_dat(const void *data, u32 size) 238static u32 batadv_hash_dat(const void *data, u32 size)
225{ 239{
@@ -256,7 +270,7 @@ static u32 batadv_hash_dat(const void *data, u32 size)
256 * @ip: search key 270 * @ip: search key
257 * @vid: VLAN identifier 271 * @vid: VLAN identifier
258 * 272 *
259 * Returns the dat_entry if found, NULL otherwise. 273 * Return: the dat_entry if found, NULL otherwise.
260 */ 274 */
261static struct batadv_dat_entry * 275static struct batadv_dat_entry *
262batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip, 276batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip,
@@ -281,7 +295,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip,
281 if (dat_entry->ip != ip) 295 if (dat_entry->ip != ip)
282 continue; 296 continue;
283 297
284 if (!atomic_inc_not_zero(&dat_entry->refcount)) 298 if (!kref_get_unless_zero(&dat_entry->refcount))
285 continue; 299 continue;
286 300
287 dat_entry_tmp = dat_entry; 301 dat_entry_tmp = dat_entry;
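Note: the switch from atomic_inc_not_zero() to kref_get_unless_zero() in the RCU hash lookup keeps the same semantics: a reader may only take a reference if the entry's count has not already dropped to zero. A sketch of that lookup pattern with a hypothetical key and struct:

	#include <linux/kref.h>
	#include <linux/rculist.h>
	#include <linux/types.h>

	struct foo_entry {
		struct hlist_node hash_entry;
		u32 key;
		struct kref refcount;
	};

	static struct foo_entry *foo_entry_hash_find(struct hlist_head *head, u32 key)
	{
		struct foo_entry *entry, *found = NULL;

		rcu_read_lock();
		hlist_for_each_entry_rcu(entry, head, hash_entry) {
			if (entry->key != key)
				continue;

			/* entry is already being freed if its refcount hit zero */
			if (!kref_get_unless_zero(&entry->refcount))
				continue;

			found = entry;
			break;
		}
		rcu_read_unlock();

		return found;
	}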
@@ -326,7 +340,8 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
326 dat_entry->vid = vid; 340 dat_entry->vid = vid;
327 ether_addr_copy(dat_entry->mac_addr, mac_addr); 341 ether_addr_copy(dat_entry->mac_addr, mac_addr);
328 dat_entry->last_update = jiffies; 342 dat_entry->last_update = jiffies;
329 atomic_set(&dat_entry->refcount, 2); 343 kref_init(&dat_entry->refcount);
344 kref_get(&dat_entry->refcount);
330 345
331 hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat, 346 hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat,
332 batadv_hash_dat, dat_entry, 347 batadv_hash_dat, dat_entry,
@@ -334,7 +349,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
334 349
335 if (unlikely(hash_added != 0)) { 350 if (unlikely(hash_added != 0)) {
336 /* remove the reference for the hash */ 351 /* remove the reference for the hash */
337 batadv_dat_entry_free_ref(dat_entry); 352 batadv_dat_entry_put(dat_entry);
338 goto out; 353 goto out;
339 } 354 }
340 355
@@ -343,7 +358,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
343 358
344out: 359out:
345 if (dat_entry) 360 if (dat_entry)
346 batadv_dat_entry_free_ref(dat_entry); 361 batadv_dat_entry_put(dat_entry);
347} 362}
348 363
349#ifdef CONFIG_BATMAN_ADV_DEBUG 364#ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -440,7 +455,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
440 * @candidate: orig_node under evaluation 455 * @candidate: orig_node under evaluation
441 * @max_orig_node: last selected candidate 456 * @max_orig_node: last selected candidate
442 * 457 *
443 * Returns true if the node has been elected as next candidate or false 458 * Return: true if the node has been elected as next candidate or false
444 * otherwise. 459 * otherwise.
445 */ 460 */
446static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, 461static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
@@ -527,12 +542,12 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
527 max_orig_node)) 542 max_orig_node))
528 continue; 543 continue;
529 544
530 if (!atomic_inc_not_zero(&orig_node->refcount)) 545 if (!kref_get_unless_zero(&orig_node->refcount))
531 continue; 546 continue;
532 547
533 max = tmp_max; 548 max = tmp_max;
534 if (max_orig_node) 549 if (max_orig_node)
535 batadv_orig_node_free_ref(max_orig_node); 550 batadv_orig_node_put(max_orig_node);
536 max_orig_node = orig_node; 551 max_orig_node = orig_node;
537 } 552 }
538 rcu_read_unlock(); 553 rcu_read_unlock();
@@ -558,7 +573,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
558 * closest values (from the LEFT, with wrap around if needed) then the hash 573 * closest values (from the LEFT, with wrap around if needed) then the hash
559 * value of the key. ip_dst is the key. 574 * value of the key. ip_dst is the key.
560 * 575 *
561 * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM. 576 * Return: the candidate array of size BATADV_DAT_CANDIDATE_NUM.
562 */ 577 */
563static struct batadv_dat_candidate * 578static struct batadv_dat_candidate *
564batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) 579batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
@@ -602,7 +617,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
602 * This function copies the skb with pskb_copy() and is sent as unicast packet 617 * This function copies the skb with pskb_copy() and is sent as unicast packet
603 * to each of the selected candidates. 618 * to each of the selected candidates.
604 * 619 *
605 * Returns true if the packet is sent to at least one candidate, false 620 * Return: true if the packet is sent to at least one candidate, false
606 * otherwise. 621 * otherwise.
607 */ 622 */
608static bool batadv_dat_send_data(struct batadv_priv *bat_priv, 623static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
@@ -639,9 +654,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
639 goto free_neigh; 654 goto free_neigh;
640 } 655 }
641 656
642 send_status = batadv_send_skb_packet(tmp_skb, 657 send_status = batadv_send_unicast_skb(tmp_skb, neigh_node);
643 neigh_node->if_incoming,
644 neigh_node->addr);
645 if (send_status == NET_XMIT_SUCCESS) { 658 if (send_status == NET_XMIT_SUCCESS) {
646 /* count the sent packet */ 659 /* count the sent packet */
647 switch (packet_subtype) { 660 switch (packet_subtype) {
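Note: batadv_send_unicast_skb() replaces the three-argument batadv_send_skb_packet() call at the sites above; judging only from the arguments it drops, it presumably forwards the skb to the neighbour's incoming interface and address. A hedged sketch of such a wrapper, with the body inferred from the replaced call sites rather than taken from the actual batman-adv implementation:

	/* Sketch only: body inferred from the replaced call sites above,
	 * not the actual batman-adv implementation.
	 */
	int batadv_send_unicast_skb(struct sk_buff *skb,
				    struct batadv_neigh_node *neigh)
	{
		return batadv_send_skb_packet(skb, neigh->if_incoming, neigh->addr);
	}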
@@ -659,9 +672,9 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
659 ret = true; 672 ret = true;
660 } 673 }
661free_neigh: 674free_neigh:
662 batadv_neigh_node_free_ref(neigh_node); 675 batadv_neigh_node_put(neigh_node);
663free_orig: 676free_orig:
664 batadv_orig_node_free_ref(cand[i].orig_node); 677 batadv_orig_node_put(cand[i].orig_node);
665 } 678 }
666 679
667out: 680out:
@@ -741,6 +754,8 @@ static void batadv_dat_hash_free(struct batadv_priv *bat_priv)
741/** 754/**
742 * batadv_dat_init - initialise the DAT internals 755 * batadv_dat_init - initialise the DAT internals
743 * @bat_priv: the bat priv with all the soft interface information 756 * @bat_priv: the bat priv with all the soft interface information
757 *
758 * Return: 0 in case of success, a negative error code otherwise
744 */ 759 */
745int batadv_dat_init(struct batadv_priv *bat_priv) 760int batadv_dat_init(struct batadv_priv *bat_priv)
746{ 761{
@@ -779,6 +794,8 @@ void batadv_dat_free(struct batadv_priv *bat_priv)
779 * batadv_dat_cache_seq_print_text - print the local DAT hash table 794 * batadv_dat_cache_seq_print_text - print the local DAT hash table
780 * @seq: seq file to print on 795 * @seq: seq file to print on
781 * @offset: not used 796 * @offset: not used
797 *
798 * Return: always 0
782 */ 799 */
783int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) 800int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
784{ 801{
@@ -821,7 +838,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
821 838
822out: 839out:
823 if (primary_if) 840 if (primary_if)
824 batadv_hardif_free_ref(primary_if); 841 batadv_hardif_put(primary_if);
825 return 0; 842 return 0;
826} 843}
827 844
@@ -831,7 +848,7 @@ out:
831 * @skb: packet to analyse 848 * @skb: packet to analyse
832 * @hdr_size: size of the possible header before the ARP packet in the skb 849 * @hdr_size: size of the possible header before the ARP packet in the skb
833 * 850 *
834 * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise. 851 * Return: the ARP type if the skb contains a valid ARP packet, 0 otherwise.
835 */ 852 */
836static u16 batadv_arp_get_type(struct batadv_priv *bat_priv, 853static u16 batadv_arp_get_type(struct batadv_priv *bat_priv,
837 struct sk_buff *skb, int hdr_size) 854 struct sk_buff *skb, int hdr_size)
@@ -904,8 +921,9 @@ out:
904 * @skb: the buffer containing the packet to extract the VID from 921 * @skb: the buffer containing the packet to extract the VID from
905 * @hdr_size: the size of the batman-adv header encapsulating the packet 922 * @hdr_size: the size of the batman-adv header encapsulating the packet
906 * 923 *
907 * If the packet embedded in the skb is vlan tagged this function returns the 924 * Return: If the packet embedded in the skb is vlan tagged this function
908 * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned. 925 * returns the VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS
926 * is returned.
909 */ 927 */
910static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size) 928static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
911{ 929{
@@ -930,7 +948,7 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
930 * @bat_priv: the bat priv with all the soft interface information 948 * @bat_priv: the bat priv with all the soft interface information
931 * @skb: packet to check 949 * @skb: packet to check
932 * 950 *
933 * Returns true if the message has been sent to the dht candidates, false 951 * Return: true if the message has been sent to the dht candidates, false
934 * otherwise. In case of a positive return value the message has to be enqueued 952 * otherwise. In case of a positive return value the message has to be enqueued
935 * to permit the fallback. 953 * to permit the fallback.
936 */ 954 */
@@ -1009,7 +1027,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
1009 } 1027 }
1010out: 1028out:
1011 if (dat_entry) 1029 if (dat_entry)
1012 batadv_dat_entry_free_ref(dat_entry); 1030 batadv_dat_entry_put(dat_entry);
1013 return ret; 1031 return ret;
1014} 1032}
1015 1033
@@ -1020,7 +1038,7 @@ out:
1020 * @skb: packet to check 1038 * @skb: packet to check
1021 * @hdr_size: size of the encapsulation header 1039 * @hdr_size: size of the encapsulation header
1022 * 1040 *
1023 * Returns true if the request has been answered, false otherwise. 1041 * Return: true if the request has been answered, false otherwise.
1024 */ 1042 */
1025bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, 1043bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
1026 struct sk_buff *skb, int hdr_size) 1044 struct sk_buff *skb, int hdr_size)
@@ -1089,7 +1107,7 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
1089 } 1107 }
1090out: 1108out:
1091 if (dat_entry) 1109 if (dat_entry)
1092 batadv_dat_entry_free_ref(dat_entry); 1110 batadv_dat_entry_put(dat_entry);
1093 if (ret) 1111 if (ret)
1094 kfree_skb(skb); 1112 kfree_skb(skb);
1095 return ret; 1113 return ret;
@@ -1143,7 +1161,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
1143 * @skb: packet to check 1161 * @skb: packet to check
1144 * @hdr_size: size of the encapsulation header 1162 * @hdr_size: size of the encapsulation header
1145 * 1163 *
1146 * Returns true if the packet was snooped and consumed by DAT. False if the 1164 * Return: true if the packet was snooped and consumed by DAT. False if the
1147 * packet has to be delivered to the interface 1165 * packet has to be delivered to the interface
1148 */ 1166 */
1149bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, 1167bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
@@ -1200,7 +1218,7 @@ out:
1200 * @bat_priv: the bat priv with all the soft interface information 1218 * @bat_priv: the bat priv with all the soft interface information
1201 * @forw_packet: the broadcast packet 1219 * @forw_packet: the broadcast packet
1202 * 1220 *
1203 * Returns true if the node can drop the packet, false otherwise. 1221 * Return: true if the node can drop the packet, false otherwise.
1204 */ 1222 */
1205bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, 1223bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
1206 struct batadv_forw_packet *forw_packet) 1224 struct batadv_forw_packet *forw_packet)
@@ -1242,6 +1260,6 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
1242 1260
1243out: 1261out:
1244 if (dat_entry) 1262 if (dat_entry)
1245 batadv_dat_entry_free_ref(dat_entry); 1263 batadv_dat_entry_put(dat_entry);
1246 return ret; 1264 return ret;
1247} 1265}
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 26d4a525a798..813ecea96cf9 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2011-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Antonio Quartulli 3 * Antonio Quartulli
4 * 4 *
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 20d9282f895b..e6956d0746a2 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Martin Hundebøll <martin@hundeboll.net> 3 * Martin Hundebøll <martin@hundeboll.net>
4 * 4 *
@@ -85,7 +85,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
85/** 85/**
86 * batadv_frag_size_limit - maximum possible size of packet to be fragmented 86 * batadv_frag_size_limit - maximum possible size of packet to be fragmented
87 * 87 *
88 * Returns the maximum size of payload that can be fragmented. 88 * Return: the maximum size of payload that can be fragmented.
89 */ 89 */
90static int batadv_frag_size_limit(void) 90static int batadv_frag_size_limit(void)
91{ 91{
@@ -107,7 +107,7 @@ static int batadv_frag_size_limit(void)
107 * 107 *
108 * Caller must hold chain->lock. 108 * Caller must hold chain->lock.
109 * 109 *
110 * Returns true if chain is empty and caller can just insert the new fragment 110 * Return: true if chain is empty and caller can just insert the new fragment
111 * without searching for the right position. 111 * without searching for the right position.
112 */ 112 */
113static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain, 113static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
@@ -136,7 +136,7 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
136 * Insert a new fragment into the reverse ordered chain in the right table 136 * Insert a new fragment into the reverse ordered chain in the right table
137 * entry. The hash table entry is cleared if "old" fragments exist in it. 137 * entry. The hash table entry is cleared if "old" fragments exist in it.
138 * 138 *
139 * Returns true if skb is buffered, false on error. If the chain has all the 139 * Return: true if skb is buffered, false on error. If the chain has all the
140 * fragments needed to merge the packet, the chain is moved to the passed head 140 * fragments needed to merge the packet, the chain is moved to the passed head
141 * to avoid locking the chain in the table. 141 * to avoid locking the chain in the table.
142 */ 142 */
@@ -242,12 +242,11 @@ err:
242/** 242/**
243 * batadv_frag_merge_packets - merge a chain of fragments 243 * batadv_frag_merge_packets - merge a chain of fragments
244 * @chain: head of chain with fragments 244 * @chain: head of chain with fragments
245 * @skb: packet with total size of skb after merging
246 * 245 *
247 * Expand the first skb in the chain and copy the content of the remaining 246 * Expand the first skb in the chain and copy the content of the remaining
248 * skb's into the expanded one. After doing so, clear the chain. 247 * skb's into the expanded one. After doing so, clear the chain.
249 * 248 *
250 * Returns the merged skb or NULL on error. 249 * Return: the merged skb or NULL on error.
251 */ 250 */
252static struct sk_buff * 251static struct sk_buff *
253batadv_frag_merge_packets(struct hlist_head *chain) 252batadv_frag_merge_packets(struct hlist_head *chain)
@@ -307,6 +306,9 @@ free:
307 * There are three possible outcomes: 1) Packet is merged: Return true and 306 * There are three possible outcomes: 1) Packet is merged: Return true and
308 * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb 307 * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
309 * to NULL; 3) Error: Return false and leave skb as is. 308 * to NULL; 3) Error: Return false and leave skb as is.
309 *
 310 * Return: true when packet is merged or buffered, false when skb is not
311 * used.
310 */ 312 */
311bool batadv_frag_skb_buffer(struct sk_buff **skb, 313bool batadv_frag_skb_buffer(struct sk_buff **skb,
312 struct batadv_orig_node *orig_node_src) 314 struct batadv_orig_node *orig_node_src)
@@ -344,7 +346,7 @@ out_err:
344 * will exceed the MTU towards the next-hop. If so, the fragment is forwarded 346 * will exceed the MTU towards the next-hop. If so, the fragment is forwarded
345 * without merging it. 347 * without merging it.
346 * 348 *
347 * Returns true if the fragment is consumed/forwarded, false otherwise. 349 * Return: true if the fragment is consumed/forwarded, false otherwise.
348 */ 350 */
349bool batadv_frag_skb_fwd(struct sk_buff *skb, 351bool batadv_frag_skb_fwd(struct sk_buff *skb,
350 struct batadv_hard_iface *recv_if, 352 struct batadv_hard_iface *recv_if,
@@ -376,16 +378,15 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
376 skb->len + ETH_HLEN); 378 skb->len + ETH_HLEN);
377 379
378 packet->ttl--; 380 packet->ttl--;
379 batadv_send_skb_packet(skb, neigh_node->if_incoming, 381 batadv_send_unicast_skb(skb, neigh_node);
380 neigh_node->addr);
381 ret = true; 382 ret = true;
382 } 383 }
383 384
384out: 385out:
385 if (orig_node_dst) 386 if (orig_node_dst)
386 batadv_orig_node_free_ref(orig_node_dst); 387 batadv_orig_node_put(orig_node_dst);
387 if (neigh_node) 388 if (neigh_node)
388 batadv_neigh_node_free_ref(neigh_node); 389 batadv_neigh_node_put(neigh_node);
389 return ret; 390 return ret;
390} 391}
391 392
@@ -399,7 +400,7 @@ out:
399 * passed mtu and the old one with the rest. The new skb contains data from the 400 * passed mtu and the old one with the rest. The new skb contains data from the
400 * tail of the old skb. 401 * tail of the old skb.
401 * 402 *
402 * Returns the new fragment, NULL on error. 403 * Return: the new fragment, NULL on error.
403 */ 404 */
404static struct sk_buff *batadv_frag_create(struct sk_buff *skb, 405static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
405 struct batadv_frag_packet *frag_head, 406 struct batadv_frag_packet *frag_head,
@@ -433,7 +434,7 @@ err:
433 * @orig_node: final destination of the created fragments 434 * @orig_node: final destination of the created fragments
434 * @neigh_node: next-hop of the created fragments 435 * @neigh_node: next-hop of the created fragments
435 * 436 *
436 * Returns true on success, false otherwise. 437 * Return: true on success, false otherwise.
437 */ 438 */
438bool batadv_frag_send_packet(struct sk_buff *skb, 439bool batadv_frag_send_packet(struct sk_buff *skb,
439 struct batadv_orig_node *orig_node, 440 struct batadv_orig_node *orig_node,
@@ -484,8 +485,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
484 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); 485 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
485 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, 486 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
486 skb_fragment->len + ETH_HLEN); 487 skb_fragment->len + ETH_HLEN);
487 batadv_send_skb_packet(skb_fragment, neigh_node->if_incoming, 488 batadv_send_unicast_skb(skb_fragment, neigh_node);
488 neigh_node->addr);
489 frag_header.no++; 489 frag_header.no++;
490 490
491 /* The initial check in this function should cover this case */ 491 /* The initial check in this function should cover this case */
@@ -504,13 +504,13 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
504 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); 504 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
505 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, 505 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
506 skb->len + ETH_HLEN); 506 skb->len + ETH_HLEN);
507 batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); 507 batadv_send_unicast_skb(skb, neigh_node);
508 508
509 ret = true; 509 ret = true;
510 510
511out_err: 511out_err:
512 if (primary_if) 512 if (primary_if)
513 batadv_hardif_free_ref(primary_if); 513 batadv_hardif_put(primary_if);
514 514
515 return ret; 515 return ret;
516} 516}
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 8b9877e70b95..9ff77c7ef7c7 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2013-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2013-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Martin Hundebøll <martin@hundeboll.net> 3 * Martin Hundebøll <martin@hundeboll.net>
4 * 4 *
@@ -42,7 +42,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb,
42 * batadv_frag_check_entry - check if a list of fragments has timed out 42 * batadv_frag_check_entry - check if a list of fragments has timed out
43 * @frags_entry: table entry to check 43 * @frags_entry: table entry to check
44 * 44 *
45 * Returns true if the frags entry has timed out, false otherwise. 45 * Return: true if the frags entry has timed out, false otherwise.
46 */ 46 */
47static inline bool 47static inline bool
48batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry) 48batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry)
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index e6c8382c79ba..c59aff5ccac8 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -28,6 +28,7 @@
28#include <linux/ip.h> 28#include <linux/ip.h>
29#include <linux/ipv6.h> 29#include <linux/ipv6.h>
30#include <linux/kernel.h> 30#include <linux/kernel.h>
31#include <linux/kref.h>
31#include <linux/list.h> 32#include <linux/list.h>
32#include <linux/netdevice.h> 33#include <linux/netdevice.h>
33#include <linux/rculist.h> 34#include <linux/rculist.h>
@@ -59,12 +60,28 @@
59 */ 60 */
60#define BATADV_DHCP_CHADDR_OFFSET 28 61#define BATADV_DHCP_CHADDR_OFFSET 28
61 62
62static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node) 63/**
64 * batadv_gw_node_release - release gw_node from lists and queue for free after
65 * rcu grace period
66 * @ref: kref pointer of the gw_node
67 */
68static void batadv_gw_node_release(struct kref *ref)
63{ 69{
64 if (atomic_dec_and_test(&gw_node->refcount)) { 70 struct batadv_gw_node *gw_node;
65 batadv_orig_node_free_ref(gw_node->orig_node); 71
66 kfree_rcu(gw_node, rcu); 72 gw_node = container_of(ref, struct batadv_gw_node, refcount);
67 } 73
74 batadv_orig_node_put(gw_node->orig_node);
75 kfree_rcu(gw_node, rcu);
76}
77
78/**
79 * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it
80 * @gw_node: gateway node to free
81 */
82static void batadv_gw_node_put(struct batadv_gw_node *gw_node)
83{
84 kref_put(&gw_node->refcount, batadv_gw_node_release);
68} 85}
69 86
70static struct batadv_gw_node * 87static struct batadv_gw_node *
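Note: batadv_gw_node_release() above also drops the reference the gw_node holds on its orig_node before queueing the gw_node itself for kfree_rcu(). The ordering matters: nested references are released inside the callback, while the memory is only reclaimed after the grace period. A sketch with hypothetical parent/child structs:

	#include <linux/kref.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo_parent {
		struct kref refcount;
	};

	static void foo_parent_release(struct kref *ref)
	{
		kfree(container_of(ref, struct foo_parent, refcount));
	}

	static void foo_parent_put(struct foo_parent *parent)
	{
		kref_put(&parent->refcount, foo_parent_release);
	}

	struct foo_child {
		struct foo_parent *parent;	/* counted reference on the parent */
		struct kref refcount;
		struct rcu_head rcu;
	};

	static void foo_child_release(struct kref *ref)
	{
		struct foo_child *child = container_of(ref, struct foo_child, refcount);

		/* release the parent reference before the child memory goes away */
		foo_parent_put(child->parent);
		kfree_rcu(child, rcu);
	}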
@@ -77,7 +94,7 @@ batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv)
77 if (!gw_node) 94 if (!gw_node)
78 goto out; 95 goto out;
79 96
80 if (!atomic_inc_not_zero(&gw_node->refcount)) 97 if (!kref_get_unless_zero(&gw_node->refcount))
81 gw_node = NULL; 98 gw_node = NULL;
82 99
83out: 100out:
@@ -100,14 +117,14 @@ batadv_gw_get_selected_orig(struct batadv_priv *bat_priv)
100 if (!orig_node) 117 if (!orig_node)
101 goto unlock; 118 goto unlock;
102 119
103 if (!atomic_inc_not_zero(&orig_node->refcount)) 120 if (!kref_get_unless_zero(&orig_node->refcount))
104 orig_node = NULL; 121 orig_node = NULL;
105 122
106unlock: 123unlock:
107 rcu_read_unlock(); 124 rcu_read_unlock();
108out: 125out:
109 if (gw_node) 126 if (gw_node)
110 batadv_gw_node_free_ref(gw_node); 127 batadv_gw_node_put(gw_node);
111 return orig_node; 128 return orig_node;
112} 129}
113 130
@@ -118,14 +135,14 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
118 135
119 spin_lock_bh(&bat_priv->gw.list_lock); 136 spin_lock_bh(&bat_priv->gw.list_lock);
120 137
121 if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount)) 138 if (new_gw_node && !kref_get_unless_zero(&new_gw_node->refcount))
122 new_gw_node = NULL; 139 new_gw_node = NULL;
123 140
124 curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1); 141 curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1);
125 rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node); 142 rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node);
126 143
127 if (curr_gw_node) 144 if (curr_gw_node)
128 batadv_gw_node_free_ref(curr_gw_node); 145 batadv_gw_node_put(curr_gw_node);
129 146
130 spin_unlock_bh(&bat_priv->gw.list_lock); 147 spin_unlock_bh(&bat_priv->gw.list_lock);
131} 148}
@@ -170,7 +187,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
170 if (!router_ifinfo) 187 if (!router_ifinfo)
171 goto next; 188 goto next;
172 189
173 if (!atomic_inc_not_zero(&gw_node->refcount)) 190 if (!kref_get_unless_zero(&gw_node->refcount))
174 goto next; 191 goto next;
175 192
176 tq_avg = router_ifinfo->bat_iv.tq_avg; 193 tq_avg = router_ifinfo->bat_iv.tq_avg;
@@ -186,9 +203,9 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
186 ((tmp_gw_factor == max_gw_factor) && 203 ((tmp_gw_factor == max_gw_factor) &&
187 (tq_avg > max_tq))) { 204 (tq_avg > max_tq))) {
188 if (curr_gw) 205 if (curr_gw)
189 batadv_gw_node_free_ref(curr_gw); 206 batadv_gw_node_put(curr_gw);
190 curr_gw = gw_node; 207 curr_gw = gw_node;
191 atomic_inc(&curr_gw->refcount); 208 kref_get(&curr_gw->refcount);
192 } 209 }
193 break; 210 break;
194 211
@@ -201,9 +218,9 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
201 */ 218 */
202 if (tq_avg > max_tq) { 219 if (tq_avg > max_tq) {
203 if (curr_gw) 220 if (curr_gw)
204 batadv_gw_node_free_ref(curr_gw); 221 batadv_gw_node_put(curr_gw);
205 curr_gw = gw_node; 222 curr_gw = gw_node;
206 atomic_inc(&curr_gw->refcount); 223 kref_get(&curr_gw->refcount);
207 } 224 }
208 break; 225 break;
209 } 226 }
@@ -214,12 +231,12 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
214 if (tmp_gw_factor > max_gw_factor) 231 if (tmp_gw_factor > max_gw_factor)
215 max_gw_factor = tmp_gw_factor; 232 max_gw_factor = tmp_gw_factor;
216 233
217 batadv_gw_node_free_ref(gw_node); 234 batadv_gw_node_put(gw_node);
218 235
219next: 236next:
220 batadv_neigh_node_free_ref(router); 237 batadv_neigh_node_put(router);
221 if (router_ifinfo) 238 if (router_ifinfo)
222 batadv_neigh_ifinfo_free_ref(router_ifinfo); 239 batadv_neigh_ifinfo_put(router_ifinfo);
223 } 240 }
224 rcu_read_unlock(); 241 rcu_read_unlock();
225 242
@@ -255,7 +272,7 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
255 */ 272 */
256 batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL); 273 batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL);
257 274
258 batadv_gw_node_free_ref(curr_gw); 275 batadv_gw_node_put(curr_gw);
259} 276}
260 277
261void batadv_gw_election(struct batadv_priv *bat_priv) 278void batadv_gw_election(struct batadv_priv *bat_priv)
@@ -330,13 +347,13 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
330 347
331out: 348out:
332 if (curr_gw) 349 if (curr_gw)
333 batadv_gw_node_free_ref(curr_gw); 350 batadv_gw_node_put(curr_gw);
334 if (next_gw) 351 if (next_gw)
335 batadv_gw_node_free_ref(next_gw); 352 batadv_gw_node_put(next_gw);
336 if (router) 353 if (router)
337 batadv_neigh_node_free_ref(router); 354 batadv_neigh_node_put(router);
338 if (router_ifinfo) 355 if (router_ifinfo)
339 batadv_neigh_ifinfo_free_ref(router_ifinfo); 356 batadv_neigh_ifinfo_put(router_ifinfo);
340} 357}
341 358
342void batadv_gw_check_election(struct batadv_priv *bat_priv, 359void batadv_gw_check_election(struct batadv_priv *bat_priv,
@@ -397,15 +414,15 @@ reselect:
397 batadv_gw_reselect(bat_priv); 414 batadv_gw_reselect(bat_priv);
398out: 415out:
399 if (curr_gw_orig) 416 if (curr_gw_orig)
400 batadv_orig_node_free_ref(curr_gw_orig); 417 batadv_orig_node_put(curr_gw_orig);
401 if (router_gw) 418 if (router_gw)
402 batadv_neigh_node_free_ref(router_gw); 419 batadv_neigh_node_put(router_gw);
403 if (router_orig) 420 if (router_orig)
404 batadv_neigh_node_free_ref(router_orig); 421 batadv_neigh_node_put(router_orig);
405 if (router_gw_tq) 422 if (router_gw_tq)
406 batadv_neigh_ifinfo_free_ref(router_gw_tq); 423 batadv_neigh_ifinfo_put(router_gw_tq);
407 if (router_orig_tq) 424 if (router_orig_tq)
408 batadv_neigh_ifinfo_free_ref(router_orig_tq); 425 batadv_neigh_ifinfo_put(router_orig_tq);
409} 426}
410 427
411/** 428/**
@@ -423,12 +440,12 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
423 if (gateway->bandwidth_down == 0) 440 if (gateway->bandwidth_down == 0)
424 return; 441 return;
425 442
426 if (!atomic_inc_not_zero(&orig_node->refcount)) 443 if (!kref_get_unless_zero(&orig_node->refcount))
427 return; 444 return;
428 445
429 gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); 446 gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC);
430 if (!gw_node) { 447 if (!gw_node) {
431 batadv_orig_node_free_ref(orig_node); 448 batadv_orig_node_put(orig_node);
432 return; 449 return;
433 } 450 }
434 451
@@ -436,7 +453,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
436 gw_node->orig_node = orig_node; 453 gw_node->orig_node = orig_node;
437 gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); 454 gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
438 gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); 455 gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
439 atomic_set(&gw_node->refcount, 1); 456 kref_init(&gw_node->refcount);
440 457
441 spin_lock_bh(&bat_priv->gw.list_lock); 458 spin_lock_bh(&bat_priv->gw.list_lock);
442 hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); 459 hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
@@ -456,7 +473,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
456 * @bat_priv: the bat priv with all the soft interface information 473 * @bat_priv: the bat priv with all the soft interface information
457 * @orig_node: originator announcing gateway capabilities 474 * @orig_node: originator announcing gateway capabilities
458 * 475 *
459 * Returns gateway node if found or NULL otherwise. 476 * Return: gateway node if found or NULL otherwise.
460 */ 477 */
461static struct batadv_gw_node * 478static struct batadv_gw_node *
462batadv_gw_node_get(struct batadv_priv *bat_priv, 479batadv_gw_node_get(struct batadv_priv *bat_priv,
@@ -469,7 +486,7 @@ batadv_gw_node_get(struct batadv_priv *bat_priv,
469 if (gw_node_tmp->orig_node != orig_node) 486 if (gw_node_tmp->orig_node != orig_node)
470 continue; 487 continue;
471 488
472 if (!atomic_inc_not_zero(&gw_node_tmp->refcount)) 489 if (!kref_get_unless_zero(&gw_node_tmp->refcount))
473 continue; 490 continue;
474 491
475 gw_node = gw_node_tmp; 492 gw_node = gw_node_tmp;
@@ -527,22 +544,23 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
527 * gets dereferenced. 544 * gets dereferenced.
528 */ 545 */
529 spin_lock_bh(&bat_priv->gw.list_lock); 546 spin_lock_bh(&bat_priv->gw.list_lock);
530 hlist_del_init_rcu(&gw_node->list); 547 if (!hlist_unhashed(&gw_node->list)) {
548 hlist_del_init_rcu(&gw_node->list);
549 batadv_gw_node_put(gw_node);
550 }
531 spin_unlock_bh(&bat_priv->gw.list_lock); 551 spin_unlock_bh(&bat_priv->gw.list_lock);
532 552
533 batadv_gw_node_free_ref(gw_node);
534
535 curr_gw = batadv_gw_get_selected_gw_node(bat_priv); 553 curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
536 if (gw_node == curr_gw) 554 if (gw_node == curr_gw)
537 batadv_gw_reselect(bat_priv); 555 batadv_gw_reselect(bat_priv);
538 556
539 if (curr_gw) 557 if (curr_gw)
540 batadv_gw_node_free_ref(curr_gw); 558 batadv_gw_node_put(curr_gw);
541 } 559 }
542 560
543out: 561out:
544 if (gw_node) 562 if (gw_node)
545 batadv_gw_node_free_ref(gw_node); 563 batadv_gw_node_put(gw_node);
546} 564}
547 565
548void batadv_gw_node_delete(struct batadv_priv *bat_priv, 566void batadv_gw_node_delete(struct batadv_priv *bat_priv,
@@ -565,7 +583,7 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
565 hlist_for_each_entry_safe(gw_node, node_tmp, 583 hlist_for_each_entry_safe(gw_node, node_tmp,
566 &bat_priv->gw.list, list) { 584 &bat_priv->gw.list, list) {
567 hlist_del_init_rcu(&gw_node->list); 585 hlist_del_init_rcu(&gw_node->list);
568 batadv_gw_node_free_ref(gw_node); 586 batadv_gw_node_put(gw_node);
569 } 587 }
570 spin_unlock_bh(&bat_priv->gw.list_lock); 588 spin_unlock_bh(&bat_priv->gw.list_lock);
571} 589}
@@ -602,12 +620,12 @@ static int batadv_write_buffer_text(struct batadv_priv *bat_priv,
602 ret = seq_has_overflowed(seq) ? -1 : 0; 620 ret = seq_has_overflowed(seq) ? -1 : 0;
603 621
604 if (curr_gw) 622 if (curr_gw)
605 batadv_gw_node_free_ref(curr_gw); 623 batadv_gw_node_put(curr_gw);
606out: 624out:
607 if (router_ifinfo) 625 if (router_ifinfo)
608 batadv_neigh_ifinfo_free_ref(router_ifinfo); 626 batadv_neigh_ifinfo_put(router_ifinfo);
609 if (router) 627 if (router)
610 batadv_neigh_node_free_ref(router); 628 batadv_neigh_node_put(router);
611 return ret; 629 return ret;
612} 630}
613 631
@@ -644,7 +662,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
644 662
645out: 663out:
646 if (primary_if) 664 if (primary_if)
647 batadv_hardif_free_ref(primary_if); 665 batadv_hardif_put(primary_if);
648 return 0; 666 return 0;
649} 667}
650 668
@@ -655,13 +673,13 @@ out:
655 * @chaddr: buffer where the client address will be stored. Valid 673 * @chaddr: buffer where the client address will be stored. Valid
656 * only if the function returns BATADV_DHCP_TO_CLIENT 674 * only if the function returns BATADV_DHCP_TO_CLIENT
657 * 675 *
658 * Returns: 676 * This function may re-allocate the data buffer of the skb passed as argument.
677 *
678 * Return:
659 * - BATADV_DHCP_NO if the packet is not a dhcp message or if there was an error 679 * - BATADV_DHCP_NO if the packet is not a dhcp message or if there was an error
660 * while parsing it 680 * while parsing it
661 * - BATADV_DHCP_TO_SERVER if this is a message going to the DHCP server 681 * - BATADV_DHCP_TO_SERVER if this is a message going to the DHCP server
662 * - BATADV_DHCP_TO_CLIENT if this is a message going to a DHCP client 682 * - BATADV_DHCP_TO_CLIENT if this is a message going to a DHCP client
663 *
664 * This function may re-allocate the data buffer of the skb passed as argument.
665 */ 683 */
666enum batadv_dhcp_recipient 684enum batadv_dhcp_recipient
667batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, 685batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
@@ -776,11 +794,11 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
776 * server. Due to topology changes it may be the case that the GW server 794 * server. Due to topology changes it may be the case that the GW server
777 * previously selected is not the best one anymore. 795 * previously selected is not the best one anymore.
778 * 796 *
779 * Returns true if the packet destination is unicast and it is not the best gw,
780 * false otherwise.
781 *
782 * This call might reallocate skb data. 797 * This call might reallocate skb data.
783 * Must be invoked only when the DHCP packet is going TO a DHCP SERVER. 798 * Must be invoked only when the DHCP packet is going TO a DHCP SERVER.
799 *
800 * Return: true if the packet destination is unicast and it is not the best gw,
801 * false otherwise.
784 */ 802 */
785bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, 803bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
786 struct sk_buff *skb) 804 struct sk_buff *skb)
@@ -838,7 +856,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
838 goto out; 856 goto out;
839 857
840 curr_tq_avg = curr_ifinfo->bat_iv.tq_avg; 858 curr_tq_avg = curr_ifinfo->bat_iv.tq_avg;
841 batadv_neigh_ifinfo_free_ref(curr_ifinfo); 859 batadv_neigh_ifinfo_put(curr_ifinfo);
842 860
843 break; 861 break;
844 case BATADV_GW_MODE_OFF: 862 case BATADV_GW_MODE_OFF:
@@ -856,18 +874,18 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
856 874
857 if ((curr_tq_avg - old_ifinfo->bat_iv.tq_avg) > BATADV_GW_THRESHOLD) 875 if ((curr_tq_avg - old_ifinfo->bat_iv.tq_avg) > BATADV_GW_THRESHOLD)
858 out_of_range = true; 876 out_of_range = true;
859 batadv_neigh_ifinfo_free_ref(old_ifinfo); 877 batadv_neigh_ifinfo_put(old_ifinfo);
860 878
861out: 879out:
862 if (orig_dst_node) 880 if (orig_dst_node)
863 batadv_orig_node_free_ref(orig_dst_node); 881 batadv_orig_node_put(orig_dst_node);
864 if (curr_gw) 882 if (curr_gw)
865 batadv_gw_node_free_ref(curr_gw); 883 batadv_gw_node_put(curr_gw);
866 if (gw_node) 884 if (gw_node)
867 batadv_gw_node_free_ref(gw_node); 885 batadv_gw_node_put(gw_node);
868 if (neigh_old) 886 if (neigh_old)
869 batadv_neigh_node_free_ref(neigh_old); 887 batadv_neigh_node_put(neigh_old);
870 if (neigh_curr) 888 if (neigh_curr)
871 batadv_neigh_node_free_ref(neigh_curr); 889 batadv_neigh_node_put(neigh_curr);
872 return out_of_range; 890 return out_of_range;
873} 891}
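
The gateway_client.c hunks above are part of the atomic_t-to-kref conversion: the old free_ref helper becomes a put helper wrapping kref_put(), the actual cleanup moves into a release callback, and RCU-protected lookups now take their reference with kref_get_unless_zero() so an object already dropping to zero is skipped. A rough sketch of that pattern, with hypothetical names (my_node and friends) rather than the patch's own identifiers:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_node {
        struct hlist_node list;
        struct rcu_head rcu;
        struct kref refcount;
};

/* called by kref_put() when the last reference is gone */
static void my_node_release(struct kref *ref)
{
        struct my_node *node = container_of(ref, struct my_node, refcount);

        /* free only after concurrent RCU readers have finished */
        kfree_rcu(node, rcu);
}

static void my_node_put(struct my_node *node)
{
        kref_put(&node->refcount, my_node_release);
}

static struct my_node *my_node_get(struct hlist_head *head)
{
        struct my_node *node, *found = NULL;

        rcu_read_lock();
        hlist_for_each_entry_rcu(node, head, list) {
                /* skip objects whose refcount already dropped to zero */
                if (!kref_get_unless_zero(&node->refcount))
                        continue;

                found = node;
                break;
        }
        rcu_read_unlock();

        return found;
}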
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index fa9527785ed3..582dd8c413c8 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index b51bface8bdd..4423047889e1 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -38,10 +38,10 @@
38 * @description: text shown when throughput string cannot be parsed 38 * @description: text shown when throughput string cannot be parsed
39 * @throughput: pointer holding the returned throughput information 39 * @throughput: pointer holding the returned throughput information
40 * 40 *
41 * Returns false on parse error and true otherwise. 41 * Return: false on parse error and true otherwise.
42 */ 42 */
43static bool batadv_parse_throughput(struct net_device *net_dev, char *buff, 43bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
44 const char *description, u32 *throughput) 44 const char *description, u32 *throughput)
45{ 45{
46 enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT; 46 enum batadv_bandwidth_units bw_unit_type = BATADV_BW_UNIT_KBIT;
47 u64 lthroughput; 47 u64 lthroughput;
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index ab893e318229..8a5e1ddf1175 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -49,5 +49,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
49void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv); 49void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv);
50void batadv_gw_init(struct batadv_priv *bat_priv); 50void batadv_gw_init(struct batadv_priv *bat_priv);
51void batadv_gw_free(struct batadv_priv *bat_priv); 51void batadv_gw_free(struct batadv_priv *bat_priv);
52bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
53 const char *description, u32 *throughput);
52 54
53#endif /* _NET_BATMAN_ADV_GATEWAY_COMMON_H_ */ 55#endif /* _NET_BATMAN_ADV_GATEWAY_COMMON_H_ */
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 01acccc4d218..b22b2775a0a5 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -18,6 +18,7 @@
18#include "hard-interface.h" 18#include "hard-interface.h"
19#include "main.h" 19#include "main.h"
20 20
21#include <linux/atomic.h>
21#include <linux/bug.h> 22#include <linux/bug.h>
22#include <linux/byteorder/generic.h> 23#include <linux/byteorder/generic.h>
23#include <linux/errno.h> 24#include <linux/errno.h>
@@ -26,6 +27,7 @@
26#include <linux/if_ether.h> 27#include <linux/if_ether.h>
27#include <linux/if.h> 28#include <linux/if.h>
28#include <linux/kernel.h> 29#include <linux/kernel.h>
30#include <linux/kref.h>
29#include <linux/list.h> 31#include <linux/list.h>
30#include <linux/netdevice.h> 32#include <linux/netdevice.h>
31#include <linux/printk.h> 33#include <linux/printk.h>
@@ -47,13 +49,19 @@
47#include "sysfs.h" 49#include "sysfs.h"
48#include "translation-table.h" 50#include "translation-table.h"
49 51
50void batadv_hardif_free_rcu(struct rcu_head *rcu) 52/**
53 * batadv_hardif_release - release hard interface from lists and queue for
54 * free after rcu grace period
55 * @ref: kref pointer of the hard interface
56 */
57void batadv_hardif_release(struct kref *ref)
51{ 58{
52 struct batadv_hard_iface *hard_iface; 59 struct batadv_hard_iface *hard_iface;
53 60
54 hard_iface = container_of(rcu, struct batadv_hard_iface, rcu); 61 hard_iface = container_of(ref, struct batadv_hard_iface, refcount);
55 dev_put(hard_iface->net_dev); 62 dev_put(hard_iface->net_dev);
56 kfree(hard_iface); 63
64 kfree_rcu(hard_iface, rcu);
57} 65}
58 66
59struct batadv_hard_iface * 67struct batadv_hard_iface *
@@ -64,7 +72,7 @@ batadv_hardif_get_by_netdev(const struct net_device *net_dev)
64 rcu_read_lock(); 72 rcu_read_lock();
65 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { 73 list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
66 if (hard_iface->net_dev == net_dev && 74 if (hard_iface->net_dev == net_dev &&
67 atomic_inc_not_zero(&hard_iface->refcount)) 75 kref_get_unless_zero(&hard_iface->refcount))
68 goto out; 76 goto out;
69 } 77 }
70 78
@@ -76,6 +84,28 @@ out:
76} 84}
77 85
78/** 86/**
87 * batadv_mutual_parents - check if two devices are each others parent
88 * @dev1: 1st net_device
89 * @dev2: 2nd net_device
90 *
91 * veth devices come in pairs and each is the parent of the other!
92 *
93 * Return: true if the devices are each others parent, otherwise false
94 */
95static bool batadv_mutual_parents(const struct net_device *dev1,
96 const struct net_device *dev2)
97{
98 int dev1_parent_iflink = dev_get_iflink(dev1);
99 int dev2_parent_iflink = dev_get_iflink(dev2);
100
101 if (!dev1_parent_iflink || !dev2_parent_iflink)
102 return false;
103
104 return (dev1_parent_iflink == dev2->ifindex) &&
105 (dev2_parent_iflink == dev1->ifindex);
106}
107
108/**
79 * batadv_is_on_batman_iface - check if a device is a batman iface descendant 109 * batadv_is_on_batman_iface - check if a device is a batman iface descendant
80 * @net_dev: the device to check 110 * @net_dev: the device to check
81 * 111 *
@@ -85,7 +115,7 @@ out:
85 * This function recursively checks all the fathers of the device passed as 115 * This function recursively checks all the fathers of the device passed as
86 * argument looking for a batman-adv soft interface. 116 * argument looking for a batman-adv soft interface.
87 * 117 *
88 * Returns true if the device is descendant of a batman-adv mesh interface (or 118 * Return: true if the device is descendant of a batman-adv mesh interface (or
89 * if it is a batman-adv interface itself), false otherwise 119 * if it is a batman-adv interface itself), false otherwise
90 */ 120 */
91static bool batadv_is_on_batman_iface(const struct net_device *net_dev) 121static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
@@ -108,6 +138,9 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
108 if (WARN(!parent_dev, "Cannot find parent device")) 138 if (WARN(!parent_dev, "Cannot find parent device"))
109 return false; 139 return false;
110 140
141 if (batadv_mutual_parents(net_dev, parent_dev))
142 return false;
143
111 ret = batadv_is_on_batman_iface(parent_dev); 144 ret = batadv_is_on_batman_iface(parent_dev);
112 145
113 return ret; 146 return ret;
@@ -136,7 +169,7 @@ static int batadv_is_valid_iface(const struct net_device *net_dev)
136 * interface 169 * interface
137 * @net_device: the device to check 170 * @net_device: the device to check
138 * 171 *
139 * Returns true if the net device is a 802.11 wireless device, false otherwise. 172 * Return: true if the net device is a 802.11 wireless device, false otherwise.
140 */ 173 */
141bool batadv_is_wifi_netdev(struct net_device *net_device) 174bool batadv_is_wifi_netdev(struct net_device *net_device)
142{ 175{
@@ -169,7 +202,7 @@ batadv_hardif_get_active(const struct net_device *soft_iface)
169 continue; 202 continue;
170 203
171 if (hard_iface->if_status == BATADV_IF_ACTIVE && 204 if (hard_iface->if_status == BATADV_IF_ACTIVE &&
172 atomic_inc_not_zero(&hard_iface->refcount)) 205 kref_get_unless_zero(&hard_iface->refcount))
173 goto out; 206 goto out;
174 } 207 }
175 208
@@ -193,7 +226,7 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv,
193 batadv_bla_update_orig_address(bat_priv, primary_if, oldif); 226 batadv_bla_update_orig_address(bat_priv, primary_if, oldif);
194out: 227out:
195 if (primary_if) 228 if (primary_if)
196 batadv_hardif_free_ref(primary_if); 229 batadv_hardif_put(primary_if);
197} 230}
198 231
199static void batadv_primary_if_select(struct batadv_priv *bat_priv, 232static void batadv_primary_if_select(struct batadv_priv *bat_priv,
@@ -203,7 +236,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
203 236
204 ASSERT_RTNL(); 237 ASSERT_RTNL();
205 238
206 if (new_hard_iface && !atomic_inc_not_zero(&new_hard_iface->refcount)) 239 if (new_hard_iface && !kref_get_unless_zero(&new_hard_iface->refcount))
207 new_hard_iface = NULL; 240 new_hard_iface = NULL;
208 241
209 curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1); 242 curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1);
@@ -217,7 +250,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
217 250
218out: 251out:
219 if (curr_hard_iface) 252 if (curr_hard_iface)
220 batadv_hardif_free_ref(curr_hard_iface); 253 batadv_hardif_put(curr_hard_iface);
221} 254}
222 255
223static bool 256static bool
@@ -376,7 +409,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
376 409
377out: 410out:
378 if (primary_if) 411 if (primary_if)
379 batadv_hardif_free_ref(primary_if); 412 batadv_hardif_put(primary_if);
380} 413}
381 414
382static void 415static void
@@ -401,7 +434,8 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
401 * 434 *
402 * Invoke ndo_del_slave on master passing slave as argument. In this way slave 435 * Invoke ndo_del_slave on master passing slave as argument. In this way slave
403 * is free'd and master can correctly change its internal state. 436 * is free'd and master can correctly change its internal state.
404 * Return 0 on success, a negative value representing the error otherwise 437 *
438 * Return: 0 on success, a negative value representing the error otherwise
405 */ 439 */
406static int batadv_master_del_slave(struct batadv_hard_iface *slave, 440static int batadv_master_del_slave(struct batadv_hard_iface *slave,
407 struct net_device *master) 441 struct net_device *master)
@@ -430,7 +464,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
430 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) 464 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
431 goto out; 465 goto out;
432 466
433 if (!atomic_inc_not_zero(&hard_iface->refcount)) 467 if (!kref_get_unless_zero(&hard_iface->refcount))
434 goto out; 468 goto out;
435 469
436 soft_iface = dev_get_by_name(&init_net, iface_name); 470 soft_iface = dev_get_by_name(&init_net, iface_name);
@@ -528,7 +562,7 @@ err_dev:
528 hard_iface->soft_iface = NULL; 562 hard_iface->soft_iface = NULL;
529 dev_put(soft_iface); 563 dev_put(soft_iface);
530err: 564err:
531 batadv_hardif_free_ref(hard_iface); 565 batadv_hardif_put(hard_iface);
532 return ret; 566 return ret;
533} 567}
534 568
@@ -559,7 +593,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
559 batadv_primary_if_select(bat_priv, new_if); 593 batadv_primary_if_select(bat_priv, new_if);
560 594
561 if (new_if) 595 if (new_if)
562 batadv_hardif_free_ref(new_if); 596 batadv_hardif_put(new_if);
563 } 597 }
564 598
565 bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); 599 bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
@@ -582,11 +616,11 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
582 } 616 }
583 617
584 hard_iface->soft_iface = NULL; 618 hard_iface->soft_iface = NULL;
585 batadv_hardif_free_ref(hard_iface); 619 batadv_hardif_put(hard_iface);
586 620
587out: 621out:
588 if (primary_if) 622 if (primary_if)
589 batadv_hardif_free_ref(primary_if); 623 batadv_hardif_put(primary_if);
590} 624}
591 625
592/** 626/**
@@ -605,7 +639,7 @@ static void batadv_hardif_remove_interface_finish(struct work_struct *work)
605 639
606 batadv_debugfs_del_hardif(hard_iface); 640 batadv_debugfs_del_hardif(hard_iface);
607 batadv_sysfs_del_hardif(&hard_iface->hardif_obj); 641 batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
608 batadv_hardif_free_ref(hard_iface); 642 batadv_hardif_put(hard_iface);
609} 643}
610 644
611static struct batadv_hard_iface * 645static struct batadv_hard_iface *
@@ -651,7 +685,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
651 hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; 685 hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
652 686
653 /* extra reference for return */ 687 /* extra reference for return */
654 atomic_set(&hard_iface->refcount, 2); 688 kref_init(&hard_iface->refcount);
689 kref_get(&hard_iface->refcount);
655 690
656 batadv_check_known_mac_addr(hard_iface->net_dev); 691 batadv_check_known_mac_addr(hard_iface->net_dev);
657 list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); 692 list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
@@ -759,10 +794,10 @@ static int batadv_hard_if_event(struct notifier_block *this,
759 } 794 }
760 795
761hardif_put: 796hardif_put:
762 batadv_hardif_free_ref(hard_iface); 797 batadv_hardif_put(hard_iface);
763out: 798out:
764 if (primary_if) 799 if (primary_if)
765 batadv_hardif_free_ref(primary_if); 800 batadv_hardif_put(primary_if);
766 return NOTIFY_DONE; 801 return NOTIFY_DONE;
767} 802}
768 803
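
One detail from the hard-interface.c hunk above worth spelling out: atomic_set(&refcount, 2) has no single-call kref equivalent, because kref_init() always starts the counter at one, so the extra reference handed back to the caller is taken with a separate kref_get(). A hedged sketch of that allocation pattern, using hypothetical names rather than the patch's own code:

#include <linux/gfp.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/slab.h>

struct my_iface {
        struct list_head list;
        struct kref refcount;
};

static struct my_iface *my_iface_add(struct list_head *head)
{
        struct my_iface *iface;

        iface = kzalloc(sizeof(*iface), GFP_ATOMIC);
        if (!iface)
                return NULL;

        /* one reference owned by the list the object is added to ... */
        kref_init(&iface->refcount);
        /* ... plus an extra reference for the pointer returned to the caller */
        kref_get(&iface->refcount);

        list_add_tail(&iface->list, head);

        return iface;
}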
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 7b12ea8ea29d..d74f1983f33e 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -20,8 +20,8 @@
20 20
21#include "main.h" 21#include "main.h"
22 22
23#include <linux/atomic.h>
24#include <linux/compiler.h> 23#include <linux/compiler.h>
24#include <linux/kref.h>
25#include <linux/notifier.h> 25#include <linux/notifier.h>
26#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
27#include <linux/stddef.h> 27#include <linux/stddef.h>
@@ -61,18 +61,16 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
61void batadv_hardif_remove_interfaces(void); 61void batadv_hardif_remove_interfaces(void);
62int batadv_hardif_min_mtu(struct net_device *soft_iface); 62int batadv_hardif_min_mtu(struct net_device *soft_iface);
63void batadv_update_min_mtu(struct net_device *soft_iface); 63void batadv_update_min_mtu(struct net_device *soft_iface);
64void batadv_hardif_free_rcu(struct rcu_head *rcu); 64void batadv_hardif_release(struct kref *ref);
65 65
66/** 66/**
67 * batadv_hardif_free_ref - decrement the hard interface refcounter and 67 * batadv_hardif_put - decrement the hard interface refcounter and possibly
68 * possibly free it 68 * release it
69 * @hard_iface: the hard interface to free 69 * @hard_iface: the hard interface to free
70 */ 70 */
71static inline void 71static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface)
72batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface)
73{ 72{
74 if (atomic_dec_and_test(&hard_iface->refcount)) 73 kref_put(&hard_iface->refcount, batadv_hardif_release);
75 call_rcu(&hard_iface->rcu, batadv_hardif_free_rcu);
76} 74}
77 75
78static inline struct batadv_hard_iface * 76static inline struct batadv_hard_iface *
@@ -85,7 +83,7 @@ batadv_primary_if_get_selected(struct batadv_priv *bat_priv)
85 if (!hard_iface) 83 if (!hard_iface)
86 goto out; 84 goto out;
87 85
88 if (!atomic_inc_not_zero(&hard_iface->refcount)) 86 if (!kref_get_unless_zero(&hard_iface->refcount))
89 hard_iface = NULL; 87 hard_iface = NULL;
90 88
91out: 89out:
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 2ea6a18d793f..a0a0fdb85805 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich, Marek Lindner 3 * Simon Wunderlich, Marek Lindner
4 * 4 *
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 377626250ac7..9bb57b87447c 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2006-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2006-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Simon Wunderlich, Marek Lindner 3 * Simon Wunderlich, Marek Lindner
4 * 4 *
@@ -30,14 +30,17 @@
30struct lock_class_key; 30struct lock_class_key;
31 31
32/* callback to a compare function. should compare 2 element datas for their 32/* callback to a compare function. should compare 2 element datas for their
33 * keys, return 0 if same and not 0 if not same 33 * keys
34 *
35 * Return: 0 if same and not 0 if not same
34 */ 36 */
35typedef int (*batadv_hashdata_compare_cb)(const struct hlist_node *, 37typedef int (*batadv_hashdata_compare_cb)(const struct hlist_node *,
36 const void *); 38 const void *);
37 39
38/* the hashfunction, should return an index 40/* the hashfunction
39 * based on the key in the data of the first 41 *
40 * argument and the size the second 42 * Return: an index based on the key in the data of the first argument and the
43 * size the second
41 */ 44 */
42typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32); 45typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32);
43typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *); 46typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *);
@@ -96,7 +99,7 @@ static inline void batadv_hash_delete(struct batadv_hashtable *hash,
96 * @data: data passed to the aforementioned callbacks as argument 99 * @data: data passed to the aforementioned callbacks as argument
97 * @data_node: to be added element 100 * @data_node: to be added element
98 * 101 *
99 * Returns 0 on success, 1 if the element already is in the hash 102 * Return: 0 on success, 1 if the element already is in the hash
100 * and -1 on error. 103 * and -1 on error.
101 */ 104 */
102static inline int batadv_hash_add(struct batadv_hashtable *hash, 105static inline int batadv_hash_add(struct batadv_hashtable *hash,
@@ -139,10 +142,11 @@ out:
139 return ret; 142 return ret;
140} 143}
141 144
142/* removes data from hash, if found. returns pointer do data on success, so you 145/* removes data from hash, if found. data could be the structure you use with
143 * can remove the used structure yourself, or NULL on error . data could be the 146 * just the key filled, we just need the key for comparing.
144 * structure you use with just the key filled, we just need the key for 147 *
145 * comparing. 148 * Return: pointer to data on success, so you can remove the used
149 * structure yourself, or NULL on error
146 */ 150 */
147static inline void *batadv_hash_remove(struct batadv_hashtable *hash, 151static inline void *batadv_hash_remove(struct batadv_hashtable *hash,
148 batadv_hashdata_compare_cb compare, 152 batadv_hashdata_compare_cb compare,
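
The reworded hash.h comments above describe the two callbacks a batadv hashtable user supplies: a compare callback that returns 0 when two entries carry the same key, and a choose callback that maps a key to a bucket index below the table size. A minimal, hedged sketch of such a callback pair for an Ethernet-address key, following the convention stated in those comments (struct and function names are hypothetical, not taken from the patch):

#include <linux/if_ether.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/string.h>
#include <linux/types.h>

struct my_entry {
        struct hlist_node hash_entry;
        u8 addr[ETH_ALEN];
};

/* Return: 0 if both entries carry the same key, non-zero otherwise */
static int my_entry_compare(const struct hlist_node *node, const void *data2)
{
        const struct my_entry *entry;

        entry = container_of(node, struct my_entry, hash_entry);

        return memcmp(entry->addr, data2, ETH_ALEN);
}

/* Return: a bucket index derived from the key, always below 'size' */
static u32 my_entry_choose(const void *data, u32 size)
{
        return jhash(data, ETH_ALEN, 0) % size;
}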
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index bcabb5e3f4d3..14d0013b387e 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -278,7 +278,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
278 278
279 ether_addr_copy(icmp_header->orig, primary_if->net_dev->dev_addr); 279 ether_addr_copy(icmp_header->orig, primary_if->net_dev->dev_addr);
280 280
281 batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); 281 batadv_send_unicast_skb(skb, neigh_node);
282 goto out; 282 goto out;
283 283
284dst_unreach: 284dst_unreach:
@@ -288,11 +288,11 @@ free_skb:
288 kfree_skb(skb); 288 kfree_skb(skb);
289out: 289out:
290 if (primary_if) 290 if (primary_if)
291 batadv_hardif_free_ref(primary_if); 291 batadv_hardif_put(primary_if);
292 if (neigh_node) 292 if (neigh_node)
293 batadv_neigh_node_free_ref(neigh_node); 293 batadv_neigh_node_put(neigh_node);
294 if (orig_node) 294 if (orig_node)
295 batadv_orig_node_free_ref(orig_node); 295 batadv_orig_node_put(orig_node);
296 return len; 296 return len;
297} 297}
298 298
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index e937143f0b10..618d5de06f20 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 4b5d61fbadb1..d64ddb961979 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -29,6 +29,7 @@
29#include <linux/ip.h> 29#include <linux/ip.h>
30#include <linux/ipv6.h> 30#include <linux/ipv6.h>
31#include <linux/kernel.h> 31#include <linux/kernel.h>
32#include <linux/kref.h>
32#include <linux/list.h> 33#include <linux/list.h>
33#include <linux/lockdep.h> 34#include <linux/lockdep.h>
34#include <linux/module.h> 35#include <linux/module.h>
@@ -86,6 +87,7 @@ static int __init batadv_init(void)
86 87
87 batadv_recv_handler_init(); 88 batadv_recv_handler_init();
88 89
90 batadv_v_init();
89 batadv_iv_init(); 91 batadv_iv_init();
90 batadv_nc_init(); 92 batadv_nc_init();
91 93
@@ -158,6 +160,10 @@ int batadv_mesh_init(struct net_device *soft_iface)
158 INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list); 160 INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list);
159 INIT_HLIST_HEAD(&bat_priv->softif_vlan_list); 161 INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
160 162
163 ret = batadv_v_mesh_init(bat_priv);
164 if (ret < 0)
165 goto err;
166
161 ret = batadv_originator_init(bat_priv); 167 ret = batadv_originator_init(bat_priv);
162 if (ret < 0) 168 if (ret < 0)
163 goto err; 169 goto err;
@@ -200,6 +206,8 @@ void batadv_mesh_free(struct net_device *soft_iface)
200 batadv_purge_outstanding_packets(bat_priv, NULL); 206 batadv_purge_outstanding_packets(bat_priv, NULL);
201 207
202 batadv_gw_node_free(bat_priv); 208 batadv_gw_node_free(bat_priv);
209
210 batadv_v_mesh_free(bat_priv);
203 batadv_nc_mesh_free(bat_priv); 211 batadv_nc_mesh_free(bat_priv);
204 batadv_dat_free(bat_priv); 212 batadv_dat_free(bat_priv);
205 batadv_bla_free(bat_priv); 213 batadv_bla_free(bat_priv);
@@ -233,7 +241,7 @@ void batadv_mesh_free(struct net_device *soft_iface)
233 * @bat_priv: the bat priv with all the soft interface information 241 * @bat_priv: the bat priv with all the soft interface information
234 * @addr: the address to check 242 * @addr: the address to check
235 * 243 *
236 * Returns 'true' if the mac address was found, false otherwise. 244 * Return: 'true' if the mac address was found, false otherwise.
237 */ 245 */
238bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) 246bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
239{ 247{
@@ -262,7 +270,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
262 * function that requires the primary interface 270 * function that requires the primary interface
263 * @seq: debugfs table seq_file struct 271 * @seq: debugfs table seq_file struct
264 * 272 *
265 * Returns primary interface if found or NULL otherwise. 273 * Return: primary interface if found or NULL otherwise.
266 */ 274 */
267struct batadv_hard_iface * 275struct batadv_hard_iface *
268batadv_seq_print_text_primary_if_get(struct seq_file *seq) 276batadv_seq_print_text_primary_if_get(struct seq_file *seq)
@@ -286,7 +294,7 @@ batadv_seq_print_text_primary_if_get(struct seq_file *seq)
286 seq_printf(seq, 294 seq_printf(seq,
287 "BATMAN mesh %s disabled - primary interface not active\n", 295 "BATMAN mesh %s disabled - primary interface not active\n",
288 net_dev->name); 296 net_dev->name);
289 batadv_hardif_free_ref(primary_if); 297 batadv_hardif_put(primary_if);
290 primary_if = NULL; 298 primary_if = NULL;
291 299
292out: 300out:
@@ -297,7 +305,7 @@ out:
297 * batadv_max_header_len - calculate maximum encapsulation overhead for a 305 * batadv_max_header_len - calculate maximum encapsulation overhead for a
298 * payload packet 306 * payload packet
299 * 307 *
300 * Return the maximum encapsulation overhead in bytes. 308 * Return: the maximum encapsulation overhead in bytes.
301 */ 309 */
302int batadv_max_header_len(void) 310int batadv_max_header_len(void)
303{ 311{
@@ -599,6 +607,8 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
599 * 607 *
600 * payload_ptr must always point to an address in the skb head buffer and not to 608 * payload_ptr must always point to an address in the skb head buffer and not to
601 * a fragment. 609 * a fragment.
610 *
611 * Return: big endian crc32c of the checksummed data
602 */ 612 */
603__be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) 613__be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
604{ 614{
@@ -622,15 +632,26 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr)
622} 632}
623 633
624/** 634/**
625 * batadv_tvlv_handler_free_ref - decrement the tvlv handler refcounter and 635 * batadv_tvlv_handler_release - release tvlv handler from lists and queue for
626 * possibly free it 636 * free after rcu grace period
637 * @ref: kref pointer of the tvlv
637 * @ref: kref pointer of the tvlv handler
638 */
639static void batadv_tvlv_handler_release(struct kref *ref)
640{
641 struct batadv_tvlv_handler *tvlv_handler;
642
643 tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount);
644 kfree_rcu(tvlv_handler, rcu);
645}
646
647/**
648 * batadv_tvlv_handler_put - decrement the tvlv handler refcounter and
649 * possibly release it
627 * @tvlv_handler: the tvlv handler to free 650 * @tvlv_handler: the tvlv handler to free
628 */ 651 */
629static void 652static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
630batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler)
631{ 653{
632 if (atomic_dec_and_test(&tvlv_handler->refcount)) 654 kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
633 kfree_rcu(tvlv_handler, rcu);
634} 655}
635 656
636/** 657/**
@@ -640,7 +661,7 @@ batadv_tvlv_handler_free_ref(struct batadv_tvlv_handler *tvlv_handler)
640 * @type: tvlv handler type to look for 661 * @type: tvlv handler type to look for
641 * @version: tvlv handler version to look for 662 * @version: tvlv handler version to look for
642 * 663 *
643 * Returns tvlv handler if found or NULL otherwise. 664 * Return: tvlv handler if found or NULL otherwise.
644 */ 665 */
645static struct batadv_tvlv_handler 666static struct batadv_tvlv_handler
646*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version) 667*batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version)
@@ -656,7 +677,7 @@ static struct batadv_tvlv_handler
656 if (tvlv_handler_tmp->version != version) 677 if (tvlv_handler_tmp->version != version)
657 continue; 678 continue;
658 679
659 if (!atomic_inc_not_zero(&tvlv_handler_tmp->refcount)) 680 if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount))
660 continue; 681 continue;
661 682
662 tvlv_handler = tvlv_handler_tmp; 683 tvlv_handler = tvlv_handler_tmp;
@@ -668,14 +689,25 @@ static struct batadv_tvlv_handler
668} 689}
669 690
670/** 691/**
671 * batadv_tvlv_container_free_ref - decrement the tvlv container refcounter and 692 * batadv_tvlv_container_release - release tvlv from lists and free
672 * possibly free it 693 * @ref: kref pointer of the tvlv
694 */
695static void batadv_tvlv_container_release(struct kref *ref)
696{
697 struct batadv_tvlv_container *tvlv;
698
699 tvlv = container_of(ref, struct batadv_tvlv_container, refcount);
700 kfree(tvlv);
701}
702
703/**
704 * batadv_tvlv_container_put - decrement the tvlv container refcounter and
705 * possibly release it
673 * @tvlv: the tvlv container to free 706 * @tvlv: the tvlv container to free
674 */ 707 */
675static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv) 708static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
676{ 709{
677 if (atomic_dec_and_test(&tvlv->refcount)) 710 kref_put(&tvlv->refcount, batadv_tvlv_container_release);
678 kfree(tvlv);
679} 711}
680 712
681/** 713/**
@@ -688,13 +720,15 @@ static void batadv_tvlv_container_free_ref(struct batadv_tvlv_container *tvlv)
688 * Has to be called with the appropriate locks being acquired 720 * Has to be called with the appropriate locks being acquired
689 * (tvlv.container_list_lock). 721 * (tvlv.container_list_lock).
690 * 722 *
691 * Returns tvlv container if found or NULL otherwise. 723 * Return: tvlv container if found or NULL otherwise.
692 */ 724 */
693static struct batadv_tvlv_container 725static struct batadv_tvlv_container
694*batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) 726*batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version)
695{ 727{
696 struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL; 728 struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL;
697 729
730 lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
731
698 hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) { 732 hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) {
699 if (tvlv_tmp->tvlv_hdr.type != type) 733 if (tvlv_tmp->tvlv_hdr.type != type)
700 continue; 734 continue;
@@ -702,7 +736,7 @@ static struct batadv_tvlv_container
702 if (tvlv_tmp->tvlv_hdr.version != version) 736 if (tvlv_tmp->tvlv_hdr.version != version)
703 continue; 737 continue;
704 738
705 if (!atomic_inc_not_zero(&tvlv_tmp->refcount)) 739 if (!kref_get_unless_zero(&tvlv_tmp->refcount))
706 continue; 740 continue;
707 741
708 tvlv = tvlv_tmp; 742 tvlv = tvlv_tmp;
@@ -720,13 +754,15 @@ static struct batadv_tvlv_container
720 * Has to be called with the appropriate locks being acquired 754 * Has to be called with the appropriate locks being acquired
721 * (tvlv.container_list_lock). 755 * (tvlv.container_list_lock).
722 * 756 *
723 * Returns size of all currently registered tvlv containers in bytes. 757 * Return: size of all currently registered tvlv containers in bytes.
724 */ 758 */
725static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) 759static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv)
726{ 760{
727 struct batadv_tvlv_container *tvlv; 761 struct batadv_tvlv_container *tvlv;
728 u16 tvlv_len = 0; 762 u16 tvlv_len = 0;
729 763
764 lockdep_assert_held(&bat_priv->tvlv.container_list_lock);
765
730 hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { 766 hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) {
731 tvlv_len += sizeof(struct batadv_tvlv_hdr); 767 tvlv_len += sizeof(struct batadv_tvlv_hdr);
732 tvlv_len += ntohs(tvlv->tvlv_hdr.len); 768 tvlv_len += ntohs(tvlv->tvlv_hdr.len);
@@ -755,8 +791,8 @@ static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv,
755 hlist_del(&tvlv->list); 791 hlist_del(&tvlv->list);
756 792
757 /* first call to decrement the counter, second call to free */ 793 /* first call to decrement the counter, second call to free */
758 batadv_tvlv_container_free_ref(tvlv); 794 batadv_tvlv_container_put(tvlv);
759 batadv_tvlv_container_free_ref(tvlv); 795 batadv_tvlv_container_put(tvlv);
760} 796}
761 797
762/** 798/**
@@ -808,7 +844,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
808 844
809 memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len)); 845 memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len));
810 INIT_HLIST_NODE(&tvlv_new->list); 846 INIT_HLIST_NODE(&tvlv_new->list);
811 atomic_set(&tvlv_new->refcount, 1); 847 kref_init(&tvlv_new->refcount);
812 848
813 spin_lock_bh(&bat_priv->tvlv.container_list_lock); 849 spin_lock_bh(&bat_priv->tvlv.container_list_lock);
814 tvlv_old = batadv_tvlv_container_get(bat_priv, type, version); 850 tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
@@ -826,7 +862,7 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv,
826 * @additional_packet_len: requested additional packet size on top of minimum 862 * @additional_packet_len: requested additional packet size on top of minimum
827 * size 863 * size
828 * 864 *
829 * Returns true of the packet buffer could be changed to the requested size, 865 * Return: true if the packet buffer could be changed to the requested size,
830 * false otherwise. 866 * false otherwise.
831 */ 867 */
832static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, 868static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
@@ -862,7 +898,7 @@ static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff,
862 * The ogm packet might be enlarged or shrunk depending on the current size 898 * The ogm packet might be enlarged or shrunk depending on the current size
863 * and the size of the to-be-appended tvlv containers. 899 * and the size of the to-be-appended tvlv containers.
864 * 900 *
865 * Returns size of all appended tvlv containers in bytes. 901 * Return: size of all appended tvlv containers in bytes.
866 */ 902 */
867u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, 903u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv,
868 unsigned char **packet_buff, 904 unsigned char **packet_buff,
@@ -915,7 +951,7 @@ end:
915 * @tvlv_value: tvlv content 951 * @tvlv_value: tvlv content
916 * @tvlv_value_len: tvlv content length 952 * @tvlv_value_len: tvlv content length
917 * 953 *
918 * Returns success if handler was not found or the return value of the handler 954 * Return: success if handler was not found or the return value of the handler
919 * callback. 955 * callback.
920 */ 956 */
921static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, 957static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
@@ -968,7 +1004,7 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
968 * @tvlv_value: tvlv content 1004 * @tvlv_value: tvlv content
969 * @tvlv_value_len: tvlv content length 1005 * @tvlv_value_len: tvlv content length
970 * 1006 *
971 * Returns success when processing an OGM or the return value of all called 1007 * Return: success when processing an OGM or the return value of all called
972 * handler callbacks. 1008 * handler callbacks.
973 */ 1009 */
974int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, 1010int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
@@ -1001,7 +1037,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
1001 src, dst, tvlv_value, 1037 src, dst, tvlv_value,
1002 tvlv_value_cont_len); 1038 tvlv_value_cont_len);
1003 if (tvlv_handler) 1039 if (tvlv_handler)
1004 batadv_tvlv_handler_free_ref(tvlv_handler); 1040 batadv_tvlv_handler_put(tvlv_handler);
1005 tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len; 1041 tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
1006 tvlv_value_len -= tvlv_value_cont_len; 1042 tvlv_value_len -= tvlv_value_cont_len;
1007 } 1043 }
@@ -1081,7 +1117,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
1081 1117
1082 tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); 1118 tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
1083 if (tvlv_handler) { 1119 if (tvlv_handler) {
1084 batadv_tvlv_handler_free_ref(tvlv_handler); 1120 batadv_tvlv_handler_put(tvlv_handler);
1085 return; 1121 return;
1086 } 1122 }
1087 1123
@@ -1094,7 +1130,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
1094 tvlv_handler->type = type; 1130 tvlv_handler->type = type;
1095 tvlv_handler->version = version; 1131 tvlv_handler->version = version;
1096 tvlv_handler->flags = flags; 1132 tvlv_handler->flags = flags;
1097 atomic_set(&tvlv_handler->refcount, 1); 1133 kref_init(&tvlv_handler->refcount);
1098 INIT_HLIST_NODE(&tvlv_handler->list); 1134 INIT_HLIST_NODE(&tvlv_handler->list);
1099 1135
1100 spin_lock_bh(&bat_priv->tvlv.handler_list_lock); 1136 spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
@@ -1118,11 +1154,11 @@ void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
1118 if (!tvlv_handler) 1154 if (!tvlv_handler)
1119 return; 1155 return;
1120 1156
1121 batadv_tvlv_handler_free_ref(tvlv_handler); 1157 batadv_tvlv_handler_put(tvlv_handler);
1122 spin_lock_bh(&bat_priv->tvlv.handler_list_lock); 1158 spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
1123 hlist_del_rcu(&tvlv_handler->list); 1159 hlist_del_rcu(&tvlv_handler->list);
1124 spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); 1160 spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
1125 batadv_tvlv_handler_free_ref(tvlv_handler); 1161 batadv_tvlv_handler_put(tvlv_handler);
1126} 1162}
1127 1163
1128/** 1164/**
@@ -1182,7 +1218,7 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
1182 if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) 1218 if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP)
1183 kfree_skb(skb); 1219 kfree_skb(skb);
1184out: 1220out:
1185 batadv_orig_node_free_ref(orig_node); 1221 batadv_orig_node_put(orig_node);
1186} 1222}
1187 1223
1188/** 1224/**
@@ -1190,8 +1226,8 @@ out:
1190 * @skb: the buffer containing the packet 1226 * @skb: the buffer containing the packet
1191 * @header_len: length of the batman header preceding the ethernet header 1227 * @header_len: length of the batman header preceding the ethernet header
1192 * 1228 *
1193 * If the packet embedded in the skb is vlan tagged this function returns the 1229 * Return: VID with the BATADV_VLAN_HAS_TAG flag when the packet embedded in the
1194 * VID with the BATADV_VLAN_HAS_TAG flag. Otherwise BATADV_NO_FLAGS is returned. 1230 * skb is vlan tagged. Otherwise BATADV_NO_FLAGS.
1195 */ 1231 */
1196unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) 1232unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len)
1197{ 1233{
@@ -1218,7 +1254,7 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len)
1218 * @vid: the VLAN identifier for which the AP isolation attributed as to be 1254 * @vid: the VLAN identifier for which the AP isolation attributed as to be
1219 * looked up 1255 * looked up
1220 * 1256 *
1221 * Returns true if AP isolation is on for the VLAN idenfied by vid, false 1257 * Return: true if AP isolation is on for the VLAN identified by vid, false
1222 * otherwise 1258 * otherwise
1223 */ 1259 */
1224bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) 1260bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid)
@@ -1232,7 +1268,7 @@ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid)
1232 vlan = batadv_softif_vlan_get(bat_priv, vid); 1268 vlan = batadv_softif_vlan_get(bat_priv, vid);
1233 if (vlan) { 1269 if (vlan) {
1234 ap_isolation_enabled = atomic_read(&vlan->ap_isolation); 1270 ap_isolation_enabled = atomic_read(&vlan->ap_isolation);
1235 batadv_softif_vlan_free_ref(vlan); 1271 batadv_softif_vlan_put(vlan);
1236 } 1272 }
1237 1273
1238 return ap_isolation_enabled; 1274 return ap_isolation_enabled;
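
Beyond the refcount conversion, the main.c hunks add lockdep_assert_held() to the TVLV container helpers, so the requirement that tvlv.container_list_lock be held is checked at runtime on lockdep-enabled kernels instead of living only in the comment. A hedged sketch of that annotation style, with hypothetical structures standing in for the batadv ones:

#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_container {
        struct hlist_node list;
        u16 len;
};

struct my_ctx {
        spinlock_t list_lock;   /* protects container_list */
        struct hlist_head container_list;
};

/* Caller must hold ctx->list_lock; lockdep complains at runtime if not. */
static u16 my_container_list_size(struct my_ctx *ctx)
{
        struct my_container *c;
        u16 len = 0;

        lockdep_assert_held(&ctx->list_lock);

        hlist_for_each_entry(c, &ctx->container_list, list)
                len += c->len;

        return len;
}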
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 9dbd9107e7e1..db4533631834 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -24,17 +24,21 @@
24#define BATADV_DRIVER_DEVICE "batman-adv" 24#define BATADV_DRIVER_DEVICE "batman-adv"
25 25
26#ifndef BATADV_SOURCE_VERSION 26#ifndef BATADV_SOURCE_VERSION
27#define BATADV_SOURCE_VERSION "2016.0" 27#define BATADV_SOURCE_VERSION "2016.1"
28#endif 28#endif
29 29
30/* B.A.T.M.A.N. parameters */ 30/* B.A.T.M.A.N. parameters */
31 31
32#define BATADV_TQ_MAX_VALUE 255 32#define BATADV_TQ_MAX_VALUE 255
33#define BATADV_THROUGHPUT_MAX_VALUE 0xFFFFFFFF
33#define BATADV_JITTER 20 34#define BATADV_JITTER 20
34 35
35/* Time To Live of broadcast messages */ 36/* Time To Live of broadcast messages */
36#define BATADV_TTL 50 37#define BATADV_TTL 50
37 38
39/* maximum sequence number age of broadcast messages */
40#define BATADV_BCAST_MAX_AGE 64
41
38/* purge originators after time in seconds if no valid packet comes in 42/* purge originators after time in seconds if no valid packet comes in
39 * -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE 43 * -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE
40 */ 44 */
@@ -57,6 +61,15 @@
57#define BATADV_TQ_LOCAL_BIDRECT_RECV_MINIMUM 1 61#define BATADV_TQ_LOCAL_BIDRECT_RECV_MINIMUM 1
58#define BATADV_TQ_TOTAL_BIDRECT_LIMIT 1 62#define BATADV_TQ_TOTAL_BIDRECT_LIMIT 1
59 63
64/* B.A.T.M.A.N. V */
65#define BATADV_THROUGHPUT_DEFAULT_VALUE 10 /* 1 Mbps */
66#define BATADV_ELP_PROBES_PER_NODE 2
67#define BATADV_ELP_MIN_PROBE_SIZE 200 /* bytes */
68#define BATADV_ELP_PROBE_MAX_TX_DIFF 100 /* milliseconds */
69#define BATADV_ELP_MAX_AGE 64
70#define BATADV_OGM_MAX_ORIGDIFF 5
71#define BATADV_OGM_MAX_AGE 64
72
60/* number of OGMs sent with the last tt diff */ 73/* number of OGMs sent with the last tt diff */
61#define BATADV_TT_OGM_APPEND_MAX 3 74#define BATADV_TT_OGM_APPEND_MAX 3
62 75
@@ -97,11 +110,6 @@
97 */ 110 */
98#define BATADV_TQ_SIMILARITY_THRESHOLD 50 111#define BATADV_TQ_SIMILARITY_THRESHOLD 50
99 112
100/* how much worse secondary interfaces may be to be considered as bonding
101 * candidates
102 */
103#define BATADV_BONDING_TQ_THRESHOLD 50
104
105/* should not be bigger than 512 bytes or change the size of 113/* should not be bigger than 512 bytes or change the size of
106 * forw_packet->direct_link_flags 114 * forw_packet->direct_link_flags
107 */ 115 */
@@ -273,9 +281,14 @@ static inline void _batadv_dbg(int type __always_unused,
273 pr_err("%s: " fmt, _netdev->name, ## arg); \ 281 pr_err("%s: " fmt, _netdev->name, ## arg); \
274 } while (0) 282 } while (0)
275 283
276/* returns 1 if they are the same ethernet addr 284/**
285 * batadv_compare_eth - Compare two not u16 aligned Ethernet addresses
286 * @data1: Pointer to a six-byte array containing the Ethernet address
287 * @data2: Pointer to the other six-byte array containing the Ethernet address
277 * 288 *
278 * note: can't use ether_addr_equal() as it requires aligned memory 289 * note: can't use ether_addr_equal() as it requires aligned memory
290 *
291 * Return: 1 if they are the same ethernet addr
279 */ 292 */
280static inline bool batadv_compare_eth(const void *data1, const void *data2) 293static inline bool batadv_compare_eth(const void *data1, const void *data2)
281{ 294{
@@ -287,7 +300,7 @@ static inline bool batadv_compare_eth(const void *data1, const void *data2)
287 * @timestamp: base value to compare with (in jiffies) 300 * @timestamp: base value to compare with (in jiffies)
288 * @timeout: added to base value before comparing (in milliseconds) 301 * @timeout: added to base value before comparing (in milliseconds)
289 * 302 *
290 * Returns true if current time is after timestamp + timeout 303 * Return: true if current time is after timestamp + timeout
291 */ 304 */
292static inline bool batadv_has_timed_out(unsigned long timestamp, 305static inline bool batadv_has_timed_out(unsigned long timestamp,
293 unsigned int timeout) 306 unsigned int timeout)
@@ -326,7 +339,13 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
326 339
327#define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) 340#define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1)
328 341
329/* Sum and return the cpu-local counters for index 'idx' */ 342/**
343 * batadv_sum_counter - Sum the cpu-local counters for index 'idx'
344 * @bat_priv: the bat priv with all the soft interface information
345 * @idx: index of counter to sum up
346 *
347 * Return: sum of all cpu-local counters
348 */
330static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) 349static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
331{ 350{
332 u64 *counters, sum = 0; 351 u64 *counters, sum = 0;
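
The new kernel-doc for batadv_sum_counter() in main.h describes the usual per-cpu counter pattern: each CPU increments its own copy without locking, and a reader sums the slot across all possible CPUs. A hedged sketch of that summation, assuming a per-cpu array of u64 counters (names are illustrative, not the batadv implementation):

#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/types.h>

/* Sum slot 'idx' of a per-cpu u64 counter array across all possible CPUs. */
static u64 my_sum_counter(u64 __percpu *counters, size_t idx)
{
        u64 *cpu_counters;
        u64 sum = 0;
        int cpu;

        for_each_possible_cpu(cpu) {
                cpu_counters = per_cpu_ptr(counters, cpu);
                sum += cpu_counters[idx];
        }

        return sum;
}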
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 75fa5013af72..8caa2c72efa3 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2014-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Linus Lüssing 3 * Linus Lüssing
4 * 4 *
@@ -30,6 +30,7 @@
30#include <linux/in.h> 30#include <linux/in.h>
31#include <linux/ip.h> 31#include <linux/ip.h>
32#include <linux/ipv6.h> 32#include <linux/ipv6.h>
33#include <linux/kref.h>
33#include <linux/list.h> 34#include <linux/list.h>
34#include <linux/lockdep.h> 35#include <linux/lockdep.h>
35#include <linux/netdevice.h> 36#include <linux/netdevice.h>
@@ -55,7 +56,7 @@
55 * Collect multicast addresses of the local multicast listeners 56 * Collect multicast addresses of the local multicast listeners
56 * on the given soft interface, dev, in the given mcast_list. 57 * on the given soft interface, dev, in the given mcast_list.
57 * 58 *
58 * Returns -ENOMEM on memory allocation error or the number of 59 * Return: -ENOMEM on memory allocation error or the number of
59 * items added to the mcast_list otherwise. 60 * items added to the mcast_list otherwise.
60 */ 61 */
61static int batadv_mcast_mla_softif_get(struct net_device *dev, 62static int batadv_mcast_mla_softif_get(struct net_device *dev,
@@ -87,7 +88,7 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev,
87 * @mcast_addr: the multicast address to check 88 * @mcast_addr: the multicast address to check
88 * @mcast_list: the list with multicast addresses to search in 89 * @mcast_list: the list with multicast addresses to search in
89 * 90 *
90 * Returns true if the given address is already in the given list. 91 * Return: true if the given address is already in the given list.
91 * Otherwise returns false. 92 * Otherwise returns false.
92 */ 93 */
93static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, 94static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr,
@@ -195,8 +196,9 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
195 * batadv_mcast_has_bridge - check whether the soft-iface is bridged 196 * batadv_mcast_has_bridge - check whether the soft-iface is bridged
196 * @bat_priv: the bat priv with all the soft interface information 197 * @bat_priv: the bat priv with all the soft interface information
197 * 198 *
198 * Checks whether there is a bridge on top of our soft interface. Returns 199 * Checks whether there is a bridge on top of our soft interface.
199 * true if so, false otherwise. 200 *
201 * Return: true if there is a bridge, false otherwise.
200 */ 202 */
201static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv) 203static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
202{ 204{
@@ -218,7 +220,7 @@ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv)
218 * Updates the own multicast tvlv with our current multicast related settings, 220 * Updates the own multicast tvlv with our current multicast related settings,
219 * capabilities and inabilities. 221 * capabilities and inabilities.
220 * 222 *
221 * Returns true if the tvlv container is registered afterwards. Otherwise 223 * Return: true if the tvlv container is registered afterwards. Otherwise
222 * returns false. 224 * returns false.
223 */ 225 */
224static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv) 226static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
@@ -289,8 +291,8 @@ out:
289 * Checks whether the given IPv4 packet has the potential to be forwarded with a 291 * Checks whether the given IPv4 packet has the potential to be forwarded with a
290 * mode more optimal than classic flooding. 292 * mode more optimal than classic flooding.
291 * 293 *
292 * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM in case of 294 * Return: If so then 0. Otherwise -EINVAL or -ENOMEM in case of memory
293 * memory allocation failure. 295 * allocation failure.
294 */ 296 */
295static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, 297static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
296 struct sk_buff *skb, 298 struct sk_buff *skb,
@@ -327,8 +329,7 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
327 * Checks whether the given IPv6 packet has the potential to be forwarded with a 329 * Checks whether the given IPv6 packet has the potential to be forwarded with a
328 * mode more optimal than classic flooding. 330 * mode more optimal than classic flooding.
329 * 331 *
330 * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out 332 * Return: If so then 0. Otherwise -EINVAL or -ENOMEM if we are out of memory
331 * of memory.
332 */ 333 */
333static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, 334static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
334 struct sk_buff *skb, 335 struct sk_buff *skb,
@@ -366,8 +367,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
366 * Checks whether the given multicast ethernet frame has the potential to be 367 * Checks whether the given multicast ethernet frame has the potential to be
367 * forwarded with a mode more optimal than classic flooding. 368 * forwarded with a mode more optimal than classic flooding.
368 * 369 *
369 * If so then returns 0. Otherwise -EINVAL is returned or -ENOMEM if we are out 370 * Return: If so then 0. Otherwise -EINVAL or -ENOMEM if we are out of memory
370 * of memory.
371 */ 371 */
372static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, 372static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
373 struct sk_buff *skb, 373 struct sk_buff *skb,
@@ -398,7 +398,7 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
398 * @bat_priv: the bat priv with all the soft interface information 398 * @bat_priv: the bat priv with all the soft interface information
399 * @ethhdr: ethernet header of a packet 399 * @ethhdr: ethernet header of a packet
400 * 400 *
401 * Returns the number of nodes which want all IPv4 multicast traffic if the 401 * Return: the number of nodes which want all IPv4 multicast traffic if the
402 * given ethhdr is from an IPv4 packet or the number of nodes which want all 402 * given ethhdr is from an IPv4 packet or the number of nodes which want all
403 * IPv6 traffic if it matches an IPv6 packet. 403 * IPv6 traffic if it matches an IPv6 packet.
404 */ 404 */
@@ -421,7 +421,7 @@ static int batadv_mcast_forw_want_all_ip_count(struct batadv_priv *bat_priv,
421 * @bat_priv: the bat priv with all the soft interface information 421 * @bat_priv: the bat priv with all the soft interface information
422 * @ethhdr: the ether header containing the multicast destination 422 * @ethhdr: the ether header containing the multicast destination
423 * 423 *
424 * Returns an orig_node matching the multicast address provided by ethhdr 424 * Return: an orig_node matching the multicast address provided by ethhdr
425 * via a translation table lookup. This increases the returned node's refcount. 425 * via a translation table lookup. This increases the returned node's refcount.
426 */ 426 */
427static struct batadv_orig_node * 427static struct batadv_orig_node *
@@ -436,7 +436,7 @@ batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
436 * batadv_mcast_forw_ipv4_node_get - get a node with an ipv4 flag 436 * batadv_mcast_forw_ipv4_node_get - get a node with an ipv4 flag
437 * @bat_priv: the bat priv with all the soft interface information 437 * @bat_priv: the bat priv with all the soft interface information
438 * 438 *
439 * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and 439 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and
440 * increases its refcount. 440 * increases its refcount.
441 */ 441 */
442static struct batadv_orig_node * 442static struct batadv_orig_node *
@@ -448,7 +448,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
448 hlist_for_each_entry_rcu(tmp_orig_node, 448 hlist_for_each_entry_rcu(tmp_orig_node,
449 &bat_priv->mcast.want_all_ipv4_list, 449 &bat_priv->mcast.want_all_ipv4_list,
450 mcast_want_all_ipv4_node) { 450 mcast_want_all_ipv4_node) {
451 if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) 451 if (!kref_get_unless_zero(&tmp_orig_node->refcount))
452 continue; 452 continue;
453 453
454 orig_node = tmp_orig_node; 454 orig_node = tmp_orig_node;
@@ -463,7 +463,7 @@ batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
463 * batadv_mcast_forw_ipv6_node_get - get a node with an ipv6 flag 463 * batadv_mcast_forw_ipv6_node_get - get a node with an ipv6 flag
464 * @bat_priv: the bat priv with all the soft interface information 464 * @bat_priv: the bat priv with all the soft interface information
465 * 465 *
466 * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set 466 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set
467 * and increases its refcount. 467 * and increases its refcount.
468 */ 468 */
469static struct batadv_orig_node * 469static struct batadv_orig_node *
@@ -475,7 +475,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
475 hlist_for_each_entry_rcu(tmp_orig_node, 475 hlist_for_each_entry_rcu(tmp_orig_node,
476 &bat_priv->mcast.want_all_ipv6_list, 476 &bat_priv->mcast.want_all_ipv6_list,
477 mcast_want_all_ipv6_node) { 477 mcast_want_all_ipv6_node) {
478 if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) 478 if (!kref_get_unless_zero(&tmp_orig_node->refcount))
479 continue; 479 continue;
480 480
481 orig_node = tmp_orig_node; 481 orig_node = tmp_orig_node;
@@ -491,7 +491,7 @@ batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
491 * @bat_priv: the bat priv with all the soft interface information 491 * @bat_priv: the bat priv with all the soft interface information
492 * @ethhdr: an ethernet header to determine the protocol family from 492 * @ethhdr: an ethernet header to determine the protocol family from
493 * 493 *
494 * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or 494 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or
495 * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, set and 495 * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, set and
496 * increases its refcount. 496 * increases its refcount.
497 */ 497 */
@@ -514,7 +514,7 @@ batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv,
514 * batadv_mcast_forw_unsnoop_node_get - get a node with an unsnoopable flag 514 * batadv_mcast_forw_unsnoop_node_get - get a node with an unsnoopable flag
515 * @bat_priv: the bat priv with all the soft interface information 515 * @bat_priv: the bat priv with all the soft interface information
516 * 516 *
517 * Returns an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag 517 * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag
518 * set and increases its refcount. 518 * set and increases its refcount.
519 */ 519 */
520static struct batadv_orig_node * 520static struct batadv_orig_node *
@@ -526,7 +526,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
526 hlist_for_each_entry_rcu(tmp_orig_node, 526 hlist_for_each_entry_rcu(tmp_orig_node,
527 &bat_priv->mcast.want_all_unsnoopables_list, 527 &bat_priv->mcast.want_all_unsnoopables_list,
528 mcast_want_all_unsnoopables_node) { 528 mcast_want_all_unsnoopables_node) {
529 if (!atomic_inc_not_zero(&tmp_orig_node->refcount)) 529 if (!kref_get_unless_zero(&tmp_orig_node->refcount))
530 continue; 530 continue;
531 531
532 orig_node = tmp_orig_node; 532 orig_node = tmp_orig_node;
@@ -543,7 +543,7 @@ batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
543 * @skb: The multicast packet to check 543 * @skb: The multicast packet to check
544 * @orig: an originator to be set to forward the skb to 544 * @orig: an originator to be set to forward the skb to
545 * 545 *
546 * Returns the forwarding mode as enum batadv_forw_mode and in case of 546 * Return: the forwarding mode as enum batadv_forw_mode and in case of
547 * BATADV_FORW_SINGLE set the orig to the single originator the skb 547 * BATADV_FORW_SINGLE set the orig to the single originator the skb
548 * should be forwarded to. 548 * should be forwarded to.
549 */ 549 */
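The three lookups above (want_all_ipv4, want_all_ipv6 and want_all_unsnoopables) show the refcounting change that runs through this series: while walking an RCU-protected list, an entry may already have dropped its last reference, so kref_get_unless_zero() only takes a reference when the counter is still non-zero, exactly as atomic_inc_not_zero() did before. A minimal, generic sketch of the lookup side of that pattern (hypothetical names, not batman-adv code) is shown below; the matching release/put side appears after the network-coding.c hunk further down.

#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>

struct example_node {
	struct hlist_node list;
	struct kref refcount;
};

/* return the first live entry with its refcount raised, or NULL */
static struct example_node *example_node_get(struct hlist_head *head)
{
	struct example_node *tmp, *found = NULL;

	rcu_read_lock();
	hlist_for_each_entry_rcu(tmp, head, list) {
		/* entry is being torn down, its refcount already hit zero */
		if (!kref_get_unless_zero(&tmp->refcount))
			continue;

		found = tmp;
		break;
	}
	rcu_read_unlock();

	return found;
}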
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 8f3cb04b9f13..80bceec55592 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2014-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2014-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Linus Lüssing 3 * Linus Lüssing
4 * 4 *
@@ -23,7 +23,7 @@
23struct sk_buff; 23struct sk_buff;
24 24
25/** 25/**
26 * batadv_forw_mode - the way a packet should be forwarded as 26 * enum batadv_forw_mode - the way a packet should be forwarded as
27 * @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic 27 * @BATADV_FORW_ALL: forward the packet to all nodes (currently via classic
28 * flooding) 28 * flooding)
29 * @BATADV_FORW_SINGLE: forward the packet to a single node (currently via the 29 * @BATADV_FORW_SINGLE: forward the packet to a single node (currently via the
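The edits to this header mirror the documentation clean-up in the rest of the patch: kernel-doc expects an enum comment to start with the 'enum' keyword, and return values to be documented in a dedicated 'Return:' section instead of free-form 'Returns ...' prose, so that scripts/kernel-doc can emit them as a separate section. A generic example of the target layout (not taken from the tree):

/**
 * enum example_mode - how an example packet is handled
 * @EXAMPLE_MODE_NONE: drop the packet
 * @EXAMPLE_MODE_ALL: flood the packet to all nodes
 */
enum example_mode {
	EXAMPLE_MODE_NONE,
	EXAMPLE_MODE_ALL,
};

/**
 * example_mode_select - pick a handling mode for a packet
 * @len: payload length of the packet
 *
 * Return: EXAMPLE_MODE_ALL for non-empty packets, EXAMPLE_MODE_NONE otherwise.
 */
static enum example_mode example_mode_select(unsigned int len)
{
	return len ? EXAMPLE_MODE_ALL : EXAMPLE_MODE_NONE;
}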
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index cc63b44f0d2e..b41719b6487a 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Martin Hundebøll, Jeppe Ledet-Pedersen 3 * Martin Hundebøll, Jeppe Ledet-Pedersen
4 * 4 *
@@ -32,6 +32,7 @@
32#include <linux/jhash.h> 32#include <linux/jhash.h>
33#include <linux/jiffies.h> 33#include <linux/jiffies.h>
34#include <linux/kernel.h> 34#include <linux/kernel.h>
35#include <linux/kref.h>
35#include <linux/list.h> 36#include <linux/list.h>
36#include <linux/lockdep.h> 37#include <linux/lockdep.h>
37#include <linux/netdevice.h> 38#include <linux/netdevice.h>
@@ -64,6 +65,8 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
64 65
65/** 66/**
66 * batadv_nc_init - one-time initialization for network coding 67 * batadv_nc_init - one-time initialization for network coding
68 *
69 * Return: 0 on success or negative error number in case of failure
67 */ 70 */
68int __init batadv_nc_init(void) 71int __init batadv_nc_init(void)
69{ 72{
@@ -142,6 +145,8 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
142/** 145/**
143 * batadv_nc_mesh_init - initialise coding hash table and start house keeping 146 * batadv_nc_mesh_init - initialise coding hash table and start house keeping
144 * @bat_priv: the bat priv with all the soft interface information 147 * @bat_priv: the bat priv with all the soft interface information
148 *
149 * Return: 0 on success or negative error number in case of failure
145 */ 150 */
146int batadv_nc_mesh_init(struct batadv_priv *bat_priv) 151int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
147{ 152{
@@ -205,34 +210,50 @@ void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
205/** 210/**
206 * batadv_nc_node_release - release nc_node from lists and queue for free after 211 * batadv_nc_node_release - release nc_node from lists and queue for free after
207 * rcu grace period 212 * rcu grace period
208 * @nc_node: the nc node to free 213 * @ref: kref pointer of the nc_node
209 */ 214 */
210static void batadv_nc_node_release(struct batadv_nc_node *nc_node) 215static void batadv_nc_node_release(struct kref *ref)
211{ 216{
212 batadv_orig_node_free_ref(nc_node->orig_node); 217 struct batadv_nc_node *nc_node;
218
219 nc_node = container_of(ref, struct batadv_nc_node, refcount);
220
221 batadv_orig_node_put(nc_node->orig_node);
213 kfree_rcu(nc_node, rcu); 222 kfree_rcu(nc_node, rcu);
214} 223}
215 224
216/** 225/**
217 * batadv_nc_node_free_ref - decrement the nc node refcounter and possibly 226 * batadv_nc_node_put - decrement the nc_node refcounter and possibly
218 * release it 227 * release it
219 * @nc_node: the nc node to free 228 * @nc_node: nc_node to be free'd
220 */ 229 */
221static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node) 230static void batadv_nc_node_put(struct batadv_nc_node *nc_node)
222{ 231{
223 if (atomic_dec_and_test(&nc_node->refcount)) 232 kref_put(&nc_node->refcount, batadv_nc_node_release);
224 batadv_nc_node_release(nc_node);
225} 233}
226 234
227/** 235/**
228 * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly 236 * batadv_nc_path_release - release nc_path from lists and queue for free after
229 * frees it 237 * rcu grace period
230 * @nc_path: the nc node to free 238 * @ref: kref pointer of the nc_path
231 */ 239 */
232static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path) 240static void batadv_nc_path_release(struct kref *ref)
233{ 241{
234 if (atomic_dec_and_test(&nc_path->refcount)) 242 struct batadv_nc_path *nc_path;
235 kfree_rcu(nc_path, rcu); 243
244 nc_path = container_of(ref, struct batadv_nc_path, refcount);
245
246 kfree_rcu(nc_path, rcu);
247}
248
249/**
250 * batadv_nc_path_put - decrement the nc_path refcounter and possibly
251 * release it
252 * @nc_path: nc_path to be free'd
253 */
254static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
255{
256 kref_put(&nc_path->refcount, batadv_nc_path_release);
236} 257}
237 258
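This hunk is representative of the whole conversion: the atomic_t counter becomes a struct kref, the release function now takes the kref pointer and recovers the object with container_of(), and the old *_free_ref() helpers shrink to kref_put() wrappers renamed to *_put(). A stripped-down sketch of the resulting shape, using a hypothetical 'foo' object rather than batman-adv code:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>

struct foo {
	struct kref refcount;
	struct rcu_head rcu;
};

/* called by kref_put() only when the last reference has been dropped */
static void foo_release(struct kref *ref)
{
	struct foo *obj = container_of(ref, struct foo, refcount);

	kfree_rcu(obj, rcu);	/* defer freeing until after the RCU grace period */
}

/* drop one reference; the object is released once the count reaches zero */
static void foo_put(struct foo *obj)
{
	kref_put(&obj->refcount, foo_release);
}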
238/** 259/**
@@ -242,7 +263,7 @@ static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path)
242static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) 263static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
243{ 264{
244 kfree_skb(nc_packet->skb); 265 kfree_skb(nc_packet->skb);
245 batadv_nc_path_free_ref(nc_packet->nc_path); 266 batadv_nc_path_put(nc_packet->nc_path);
246 kfree(nc_packet); 267 kfree(nc_packet);
247} 268}
248 269
@@ -251,7 +272,7 @@ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
251 * @bat_priv: the bat priv with all the soft interface information 272 * @bat_priv: the bat priv with all the soft interface information
252 * @nc_node: the nc node to check 273 * @nc_node: the nc node to check
253 * 274 *
254 * Returns true if the entry has to be purged now, false otherwise 275 * Return: true if the entry has to be purged now, false otherwise
255 */ 276 */
256static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, 277static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
257 struct batadv_nc_node *nc_node) 278 struct batadv_nc_node *nc_node)
@@ -267,7 +288,7 @@ static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
267 * @bat_priv: the bat priv with all the soft interface information 288 * @bat_priv: the bat priv with all the soft interface information
268 * @nc_path: the nc path to check 289 * @nc_path: the nc path to check
269 * 290 *
270 * Returns true if the entry has to be purged now, false otherwise 291 * Return: true if the entry has to be purged now, false otherwise
271 */ 292 */
272static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, 293static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
273 struct batadv_nc_path *nc_path) 294 struct batadv_nc_path *nc_path)
@@ -287,7 +308,7 @@ static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
287 * @bat_priv: the bat priv with all the soft interface information 308 * @bat_priv: the bat priv with all the soft interface information
288 * @nc_path: the nc path to check 309 * @nc_path: the nc path to check
289 * 310 *
290 * Returns true if the entry has to be purged now, false otherwise 311 * Return: true if the entry has to be purged now, false otherwise
291 */ 312 */
292static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv, 313static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv,
293 struct batadv_nc_path *nc_path) 314 struct batadv_nc_path *nc_path)
@@ -335,7 +356,7 @@ batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv,
335 "Removing nc_node %pM -> %pM\n", 356 "Removing nc_node %pM -> %pM\n",
336 nc_node->addr, nc_node->orig_node->orig); 357 nc_node->addr, nc_node->orig_node->orig);
337 list_del_rcu(&nc_node->list); 358 list_del_rcu(&nc_node->list);
338 batadv_nc_node_free_ref(nc_node); 359 batadv_nc_node_put(nc_node);
339 } 360 }
340 spin_unlock_bh(lock); 361 spin_unlock_bh(lock);
341} 362}
@@ -446,7 +467,7 @@ static void batadv_nc_purge_paths(struct batadv_priv *bat_priv,
446 "Remove nc_path %pM -> %pM\n", 467 "Remove nc_path %pM -> %pM\n",
447 nc_path->prev_hop, nc_path->next_hop); 468 nc_path->prev_hop, nc_path->next_hop);
448 hlist_del_rcu(&nc_path->hash_entry); 469 hlist_del_rcu(&nc_path->hash_entry);
449 batadv_nc_path_free_ref(nc_path); 470 batadv_nc_path_put(nc_path);
450 } 471 }
451 spin_unlock_bh(lock); 472 spin_unlock_bh(lock);
452 } 473 }
@@ -470,7 +491,7 @@ static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src,
470 * @data: data to hash 491 * @data: data to hash
471 * @size: size of the hash table 492 * @size: size of the hash table
472 * 493 *
473 * Returns the selected index in the hash table for the given data. 494 * Return: the selected index in the hash table for the given data.
474 */ 495 */
475static u32 batadv_nc_hash_choose(const void *data, u32 size) 496static u32 batadv_nc_hash_choose(const void *data, u32 size)
476{ 497{
@@ -489,7 +510,7 @@ static u32 batadv_nc_hash_choose(const void *data, u32 size)
489 * @node: node in the local table 510 * @node: node in the local table
490 * @data2: second object to compare the node to 511 * @data2: second object to compare the node to
491 * 512 *
492 * Returns 1 if the two entry are the same, 0 otherwise 513 * Return: 1 if the two entries are the same, 0 otherwise
493 */ 514 */
494static int batadv_nc_hash_compare(const struct hlist_node *node, 515static int batadv_nc_hash_compare(const struct hlist_node *node,
495 const void *data2) 516 const void *data2)
@@ -516,7 +537,7 @@ static int batadv_nc_hash_compare(const struct hlist_node *node,
516 * @hash: hash table containing the nc path 537 * @hash: hash table containing the nc path
517 * @data: search key 538 * @data: search key
518 * 539 *
519 * Returns the nc_path if found, NULL otherwise. 540 * Return: the nc_path if found, NULL otherwise.
520 */ 541 */
521static struct batadv_nc_path * 542static struct batadv_nc_path *
522batadv_nc_hash_find(struct batadv_hashtable *hash, 543batadv_nc_hash_find(struct batadv_hashtable *hash,
@@ -537,7 +558,7 @@ batadv_nc_hash_find(struct batadv_hashtable *hash,
537 if (!batadv_nc_hash_compare(&nc_path->hash_entry, data)) 558 if (!batadv_nc_hash_compare(&nc_path->hash_entry, data))
538 continue; 559 continue;
539 560
540 if (!atomic_inc_not_zero(&nc_path->refcount)) 561 if (!kref_get_unless_zero(&nc_path->refcount))
541 continue; 562 continue;
542 563
543 nc_path_tmp = nc_path; 564 nc_path_tmp = nc_path;
@@ -554,9 +575,7 @@ batadv_nc_hash_find(struct batadv_hashtable *hash,
554 */ 575 */
555static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) 576static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
556{ 577{
557 batadv_send_skb_packet(nc_packet->skb, 578 batadv_send_unicast_skb(nc_packet->skb, nc_packet->neigh_node);
558 nc_packet->neigh_node->if_incoming,
559 nc_packet->nc_path->next_hop);
560 nc_packet->skb = NULL; 579 nc_packet->skb = NULL;
561 batadv_nc_packet_free(nc_packet); 580 batadv_nc_packet_free(nc_packet);
562} 581}
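batadv_nc_send_packet() now hands the whole neigh_node to batadv_send_unicast_skb() instead of passing the incoming interface and next-hop address separately. The helper itself is not part of this diff; presumably it is a thin wrapper along the lines of the sketch below (the function name and return handling are assumptions), since the neighbour already carries both the outgoing hard interface and the destination MAC address:

/* assumed shape of the new send helper: derive interface and destination
 * from the neighbour instead of taking them as separate arguments
 */
static int example_send_unicast_skb(struct sk_buff *skb,
				    struct batadv_neigh_node *neigh_node)
{
	return batadv_send_skb_packet(skb, neigh_node->if_incoming,
				      neigh_node->addr);
}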
@@ -571,7 +590,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
571 * timeout. If so, the packet is no longer kept and the entry deleted from the 590 * timeout. If so, the packet is no longer kept and the entry deleted from the
572 * queue. Has to be called with the appropriate locks. 591 * queue. Has to be called with the appropriate locks.
573 * 592 *
574 * Returns false as soon as the entry in the fifo queue has not been timed out 593 * Return: false as soon as the entry in the fifo queue has not been timed out
575 * yet and true otherwise. 594 * yet and true otherwise.
576 */ 595 */
577static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv, 596static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
@@ -610,7 +629,7 @@ out:
610 * packet is no longer delayed, immediately sent and the entry deleted from the 629 * packet is no longer delayed, immediately sent and the entry deleted from the
611 * queue. Has to be called with the appropriate locks. 630 * queue. Has to be called with the appropriate locks.
612 * 631 *
613 * Returns false as soon as the entry in the fifo queue has not been timed out 632 * Return: false as soon as the entry in the fifo queue has not been timed out
614 * yet and true otherwise. 633 * yet and true otherwise.
615 */ 634 */
616static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv, 635static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv,
@@ -731,7 +750,7 @@ static void batadv_nc_worker(struct work_struct *work)
731 * @orig_node: neighboring orig node which may be used as nc candidate 750 * @orig_node: neighboring orig node which may be used as nc candidate
732 * @ogm_packet: incoming ogm packet also used for the checks 751 * @ogm_packet: incoming ogm packet also used for the checks
733 * 752 *
734 * Returns true if: 753 * Return: true if:
735 * 1) The OGM must have the most recent sequence number. 754 * 1) The OGM must have the most recent sequence number.
736 * 2) The TTL must be decremented by one and only one. 755 * 2) The TTL must be decremented by one and only one.
737 * 3) The OGM must be received from the first hop from orig_node. 756 * 3) The OGM must be received from the first hop from orig_node.
@@ -751,7 +770,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
751 770
752 last_ttl = orig_ifinfo->last_ttl; 771 last_ttl = orig_ifinfo->last_ttl;
753 last_real_seqno = orig_ifinfo->last_real_seqno; 772 last_real_seqno = orig_ifinfo->last_real_seqno;
754 batadv_orig_ifinfo_free_ref(orig_ifinfo); 773 batadv_orig_ifinfo_put(orig_ifinfo);
755 774
756 if (last_real_seqno != ntohl(ogm_packet->seqno)) 775 if (last_real_seqno != ntohl(ogm_packet->seqno))
757 return false; 776 return false;
@@ -772,7 +791,7 @@ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
772 * (can be equal to orig_node) 791 * (can be equal to orig_node)
773 * @in_coding: traverse incoming or outgoing network coding list 792 * @in_coding: traverse incoming or outgoing network coding list
774 * 793 *
775 * Returns the nc_node if found, NULL otherwise. 794 * Return: the nc_node if found, NULL otherwise.
776 */ 795 */
777static struct batadv_nc_node 796static struct batadv_nc_node
778*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, 797*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node,
@@ -793,7 +812,7 @@ static struct batadv_nc_node
793 if (!batadv_compare_eth(nc_node->addr, orig_node->orig)) 812 if (!batadv_compare_eth(nc_node->addr, orig_node->orig))
794 continue; 813 continue;
795 814
796 if (!atomic_inc_not_zero(&nc_node->refcount)) 815 if (!kref_get_unless_zero(&nc_node->refcount))
797 continue; 816 continue;
798 817
799 /* Found a match */ 818 /* Found a match */
@@ -814,7 +833,7 @@ static struct batadv_nc_node
814 * (can be equal to orig_node) 833 * (can be equal to orig_node)
815 * @in_coding: traverse incoming or outgoing network coding list 834 * @in_coding: traverse incoming or outgoing network coding list
816 * 835 *
817 * Returns the nc_node if found or created, NULL in case of an error. 836 * Return: the nc_node if found or created, NULL in case of an error.
818 */ 837 */
819static struct batadv_nc_node 838static struct batadv_nc_node
820*batadv_nc_get_nc_node(struct batadv_priv *bat_priv, 839*batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
@@ -837,14 +856,15 @@ static struct batadv_nc_node
837 if (!nc_node) 856 if (!nc_node)
838 return NULL; 857 return NULL;
839 858
840 if (!atomic_inc_not_zero(&orig_neigh_node->refcount)) 859 if (!kref_get_unless_zero(&orig_neigh_node->refcount))
841 goto free; 860 goto free;
842 861
843 /* Initialize nc_node */ 862 /* Initialize nc_node */
844 INIT_LIST_HEAD(&nc_node->list); 863 INIT_LIST_HEAD(&nc_node->list);
845 ether_addr_copy(nc_node->addr, orig_node->orig); 864 ether_addr_copy(nc_node->addr, orig_node->orig);
846 nc_node->orig_node = orig_neigh_node; 865 nc_node->orig_node = orig_neigh_node;
847 atomic_set(&nc_node->refcount, 2); 866 kref_init(&nc_node->refcount);
867 kref_get(&nc_node->refcount);
848 868
849 /* Select ingoing or outgoing coding node */ 869 /* Select ingoing or outgoing coding node */
850 if (in_coding) { 870 if (in_coding) {
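Replacing atomic_set(&nc_node->refcount, 2) with kref_init() plus an explicit kref_get() keeps the same semantics while making both references visible in the code: kref_init() starts the counter at 1 and the extra get raises it to 2, one reference for the list the node is about to be added to and one that is handed back to the caller. In sketch form:

	kref_init(&nc_node->refcount);	/* count = 1: reference handed back to the caller */
	kref_get(&nc_node->refcount);	/* count = 2: reference held by the coding list   */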
@@ -920,9 +940,9 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
920 940
921out: 941out:
922 if (in_nc_node) 942 if (in_nc_node)
923 batadv_nc_node_free_ref(in_nc_node); 943 batadv_nc_node_put(in_nc_node);
924 if (out_nc_node) 944 if (out_nc_node)
925 batadv_nc_node_free_ref(out_nc_node); 945 batadv_nc_node_put(out_nc_node);
926} 946}
927 947
928/** 948/**
@@ -932,7 +952,7 @@ out:
932 * @src: ethernet source address - first half of the nc path search key 952 * @src: ethernet source address - first half of the nc path search key
933 * @dst: ethernet destination address - second half of the nc path search key 953 * @dst: ethernet destination address - second half of the nc path search key
934 * 954 *
935 * Returns pointer to nc_path if the path was found or created, returns NULL 955 * Return: pointer to nc_path if the path was found or created, returns NULL
936 * on error. 956 * on error.
937 */ 957 */
938static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, 958static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
@@ -963,7 +983,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
963 /* Initialize nc_path */ 983 /* Initialize nc_path */
964 INIT_LIST_HEAD(&nc_path->packet_list); 984 INIT_LIST_HEAD(&nc_path->packet_list);
965 spin_lock_init(&nc_path->packet_list_lock); 985 spin_lock_init(&nc_path->packet_list_lock);
966 atomic_set(&nc_path->refcount, 2); 986 kref_init(&nc_path->refcount);
987 kref_get(&nc_path->refcount);
967 nc_path->last_valid = jiffies; 988 nc_path->last_valid = jiffies;
968 ether_addr_copy(nc_path->next_hop, dst); 989 ether_addr_copy(nc_path->next_hop, dst);
969 ether_addr_copy(nc_path->prev_hop, src); 990 ether_addr_copy(nc_path->prev_hop, src);
@@ -989,6 +1010,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
989 * batadv_nc_random_weight_tq - scale the receiver's TQ-value to avoid unfair 1010 * batadv_nc_random_weight_tq - scale the receiver's TQ-value to avoid unfair
990 * selection of a receiver with slightly lower TQ than the other 1011 * selection of a receiver with slightly lower TQ than the other
991 * @tq: to be weighted tq value 1012 * @tq: to be weighted tq value
1013 *
1014 * Return: scaled tq value
992 */ 1015 */
993static u8 batadv_nc_random_weight_tq(u8 tq) 1016static u8 batadv_nc_random_weight_tq(u8 tq)
994{ 1017{
@@ -1029,7 +1052,7 @@ static void batadv_nc_memxor(char *dst, const char *src, unsigned int len)
1029 * @nc_packet: structure containing the packet to the skb can be coded with 1052 * @nc_packet: structure containing the packet to the skb can be coded with
1030 * @neigh_node: next hop to forward packet to 1053 * @neigh_node: next hop to forward packet to
1031 * 1054 *
1032 * Returns true if both packets are consumed, false otherwise. 1055 * Return: true if both packets are consumed, false otherwise.
1033 */ 1056 */
1034static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, 1057static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1035 struct sk_buff *skb, 1058 struct sk_buff *skb,
@@ -1042,11 +1065,11 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1042 struct batadv_unicast_packet *packet1; 1065 struct batadv_unicast_packet *packet1;
1043 struct batadv_unicast_packet *packet2; 1066 struct batadv_unicast_packet *packet2;
1044 struct batadv_coded_packet *coded_packet; 1067 struct batadv_coded_packet *coded_packet;
1045 struct batadv_neigh_node *neigh_tmp, *router_neigh; 1068 struct batadv_neigh_node *neigh_tmp, *router_neigh, *first_dest;
1046 struct batadv_neigh_node *router_coding = NULL; 1069 struct batadv_neigh_node *router_coding = NULL, *second_dest;
1047 struct batadv_neigh_ifinfo *router_neigh_ifinfo = NULL; 1070 struct batadv_neigh_ifinfo *router_neigh_ifinfo = NULL;
1048 struct batadv_neigh_ifinfo *router_coding_ifinfo = NULL; 1071 struct batadv_neigh_ifinfo *router_coding_ifinfo = NULL;
1049 u8 *first_source, *first_dest, *second_source, *second_dest; 1072 u8 *first_source, *second_source;
1050 __be32 packet_id1, packet_id2; 1073 __be32 packet_id1, packet_id2;
1051 size_t count; 1074 size_t count;
1052 bool res = false; 1075 bool res = false;
@@ -1089,9 +1112,9 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1089 */ 1112 */
1090 if (tq_weighted_neigh >= tq_weighted_coding) { 1113 if (tq_weighted_neigh >= tq_weighted_coding) {
1091 /* Destination from nc_packet is selected for MAC-header */ 1114 /* Destination from nc_packet is selected for MAC-header */
1092 first_dest = nc_packet->nc_path->next_hop; 1115 first_dest = nc_packet->neigh_node;
1093 first_source = nc_packet->nc_path->prev_hop; 1116 first_source = nc_packet->nc_path->prev_hop;
1094 second_dest = neigh_node->addr; 1117 second_dest = neigh_node;
1095 second_source = ethhdr->h_source; 1118 second_source = ethhdr->h_source;
1096 packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data; 1119 packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data;
1097 packet2 = (struct batadv_unicast_packet *)skb->data; 1120 packet2 = (struct batadv_unicast_packet *)skb->data;
@@ -1100,9 +1123,9 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1100 skb->data + sizeof(*packet2)); 1123 skb->data + sizeof(*packet2));
1101 } else { 1124 } else {
1102 /* Destination for skb is selected for MAC-header */ 1125 /* Destination for skb is selected for MAC-header */
1103 first_dest = neigh_node->addr; 1126 first_dest = neigh_node;
1104 first_source = ethhdr->h_source; 1127 first_source = ethhdr->h_source;
1105 second_dest = nc_packet->nc_path->next_hop; 1128 second_dest = nc_packet->neigh_node;
1106 second_source = nc_packet->nc_path->prev_hop; 1129 second_source = nc_packet->nc_path->prev_hop;
1107 packet1 = (struct batadv_unicast_packet *)skb->data; 1130 packet1 = (struct batadv_unicast_packet *)skb->data;
1108 packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data; 1131 packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data;
@@ -1144,7 +1167,7 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1144 coded_packet->first_ttvn = packet1->ttvn; 1167 coded_packet->first_ttvn = packet1->ttvn;
1145 1168
1146 /* Info about second unicast packet */ 1169 /* Info about second unicast packet */
1147 ether_addr_copy(coded_packet->second_dest, second_dest); 1170 ether_addr_copy(coded_packet->second_dest, second_dest->addr);
1148 ether_addr_copy(coded_packet->second_source, second_source); 1171 ether_addr_copy(coded_packet->second_source, second_source);
1149 ether_addr_copy(coded_packet->second_orig_dest, packet2->dest); 1172 ether_addr_copy(coded_packet->second_orig_dest, packet2->dest);
1150 coded_packet->second_crc = packet_id2; 1173 coded_packet->second_crc = packet_id2;
@@ -1199,17 +1222,17 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
1199 batadv_nc_packet_free(nc_packet); 1222 batadv_nc_packet_free(nc_packet);
1200 1223
1201 /* Send the coded packet and return true */ 1224 /* Send the coded packet and return true */
1202 batadv_send_skb_packet(skb_dest, neigh_node->if_incoming, first_dest); 1225 batadv_send_unicast_skb(skb_dest, first_dest);
1203 res = true; 1226 res = true;
1204out: 1227out:
1205 if (router_neigh) 1228 if (router_neigh)
1206 batadv_neigh_node_free_ref(router_neigh); 1229 batadv_neigh_node_put(router_neigh);
1207 if (router_coding) 1230 if (router_coding)
1208 batadv_neigh_node_free_ref(router_coding); 1231 batadv_neigh_node_put(router_coding);
1209 if (router_neigh_ifinfo) 1232 if (router_neigh_ifinfo)
1210 batadv_neigh_ifinfo_free_ref(router_neigh_ifinfo); 1233 batadv_neigh_ifinfo_put(router_neigh_ifinfo);
1211 if (router_coding_ifinfo) 1234 if (router_coding_ifinfo)
1212 batadv_neigh_ifinfo_free_ref(router_coding_ifinfo); 1235 batadv_neigh_ifinfo_put(router_coding_ifinfo);
1213 return res; 1236 return res;
1214} 1237}
1215 1238
@@ -1228,7 +1251,7 @@ out:
1228 * Since the source encoded the packet we can be certain it has all necessary 1251 * Since the source encoded the packet we can be certain it has all necessary
1229 * decode information. 1252 * decode information.
1230 * 1253 *
1231 * Returns true if coding of a decoded packet is allowed. 1254 * Return: true if coding of a decoded packet is allowed.
1232 */ 1255 */
1233static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src) 1256static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src)
1234{ 1257{
@@ -1246,7 +1269,7 @@ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src)
1246 * @skb: data skb to forward 1269 * @skb: data skb to forward
1247 * @eth_dst: next hop mac address of skb 1270 * @eth_dst: next hop mac address of skb
1248 * 1271 *
1249 * Returns true if coding of a decoded skb is allowed. 1272 * Return: an nc packet if a suitable coding packet was found, NULL otherwise.
1250 */ 1273 */
1251static struct batadv_nc_packet * 1274static struct batadv_nc_packet *
1252batadv_nc_path_search(struct batadv_priv *bat_priv, 1275batadv_nc_path_search(struct batadv_priv *bat_priv,
@@ -1314,7 +1337,7 @@ batadv_nc_path_search(struct batadv_priv *bat_priv,
1314 * @eth_src: source mac address of skb 1337 * @eth_src: source mac address of skb
1315 * @in_nc_node: pointer to skb next hop's neighbor nc node 1338 * @in_nc_node: pointer to skb next hop's neighbor nc node
1316 * 1339 *
1317 * Returns an nc packet if a suitable coding packet was found, NULL otherwise. 1340 * Return: an nc packet if a suitable coding packet was found, NULL otherwise.
1318 */ 1341 */
1319static struct batadv_nc_packet * 1342static struct batadv_nc_packet *
1320batadv_nc_skb_src_search(struct batadv_priv *bat_priv, 1343batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
@@ -1347,7 +1370,7 @@ batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
1347 } 1370 }
1348 rcu_read_unlock(); 1371 rcu_read_unlock();
1349 1372
1350 batadv_orig_node_free_ref(orig_node); 1373 batadv_orig_node_put(orig_node);
1351 return nc_packet; 1374 return nc_packet;
1352} 1375}
1353 1376
@@ -1397,7 +1420,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
1397 * next hop that potentially sent a packet which our next hop also received 1420 * next hop that potentially sent a packet which our next hop also received
1398 * (overheard) and has stored for later decoding. 1421 * (overheard) and has stored for later decoding.
1399 * 1422 *
1400 * Returns true if the skb was consumed (encoded packet sent) or false otherwise 1423 * Return: true if the skb was consumed (encoded packet sent) or false otherwise
1401 */ 1424 */
1402static bool batadv_nc_skb_dst_search(struct sk_buff *skb, 1425static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
1403 struct batadv_neigh_node *neigh_node, 1426 struct batadv_neigh_node *neigh_node,
@@ -1451,7 +1474,7 @@ static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
1451 * @neigh_node: next hop to forward packet to 1474 * @neigh_node: next hop to forward packet to
1452 * @packet_id: checksum to identify packet 1475 * @packet_id: checksum to identify packet
1453 * 1476 *
1454 * Returns true if the packet was buffered or false in case of an error. 1477 * Return: true if the packet was buffered or false in case of an error.
1455 */ 1478 */
1456static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, 1479static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
1457 struct batadv_nc_path *nc_path, 1480 struct batadv_nc_path *nc_path,
@@ -1485,7 +1508,7 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
1485 * @skb: data skb to forward 1508 * @skb: data skb to forward
1486 * @neigh_node: next hop to forward packet to 1509 * @neigh_node: next hop to forward packet to
1487 * 1510 *
1488 * Returns true if the skb was consumed (encoded packet sent) or false otherwise 1511 * Return: true if the skb was consumed (encoded packet sent) or false otherwise
1489 */ 1512 */
1490bool batadv_nc_skb_forward(struct sk_buff *skb, 1513bool batadv_nc_skb_forward(struct sk_buff *skb,
1491 struct batadv_neigh_node *neigh_node) 1514 struct batadv_neigh_node *neigh_node)
@@ -1530,7 +1553,7 @@ bool batadv_nc_skb_forward(struct sk_buff *skb,
1530 return true; 1553 return true;
1531 1554
1532free_nc_path: 1555free_nc_path:
1533 batadv_nc_path_free_ref(nc_path); 1556 batadv_nc_path_put(nc_path);
1534out: 1557out:
1535 /* Packet is not consumed */ 1558 /* Packet is not consumed */
1536 return false; 1559 return false;
@@ -1592,7 +1615,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
1592free_skb: 1615free_skb:
1593 kfree_skb(skb); 1616 kfree_skb(skb);
1594free_nc_path: 1617free_nc_path:
1595 batadv_nc_path_free_ref(nc_path); 1618 batadv_nc_path_put(nc_path);
1596out: 1619out:
1597 return; 1620 return;
1598} 1621}
@@ -1624,7 +1647,7 @@ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
1624 * @skb: unicast skb to decode 1647 * @skb: unicast skb to decode
1625 * @nc_packet: decode data needed to decode the skb 1648 * @nc_packet: decode data needed to decode the skb
1626 * 1649 *
1627 * Returns pointer to decoded unicast packet if the packet was decoded or NULL 1650 * Return: pointer to decoded unicast packet if the packet was decoded or NULL
1628 * in case of an error. 1651 * in case of an error.
1629 */ 1652 */
1630static struct batadv_unicast_packet * 1653static struct batadv_unicast_packet *
@@ -1718,7 +1741,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
1718 * @ethhdr: pointer to the ethernet header inside the coded packet 1741 * @ethhdr: pointer to the ethernet header inside the coded packet
1719 * @coded: coded packet we try to find decode data for 1742 * @coded: coded packet we try to find decode data for
1720 * 1743 *
1721 * Returns pointer to nc packet if the needed data was found or NULL otherwise. 1744 * Return: pointer to nc packet if the needed data was found or NULL otherwise.
1722 */ 1745 */
1723static struct batadv_nc_packet * 1746static struct batadv_nc_packet *
1724batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, 1747batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
@@ -1781,6 +1804,9 @@ batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
1781 * resulting unicast packet 1804 * resulting unicast packet
1782 * @skb: incoming coded packet 1805 * @skb: incoming coded packet
1783 * @recv_if: pointer to interface this packet was received on 1806 * @recv_if: pointer to interface this packet was received on
1807 *
1808 * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
1809 * otherwise.
1784 */ 1810 */
1785static int batadv_nc_recv_coded_packet(struct sk_buff *skb, 1811static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
1786 struct batadv_hard_iface *recv_if) 1812 struct batadv_hard_iface *recv_if)
@@ -1865,6 +1891,8 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
1865 * batadv_nc_nodes_seq_print_text - print the nc node information 1891 * batadv_nc_nodes_seq_print_text - print the nc node information
1866 * @seq: seq file to print on 1892 * @seq: seq file to print on
1867 * @offset: not used 1893 * @offset: not used
1894 *
1895 * Return: always 0
1868 */ 1896 */
1869int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) 1897int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
1870{ 1898{
@@ -1920,13 +1948,15 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
1920 1948
1921out: 1949out:
1922 if (primary_if) 1950 if (primary_if)
1923 batadv_hardif_free_ref(primary_if); 1951 batadv_hardif_put(primary_if);
1924 return 0; 1952 return 0;
1925} 1953}
1926 1954
1927/** 1955/**
1928 * batadv_nc_init_debugfs - create nc folder and related files in debugfs 1956 * batadv_nc_init_debugfs - create nc folder and related files in debugfs
1929 * @bat_priv: the bat priv with all the soft interface information 1957 * @bat_priv: the bat priv with all the soft interface information
1958 *
1959 * Return: 0 on success or negative error number in case of failure
1930 */ 1960 */
1931int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) 1961int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
1932{ 1962{
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 8f6d4ad8778a..d6d7fb4ec5d5 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2012-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Martin Hundebøll, Jeppe Ledet-Pedersen 3 * Martin Hundebøll, Jeppe Ledet-Pedersen
4 * 4 *
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index fe578f75c391..e4cbb0753e37 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -18,11 +18,13 @@
18#include "originator.h" 18#include "originator.h"
19#include "main.h" 19#include "main.h"
20 20
21#include <linux/atomic.h>
21#include <linux/errno.h> 22#include <linux/errno.h>
22#include <linux/etherdevice.h> 23#include <linux/etherdevice.h>
23#include <linux/fs.h> 24#include <linux/fs.h>
24#include <linux/jiffies.h> 25#include <linux/jiffies.h>
25#include <linux/kernel.h> 26#include <linux/kernel.h>
27#include <linux/kref.h>
26#include <linux/list.h> 28#include <linux/list.h>
27#include <linux/lockdep.h> 29#include <linux/lockdep.h>
28#include <linux/netdevice.h> 30#include <linux/netdevice.h>
@@ -47,7 +49,13 @@ static struct lock_class_key batadv_orig_hash_lock_class_key;
47 49
48static void batadv_purge_orig(struct work_struct *work); 50static void batadv_purge_orig(struct work_struct *work);
49 51
50/* returns 1 if they are the same originator */ 52/**
53 * batadv_compare_orig - comparing function used in the originator hash table
54 * @node: node in the local table
55 * @data2: second object to compare the node to
56 *
57 * Return: 1 if they are the same originator
58 */
51int batadv_compare_orig(const struct hlist_node *node, const void *data2) 59int batadv_compare_orig(const struct hlist_node *node, const void *data2)
52{ 60{
53 const void *data1 = container_of(node, struct batadv_orig_node, 61 const void *data1 = container_of(node, struct batadv_orig_node,
@@ -61,7 +69,7 @@ int batadv_compare_orig(const struct hlist_node *node, const void *data2)
61 * @orig_node: the originator serving the VLAN 69 * @orig_node: the originator serving the VLAN
62 * @vid: the VLAN identifier 70 * @vid: the VLAN identifier
63 * 71 *
64 * Returns the vlan object identified by vid and belonging to orig_node or NULL 72 * Return: the vlan object identified by vid and belonging to orig_node or NULL
65 * if it does not exist. 73 * if it does not exist.
66 */ 74 */
67struct batadv_orig_node_vlan * 75struct batadv_orig_node_vlan *
@@ -75,7 +83,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
75 if (tmp->vid != vid) 83 if (tmp->vid != vid)
76 continue; 84 continue;
77 85
78 if (!atomic_inc_not_zero(&tmp->refcount)) 86 if (!kref_get_unless_zero(&tmp->refcount))
79 continue; 87 continue;
80 88
81 vlan = tmp; 89 vlan = tmp;
@@ -93,7 +101,7 @@ batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
93 * @orig_node: the originator serving the VLAN 101 * @orig_node: the originator serving the VLAN
94 * @vid: the VLAN identifier 102 * @vid: the VLAN identifier
95 * 103 *
96 * Returns NULL in case of failure or the vlan object identified by vid and 104 * Return: NULL in case of failure or the vlan object identified by vid and
97 * belonging to orig_node otherwise. The object is created and added to the list 105 * belonging to orig_node otherwise. The object is created and added to the list
98 * if it does not exist. 106 * if it does not exist.
99 * 107 *
@@ -116,7 +124,8 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
116 if (!vlan) 124 if (!vlan)
117 goto out; 125 goto out;
118 126
119 atomic_set(&vlan->refcount, 2); 127 kref_init(&vlan->refcount);
128 kref_get(&vlan->refcount);
120 vlan->vid = vid; 129 vlan->vid = vid;
121 130
122 hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list); 131 hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list);
@@ -128,14 +137,27 @@ out:
128} 137}
129 138
130/** 139/**
131 * batadv_orig_node_vlan_free_ref - decrement the refcounter and possibly free 140 * batadv_orig_node_vlan_release - release originator-vlan object from lists
141 * and queue for free after rcu grace period
142 * @ref: kref pointer of the originator-vlan object
143 */
144static void batadv_orig_node_vlan_release(struct kref *ref)
145{
146 struct batadv_orig_node_vlan *orig_vlan;
147
148 orig_vlan = container_of(ref, struct batadv_orig_node_vlan, refcount);
149
150 kfree_rcu(orig_vlan, rcu);
151}
152
153/**
154 * batadv_orig_node_vlan_put - decrement the refcounter and possibly release
132 * the originator-vlan object 155 * the originator-vlan object
133 * @orig_vlan: the originator-vlan object to release 156 * @orig_vlan: the originator-vlan object to release
134 */ 157 */
135void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan) 158void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
136{ 159{
137 if (atomic_dec_and_test(&orig_vlan->refcount)) 160 kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
138 kfree_rcu(orig_vlan, rcu);
139} 161}
140 162
141int batadv_originator_init(struct batadv_priv *bat_priv) 163int batadv_originator_init(struct batadv_priv *bat_priv)
@@ -165,99 +187,105 @@ err:
165/** 187/**
166 * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for 188 * batadv_neigh_ifinfo_release - release neigh_ifinfo from lists and queue for
167 * free after rcu grace period 189 * free after rcu grace period
168 * @neigh_ifinfo: the neigh_ifinfo object to release 190 * @ref: kref pointer of the neigh_ifinfo
169 */ 191 */
170static void 192static void batadv_neigh_ifinfo_release(struct kref *ref)
171batadv_neigh_ifinfo_release(struct batadv_neigh_ifinfo *neigh_ifinfo)
172{ 193{
194 struct batadv_neigh_ifinfo *neigh_ifinfo;
195
196 neigh_ifinfo = container_of(ref, struct batadv_neigh_ifinfo, refcount);
197
173 if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT) 198 if (neigh_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
174 batadv_hardif_free_ref(neigh_ifinfo->if_outgoing); 199 batadv_hardif_put(neigh_ifinfo->if_outgoing);
175 200
176 kfree_rcu(neigh_ifinfo, rcu); 201 kfree_rcu(neigh_ifinfo, rcu);
177} 202}
178 203
179/** 204/**
180 * batadv_neigh_ifinfo_free_ref - decrement the refcounter and possibly release 205 * batadv_neigh_ifinfo_put - decrement the refcounter and possibly release
181 * the neigh_ifinfo 206 * the neigh_ifinfo
182 * @neigh_ifinfo: the neigh_ifinfo object to release 207 * @neigh_ifinfo: the neigh_ifinfo object to release
183 */ 208 */
184void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo) 209void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
185{ 210{
186 if (atomic_dec_and_test(&neigh_ifinfo->refcount)) 211 kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release);
187 batadv_neigh_ifinfo_release(neigh_ifinfo);
188} 212}
189 213
190/** 214/**
191 * batadv_hardif_neigh_release - release hardif neigh node from lists and 215 * batadv_hardif_neigh_release - release hardif neigh node from lists and
192 * queue for free after rcu grace period 216 * queue for free after rcu grace period
193 * @hardif_neigh: hardif neigh neighbor to free 217 * @ref: kref pointer of the hardif_neigh_node
194 */ 218 */
195static void 219static void batadv_hardif_neigh_release(struct kref *ref)
196batadv_hardif_neigh_release(struct batadv_hardif_neigh_node *hardif_neigh)
197{ 220{
221 struct batadv_hardif_neigh_node *hardif_neigh;
222
223 hardif_neigh = container_of(ref, struct batadv_hardif_neigh_node,
224 refcount);
225
198 spin_lock_bh(&hardif_neigh->if_incoming->neigh_list_lock); 226 spin_lock_bh(&hardif_neigh->if_incoming->neigh_list_lock);
199 hlist_del_init_rcu(&hardif_neigh->list); 227 hlist_del_init_rcu(&hardif_neigh->list);
200 spin_unlock_bh(&hardif_neigh->if_incoming->neigh_list_lock); 228 spin_unlock_bh(&hardif_neigh->if_incoming->neigh_list_lock);
201 229
202 batadv_hardif_free_ref(hardif_neigh->if_incoming); 230 batadv_hardif_put(hardif_neigh->if_incoming);
203 kfree_rcu(hardif_neigh, rcu); 231 kfree_rcu(hardif_neigh, rcu);
204} 232}
205 233
206/** 234/**
207 * batadv_hardif_neigh_free_ref - decrement the hardif neighbors refcounter 235 * batadv_hardif_neigh_put - decrement the hardif neighbors refcounter
208 * and possibly release it 236 * and possibly release it
209 * @hardif_neigh: hardif neigh neighbor to free 237 * @hardif_neigh: hardif neigh neighbor to free
210 */ 238 */
211void batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh) 239void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
212{ 240{
213 if (atomic_dec_and_test(&hardif_neigh->refcount)) 241 kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release);
214 batadv_hardif_neigh_release(hardif_neigh);
215} 242}
216 243
217/** 244/**
218 * batadv_neigh_node_release - release neigh_node from lists and queue for 245 * batadv_neigh_node_release - release neigh_node from lists and queue for
219 * free after rcu grace period 246 * free after rcu grace period
220 * @neigh_node: neigh neighbor to free 247 * @ref: kref pointer of the neigh_node
221 */ 248 */
222static void batadv_neigh_node_release(struct batadv_neigh_node *neigh_node) 249static void batadv_neigh_node_release(struct kref *ref)
223{ 250{
224 struct hlist_node *node_tmp; 251 struct hlist_node *node_tmp;
252 struct batadv_neigh_node *neigh_node;
225 struct batadv_hardif_neigh_node *hardif_neigh; 253 struct batadv_hardif_neigh_node *hardif_neigh;
226 struct batadv_neigh_ifinfo *neigh_ifinfo; 254 struct batadv_neigh_ifinfo *neigh_ifinfo;
227 struct batadv_algo_ops *bao; 255 struct batadv_algo_ops *bao;
228 256
257 neigh_node = container_of(ref, struct batadv_neigh_node, refcount);
229 bao = neigh_node->orig_node->bat_priv->bat_algo_ops; 258 bao = neigh_node->orig_node->bat_priv->bat_algo_ops;
230 259
231 hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, 260 hlist_for_each_entry_safe(neigh_ifinfo, node_tmp,
232 &neigh_node->ifinfo_list, list) { 261 &neigh_node->ifinfo_list, list) {
233 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 262 batadv_neigh_ifinfo_put(neigh_ifinfo);
234 } 263 }
235 264
236 hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming, 265 hardif_neigh = batadv_hardif_neigh_get(neigh_node->if_incoming,
237 neigh_node->addr); 266 neigh_node->addr);
238 if (hardif_neigh) { 267 if (hardif_neigh) {
239 /* batadv_hardif_neigh_get() increases refcount too */ 268 /* batadv_hardif_neigh_get() increases refcount too */
240 batadv_hardif_neigh_free_ref(hardif_neigh); 269 batadv_hardif_neigh_put(hardif_neigh);
241 batadv_hardif_neigh_free_ref(hardif_neigh); 270 batadv_hardif_neigh_put(hardif_neigh);
242 } 271 }
243 272
244 if (bao->bat_neigh_free) 273 if (bao->bat_neigh_free)
245 bao->bat_neigh_free(neigh_node); 274 bao->bat_neigh_free(neigh_node);
246 275
247 batadv_hardif_free_ref(neigh_node->if_incoming); 276 batadv_hardif_put(neigh_node->if_incoming);
248 277
249 kfree_rcu(neigh_node, rcu); 278 kfree_rcu(neigh_node, rcu);
250} 279}
251 280
252/** 281/**
253 * batadv_neigh_node_free_ref - decrement the neighbors refcounter 282 * batadv_neigh_node_put - decrement the neighbors refcounter and possibly
254 * and possibly release it 283 * release it
255 * @neigh_node: neigh neighbor to free 284 * @neigh_node: neigh neighbor to free
256 */ 285 */
257void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node) 286void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
258{ 287{
259 if (atomic_dec_and_test(&neigh_node->refcount)) 288 kref_put(&neigh_node->refcount, batadv_neigh_node_release);
260 batadv_neigh_node_release(neigh_node);
261} 289}
262 290
263/** 291/**
@@ -266,7 +294,7 @@ void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node)
266 * @if_outgoing: the interface where the payload packet has been received or 294 * @if_outgoing: the interface where the payload packet has been received or
267 * the OGM should be sent to 295 * the OGM should be sent to
268 * 296 *
269 * Returns the neighbor which should be router for this orig_node/iface. 297 * Return: the neighbor which should be router for this orig_node/iface.
270 * 298 *
271 * The object is returned with refcounter increased by 1. 299 * The object is returned with refcounter increased by 1.
272 */ 300 */
@@ -286,7 +314,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
286 break; 314 break;
287 } 315 }
288 316
289 if (router && !atomic_inc_not_zero(&router->refcount)) 317 if (router && !kref_get_unless_zero(&router->refcount))
290 router = NULL; 318 router = NULL;
291 319
292 rcu_read_unlock(); 320 rcu_read_unlock();
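Lookups like batadv_orig_router_get() above walk an RCU-protected list and take their reference with kref_get_unless_zero(), the kref counterpart of the atomic_inc_not_zero() call it replaces: an entry whose count has already dropped to zero is skipped rather than resurrected. A hedged sketch of the lookup side, reusing the hypothetical batadv_foo from the sketch above:

    /* find and reference an entry; the caller drops it with batadv_foo_put() */
    static struct batadv_foo *batadv_foo_get(struct hlist_head *head)
    {
            struct batadv_foo *tmp, *found = NULL;

            rcu_read_lock();
            hlist_for_each_entry_rcu(tmp, head, node) {
                    /* returns 0 if the refcount has already reached zero */
                    if (!kref_get_unless_zero(&tmp->refcount))
                            continue;

                    found = tmp;
                    break;
            }
            rcu_read_unlock();

            return found;
    }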
@@ -298,7 +326,7 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
298 * @orig_node: the orig node to be queried 326 * @orig_node: the orig node to be queried
299 * @if_outgoing: the interface for which the ifinfo should be acquired 327 * @if_outgoing: the interface for which the ifinfo should be acquired
300 * 328 *
301 * Returns the requested orig_ifinfo or NULL if not found. 329 * Return: the requested orig_ifinfo or NULL if not found.
302 * 330 *
303 * The object is returned with refcounter increased by 1. 331 * The object is returned with refcounter increased by 1.
304 */ 332 */
@@ -314,7 +342,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
314 if (tmp->if_outgoing != if_outgoing) 342 if (tmp->if_outgoing != if_outgoing)
315 continue; 343 continue;
316 344
317 if (!atomic_inc_not_zero(&tmp->refcount)) 345 if (!kref_get_unless_zero(&tmp->refcount))
318 continue; 346 continue;
319 347
320 orig_ifinfo = tmp; 348 orig_ifinfo = tmp;
@@ -330,7 +358,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
330 * @orig_node: the orig node to be queried 358 * @orig_node: the orig node to be queried
331 * @if_outgoing: the interface for which the ifinfo should be acquired 359 * @if_outgoing: the interface for which the ifinfo should be acquired
332 * 360 *
333 * Returns NULL in case of failure or the orig_ifinfo object for the if_outgoing 361 * Return: NULL in case of failure or the orig_ifinfo object for the if_outgoing
334 * interface otherwise. The object is created and added to the list 362 * interface otherwise. The object is created and added to the list
335 * if it does not exist. 363 * if it does not exist.
336 * 364 *
@@ -354,7 +382,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
354 goto out; 382 goto out;
355 383
356 if (if_outgoing != BATADV_IF_DEFAULT && 384 if (if_outgoing != BATADV_IF_DEFAULT &&
357 !atomic_inc_not_zero(&if_outgoing->refcount)) { 385 !kref_get_unless_zero(&if_outgoing->refcount)) {
358 kfree(orig_ifinfo); 386 kfree(orig_ifinfo);
359 orig_ifinfo = NULL; 387 orig_ifinfo = NULL;
360 goto out; 388 goto out;
@@ -365,7 +393,8 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
365 orig_ifinfo->batman_seqno_reset = reset_time; 393 orig_ifinfo->batman_seqno_reset = reset_time;
366 orig_ifinfo->if_outgoing = if_outgoing; 394 orig_ifinfo->if_outgoing = if_outgoing;
367 INIT_HLIST_NODE(&orig_ifinfo->list); 395 INIT_HLIST_NODE(&orig_ifinfo->list);
368 atomic_set(&orig_ifinfo->refcount, 2); 396 kref_init(&orig_ifinfo->refcount);
397 kref_get(&orig_ifinfo->refcount);
369 hlist_add_head_rcu(&orig_ifinfo->list, 398 hlist_add_head_rcu(&orig_ifinfo->list,
370 &orig_node->ifinfo_list); 399 &orig_node->ifinfo_list);
371out: 400out:
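Constructors that previously did atomic_set(&obj->refcount, 2) now spell out both references: kref_init() starts the counter at 1 for the reference owned by the list, and the explicit kref_get() accounts for the reference handed back to the caller. The same pattern on the hypothetical batadv_foo (a sketch, not part of this patch):

    /* allocate an entry, link it, and return it with its own reference held */
    static struct batadv_foo *batadv_foo_new(struct hlist_head *head,
                                             spinlock_t *list_lock)
    {
            struct batadv_foo *foo;

            foo = kzalloc(sizeof(*foo), GFP_ATOMIC);
            if (!foo)
                    return NULL;

            kref_init(&foo->refcount);  /* reference owned by the list */
            kref_get(&foo->refcount);   /* extra reference for return */

            spin_lock_bh(list_lock);
            hlist_add_head_rcu(&foo->node, head);
            spin_unlock_bh(list_lock);

            return foo;
    }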
@@ -375,12 +404,12 @@ out:
375 404
376/** 405/**
377 * batadv_neigh_ifinfo_get - find the ifinfo from a neigh_node 406 * batadv_neigh_ifinfo_get - find the ifinfo from a neigh_node
378 * @neigh_node: the neigh node to be queried 407 * @neigh: the neigh node to be queried
379 * @if_outgoing: the interface for which the ifinfo should be acquired 408 * @if_outgoing: the interface for which the ifinfo should be acquired
380 * 409 *
381 * The object is returned with refcounter increased by 1. 410 * The object is returned with refcounter increased by 1.
382 * 411 *
383 * Returns the requested neigh_ifinfo or NULL if not found 412 * Return: the requested neigh_ifinfo or NULL if not found
384 */ 413 */
385struct batadv_neigh_ifinfo * 414struct batadv_neigh_ifinfo *
386batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, 415batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
@@ -395,7 +424,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
395 if (tmp_neigh_ifinfo->if_outgoing != if_outgoing) 424 if (tmp_neigh_ifinfo->if_outgoing != if_outgoing)
396 continue; 425 continue;
397 426
398 if (!atomic_inc_not_zero(&tmp_neigh_ifinfo->refcount)) 427 if (!kref_get_unless_zero(&tmp_neigh_ifinfo->refcount))
399 continue; 428 continue;
400 429
401 neigh_ifinfo = tmp_neigh_ifinfo; 430 neigh_ifinfo = tmp_neigh_ifinfo;
@@ -408,10 +437,10 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
408 437
409/** 438/**
410 * batadv_neigh_ifinfo_new - search and possibly create a neigh_ifinfo object 439 * batadv_neigh_ifinfo_new - search and possibly create a neigh_ifinfo object
411 * @neigh_node: the neigh node to be queried 440 * @neigh: the neigh node to be queried
412 * @if_outgoing: the interface for which the ifinfo should be acquired 441 * @if_outgoing: the interface for which the ifinfo should be acquired
413 * 442 *
414 * Returns NULL in case of failure or the neigh_ifinfo object for the 443 * Return: NULL in case of failure or the neigh_ifinfo object for the
415 * if_outgoing interface otherwise. The object is created and added to the list 444 * if_outgoing interface otherwise. The object is created and added to the list
416 * if it does not exist. 445 * if it does not exist.
417 * 446 *
@@ -433,14 +462,15 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
433 if (!neigh_ifinfo) 462 if (!neigh_ifinfo)
434 goto out; 463 goto out;
435 464
436 if (if_outgoing && !atomic_inc_not_zero(&if_outgoing->refcount)) { 465 if (if_outgoing && !kref_get_unless_zero(&if_outgoing->refcount)) {
437 kfree(neigh_ifinfo); 466 kfree(neigh_ifinfo);
438 neigh_ifinfo = NULL; 467 neigh_ifinfo = NULL;
439 goto out; 468 goto out;
440 } 469 }
441 470
442 INIT_HLIST_NODE(&neigh_ifinfo->list); 471 INIT_HLIST_NODE(&neigh_ifinfo->list);
443 atomic_set(&neigh_ifinfo->refcount, 2); 472 kref_init(&neigh_ifinfo->refcount);
473 kref_get(&neigh_ifinfo->refcount);
444 neigh_ifinfo->if_outgoing = if_outgoing; 474 neigh_ifinfo->if_outgoing = if_outgoing;
445 475
446 hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list); 476 hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list);
@@ -459,7 +489,8 @@ out:
459 * 489 *
460 * Looks for and possibly returns a neighbour belonging to this originator list 490 * Looks for and possibly returns a neighbour belonging to this originator list
461 * which is connected through the provided hard interface. 491 * which is connected through the provided hard interface.
462 * Returns NULL if the neighbour is not found. 492 *
 493 * Return: neighbor when found. Otherwise NULL
463 */ 494 */
464static struct batadv_neigh_node * 495static struct batadv_neigh_node *
465batadv_neigh_node_get(const struct batadv_orig_node *orig_node, 496batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
@@ -476,7 +507,7 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
476 if (tmp_neigh_node->if_incoming != hard_iface) 507 if (tmp_neigh_node->if_incoming != hard_iface)
477 continue; 508 continue;
478 509
479 if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) 510 if (!kref_get_unless_zero(&tmp_neigh_node->refcount))
480 continue; 511 continue;
481 512
482 res = tmp_neigh_node; 513 res = tmp_neigh_node;
@@ -492,7 +523,7 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
492 * @hard_iface: the interface this neighbour is connected to 523 * @hard_iface: the interface this neighbour is connected to
493 * @neigh_addr: the interface address of the neighbour to retrieve 524 * @neigh_addr: the interface address of the neighbour to retrieve
494 * 525 *
495 * Returns the hardif neighbour node if found or created or NULL otherwise. 526 * Return: the hardif neighbour node if found or created or NULL otherwise.
496 */ 527 */
497static struct batadv_hardif_neigh_node * 528static struct batadv_hardif_neigh_node *
498batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, 529batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
@@ -508,12 +539,12 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
508 if (hardif_neigh) 539 if (hardif_neigh)
509 goto out; 540 goto out;
510 541
511 if (!atomic_inc_not_zero(&hard_iface->refcount)) 542 if (!kref_get_unless_zero(&hard_iface->refcount))
512 goto out; 543 goto out;
513 544
514 hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC); 545 hardif_neigh = kzalloc(sizeof(*hardif_neigh), GFP_ATOMIC);
515 if (!hardif_neigh) { 546 if (!hardif_neigh) {
516 batadv_hardif_free_ref(hard_iface); 547 batadv_hardif_put(hard_iface);
517 goto out; 548 goto out;
518 } 549 }
519 550
@@ -522,7 +553,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
522 hardif_neigh->if_incoming = hard_iface; 553 hardif_neigh->if_incoming = hard_iface;
523 hardif_neigh->last_seen = jiffies; 554 hardif_neigh->last_seen = jiffies;
524 555
525 atomic_set(&hardif_neigh->refcount, 1); 556 kref_init(&hardif_neigh->refcount);
526 557
527 if (bat_priv->bat_algo_ops->bat_hardif_neigh_init) 558 if (bat_priv->bat_algo_ops->bat_hardif_neigh_init)
528 bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh); 559 bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh);
@@ -540,7 +571,7 @@ out:
540 * @hard_iface: the interface this neighbour is connected to 571 * @hard_iface: the interface this neighbour is connected to
541 * @neigh_addr: the interface address of the neighbour to retrieve 572 * @neigh_addr: the interface address of the neighbour to retrieve
542 * 573 *
543 * Returns the hardif neighbour node if found or created or NULL otherwise. 574 * Return: the hardif neighbour node if found or created or NULL otherwise.
544 */ 575 */
545static struct batadv_hardif_neigh_node * 576static struct batadv_hardif_neigh_node *
546batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface, 577batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
@@ -562,7 +593,8 @@ batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
562 * @neigh_addr: the address of the neighbour 593 * @neigh_addr: the address of the neighbour
563 * 594 *
564 * Looks for and possibly returns a neighbour belonging to this hard interface. 595 * Looks for and possibly returns a neighbour belonging to this hard interface.
565 * Returns NULL if the neighbour is not found. 596 *
 597 * Return: neighbor when found. Otherwise NULL
566 */ 598 */
567struct batadv_hardif_neigh_node * 599struct batadv_hardif_neigh_node *
568batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, 600batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
@@ -576,7 +608,7 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
576 if (!batadv_compare_eth(tmp_hardif_neigh->addr, neigh_addr)) 608 if (!batadv_compare_eth(tmp_hardif_neigh->addr, neigh_addr))
577 continue; 609 continue;
578 610
579 if (!atomic_inc_not_zero(&tmp_hardif_neigh->refcount)) 611 if (!kref_get_unless_zero(&tmp_hardif_neigh->refcount))
580 continue; 612 continue;
581 613
582 hardif_neigh = tmp_hardif_neigh; 614 hardif_neigh = tmp_hardif_neigh;
@@ -594,7 +626,8 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
594 * @neigh_addr: the mac address of the neighbour interface 626 * @neigh_addr: the mac address of the neighbour interface
595 * 627 *
596 * Allocates a new neigh_node object and initialises all the generic fields. 628 * Allocates a new neigh_node object and initialises all the generic fields.
597 * Returns the new object or NULL on failure. 629 *
 630 * Return: the newly created object or NULL on failure
598 */ 631 */
599struct batadv_neigh_node * 632struct batadv_neigh_node *
600batadv_neigh_node_new(struct batadv_orig_node *orig_node, 633batadv_neigh_node_new(struct batadv_orig_node *orig_node,
@@ -617,7 +650,7 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
617 if (!neigh_node) 650 if (!neigh_node)
618 goto out; 651 goto out;
619 652
620 if (!atomic_inc_not_zero(&hard_iface->refcount)) { 653 if (!kref_get_unless_zero(&hard_iface->refcount)) {
621 kfree(neigh_node); 654 kfree(neigh_node);
622 neigh_node = NULL; 655 neigh_node = NULL;
623 goto out; 656 goto out;
@@ -632,14 +665,15 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
632 neigh_node->orig_node = orig_node; 665 neigh_node->orig_node = orig_node;
633 666
634 /* extra reference for return */ 667 /* extra reference for return */
635 atomic_set(&neigh_node->refcount, 2); 668 kref_init(&neigh_node->refcount);
669 kref_get(&neigh_node->refcount);
636 670
637 spin_lock_bh(&orig_node->neigh_list_lock); 671 spin_lock_bh(&orig_node->neigh_list_lock);
638 hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); 672 hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
639 spin_unlock_bh(&orig_node->neigh_list_lock); 673 spin_unlock_bh(&orig_node->neigh_list_lock);
640 674
641 /* increment unique neighbor refcount */ 675 /* increment unique neighbor refcount */
642 atomic_inc(&hardif_neigh->refcount); 676 kref_get(&hardif_neigh->refcount);
643 677
644 batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, 678 batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
645 "Creating new neighbor %pM for orig_node %pM on interface %s\n", 679 "Creating new neighbor %pM for orig_node %pM on interface %s\n",
@@ -647,7 +681,7 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node,
647 681
648out: 682out:
649 if (hardif_neigh) 683 if (hardif_neigh)
650 batadv_hardif_neigh_free_ref(hardif_neigh); 684 batadv_hardif_neigh_put(hardif_neigh);
651 return neigh_node; 685 return neigh_node;
652} 686}
653 687
@@ -656,7 +690,7 @@ out:
656 * @seq: neighbour table seq_file struct 690 * @seq: neighbour table seq_file struct
657 * @offset: not used 691 * @offset: not used
658 * 692 *
659 * Always returns 0. 693 * Return: always 0
660 */ 694 */
661int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) 695int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
662{ 696{
@@ -673,7 +707,7 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
673 primary_if->net_dev->dev_addr, net_dev->name, 707 primary_if->net_dev->dev_addr, net_dev->name,
674 bat_priv->bat_algo_ops->name); 708 bat_priv->bat_algo_ops->name);
675 709
676 batadv_hardif_free_ref(primary_if); 710 batadv_hardif_put(primary_if);
677 711
678 if (!bat_priv->bat_algo_ops->bat_neigh_print) { 712 if (!bat_priv->bat_algo_ops->bat_neigh_print) {
679 seq_puts(seq, 713 seq_puts(seq,
@@ -688,32 +722,34 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset)
688/** 722/**
689 * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for 723 * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
690 * free after rcu grace period 724 * free after rcu grace period
691 * @orig_ifinfo: the orig_ifinfo object to release 725 * @ref: kref pointer of the orig_ifinfo
692 */ 726 */
693static void batadv_orig_ifinfo_release(struct batadv_orig_ifinfo *orig_ifinfo) 727static void batadv_orig_ifinfo_release(struct kref *ref)
694{ 728{
729 struct batadv_orig_ifinfo *orig_ifinfo;
695 struct batadv_neigh_node *router; 730 struct batadv_neigh_node *router;
696 731
732 orig_ifinfo = container_of(ref, struct batadv_orig_ifinfo, refcount);
733
697 if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) 734 if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT)
698 batadv_hardif_free_ref(orig_ifinfo->if_outgoing); 735 batadv_hardif_put(orig_ifinfo->if_outgoing);
699 736
700 /* this is the last reference to this object */ 737 /* this is the last reference to this object */
701 router = rcu_dereference_protected(orig_ifinfo->router, true); 738 router = rcu_dereference_protected(orig_ifinfo->router, true);
702 if (router) 739 if (router)
703 batadv_neigh_node_free_ref(router); 740 batadv_neigh_node_put(router);
704 741
705 kfree_rcu(orig_ifinfo, rcu); 742 kfree_rcu(orig_ifinfo, rcu);
706} 743}
707 744
708/** 745/**
709 * batadv_orig_ifinfo_free_ref - decrement the refcounter and possibly release 746 * batadv_orig_ifinfo_put - decrement the refcounter and possibly release
710 * the orig_ifinfo 747 * the orig_ifinfo
711 * @orig_ifinfo: the orig_ifinfo object to release 748 * @orig_ifinfo: the orig_ifinfo object to release
712 */ 749 */
713void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo) 750void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
714{ 751{
715 if (atomic_dec_and_test(&orig_ifinfo->refcount)) 752 kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release);
716 batadv_orig_ifinfo_release(orig_ifinfo);
717} 753}
718 754
719/** 755/**
@@ -740,27 +776,30 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
740/** 776/**
741 * batadv_orig_node_release - release orig_node from lists and queue for 777 * batadv_orig_node_release - release orig_node from lists and queue for
742 * free after rcu grace period 778 * free after rcu grace period
743 * @orig_node: the orig node to free 779 * @ref: kref pointer of the orig_node
744 */ 780 */
745static void batadv_orig_node_release(struct batadv_orig_node *orig_node) 781static void batadv_orig_node_release(struct kref *ref)
746{ 782{
747 struct hlist_node *node_tmp; 783 struct hlist_node *node_tmp;
748 struct batadv_neigh_node *neigh_node; 784 struct batadv_neigh_node *neigh_node;
785 struct batadv_orig_node *orig_node;
749 struct batadv_orig_ifinfo *orig_ifinfo; 786 struct batadv_orig_ifinfo *orig_ifinfo;
750 787
788 orig_node = container_of(ref, struct batadv_orig_node, refcount);
789
751 spin_lock_bh(&orig_node->neigh_list_lock); 790 spin_lock_bh(&orig_node->neigh_list_lock);
752 791
753 /* for all neighbors towards this originator ... */ 792 /* for all neighbors towards this originator ... */
754 hlist_for_each_entry_safe(neigh_node, node_tmp, 793 hlist_for_each_entry_safe(neigh_node, node_tmp,
755 &orig_node->neigh_list, list) { 794 &orig_node->neigh_list, list) {
756 hlist_del_rcu(&neigh_node->list); 795 hlist_del_rcu(&neigh_node->list);
757 batadv_neigh_node_free_ref(neigh_node); 796 batadv_neigh_node_put(neigh_node);
758 } 797 }
759 798
760 hlist_for_each_entry_safe(orig_ifinfo, node_tmp, 799 hlist_for_each_entry_safe(orig_ifinfo, node_tmp,
761 &orig_node->ifinfo_list, list) { 800 &orig_node->ifinfo_list, list) {
762 hlist_del_rcu(&orig_ifinfo->list); 801 hlist_del_rcu(&orig_ifinfo->list);
763 batadv_orig_ifinfo_free_ref(orig_ifinfo); 802 batadv_orig_ifinfo_put(orig_ifinfo);
764 } 803 }
765 spin_unlock_bh(&orig_node->neigh_list_lock); 804 spin_unlock_bh(&orig_node->neigh_list_lock);
766 805
@@ -771,14 +810,13 @@ static void batadv_orig_node_release(struct batadv_orig_node *orig_node)
771} 810}
772 811
773/** 812/**
774 * batadv_orig_node_free_ref - decrement the orig node refcounter and possibly 813 * batadv_orig_node_put - decrement the orig node refcounter and possibly
775 * release it 814 * release it
776 * @orig_node: the orig node to free 815 * @orig_node: the orig node to free
777 */ 816 */
778void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node) 817void batadv_orig_node_put(struct batadv_orig_node *orig_node)
779{ 818{
780 if (atomic_dec_and_test(&orig_node->refcount)) 819 kref_put(&orig_node->refcount, batadv_orig_node_release);
781 batadv_orig_node_release(orig_node);
782} 820}
783 821
784void batadv_originator_free(struct batadv_priv *bat_priv) 822void batadv_originator_free(struct batadv_priv *bat_priv)
@@ -805,7 +843,7 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
805 hlist_for_each_entry_safe(orig_node, node_tmp, 843 hlist_for_each_entry_safe(orig_node, node_tmp,
806 head, hash_entry) { 844 head, hash_entry) {
807 hlist_del_rcu(&orig_node->hash_entry); 845 hlist_del_rcu(&orig_node->hash_entry);
808 batadv_orig_node_free_ref(orig_node); 846 batadv_orig_node_put(orig_node);
809 } 847 }
810 spin_unlock_bh(list_lock); 848 spin_unlock_bh(list_lock);
811 } 849 }
@@ -820,7 +858,8 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
820 * 858 *
821 * Creates a new originator object and initialises all the generic fields. 859 * Creates a new originator object and initialises all the generic fields.
822 * The new object is not added to the originator list. 860 * The new object is not added to the originator list.
823 * Returns the newly created object or NULL on failure. 861 *
862 * Return: the newly created object or NULL on failure.
824 */ 863 */
825struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, 864struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
826 const u8 *addr) 865 const u8 *addr)
@@ -849,7 +888,8 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
849 batadv_nc_init_orig(orig_node); 888 batadv_nc_init_orig(orig_node);
850 889
851 /* extra reference for return */ 890 /* extra reference for return */
852 atomic_set(&orig_node->refcount, 2); 891 kref_init(&orig_node->refcount);
892 kref_get(&orig_node->refcount);
853 893
854 orig_node->bat_priv = bat_priv; 894 orig_node->bat_priv = bat_priv;
855 ether_addr_copy(orig_node->orig, addr); 895 ether_addr_copy(orig_node->orig, addr);
@@ -877,7 +917,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
877 * Immediately release vlan since it is not needed anymore in this 917 * Immediately release vlan since it is not needed anymore in this
878 * context 918 * context
879 */ 919 */
880 batadv_orig_node_vlan_free_ref(vlan); 920 batadv_orig_node_vlan_put(vlan);
881 921
882 for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) { 922 for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
883 INIT_HLIST_HEAD(&orig_node->fragments[i].head); 923 INIT_HLIST_HEAD(&orig_node->fragments[i].head);
@@ -926,7 +966,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
926 neigh->addr, if_outgoing->net_dev->name); 966 neigh->addr, if_outgoing->net_dev->name);
927 967
928 hlist_del_rcu(&neigh_ifinfo->list); 968 hlist_del_rcu(&neigh_ifinfo->list);
929 batadv_neigh_ifinfo_free_ref(neigh_ifinfo); 969 batadv_neigh_ifinfo_put(neigh_ifinfo);
930 } 970 }
931 971
932 spin_unlock_bh(&neigh->ifinfo_lock); 972 spin_unlock_bh(&neigh->ifinfo_lock);
@@ -937,7 +977,7 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
937 * @bat_priv: the bat priv with all the soft interface information 977 * @bat_priv: the bat priv with all the soft interface information
938 * @orig_node: orig node which is to be checked 978 * @orig_node: orig node which is to be checked
939 * 979 *
940 * Returns true if any ifinfo entry was purged, false otherwise. 980 * Return: true if any ifinfo entry was purged, false otherwise.
941 */ 981 */
942static bool 982static bool
943batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, 983batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
@@ -972,10 +1012,10 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
972 ifinfo_purged = true; 1012 ifinfo_purged = true;
973 1013
974 hlist_del_rcu(&orig_ifinfo->list); 1014 hlist_del_rcu(&orig_ifinfo->list);
975 batadv_orig_ifinfo_free_ref(orig_ifinfo); 1015 batadv_orig_ifinfo_put(orig_ifinfo);
976 if (orig_node->last_bonding_candidate == orig_ifinfo) { 1016 if (orig_node->last_bonding_candidate == orig_ifinfo) {
977 orig_node->last_bonding_candidate = NULL; 1017 orig_node->last_bonding_candidate = NULL;
978 batadv_orig_ifinfo_free_ref(orig_ifinfo); 1018 batadv_orig_ifinfo_put(orig_ifinfo);
979 } 1019 }
980 } 1020 }
981 1021
@@ -989,7 +1029,7 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
989 * @bat_priv: the bat priv with all the soft interface information 1029 * @bat_priv: the bat priv with all the soft interface information
990 * @orig_node: orig node which is to be checked 1030 * @orig_node: orig node which is to be checked
991 * 1031 *
992 * Returns true if any neighbor was purged, false otherwise 1032 * Return: true if any neighbor was purged, false otherwise
993 */ 1033 */
994static bool 1034static bool
995batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, 1035batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
@@ -1029,7 +1069,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
1029 neigh_purged = true; 1069 neigh_purged = true;
1030 1070
1031 hlist_del_rcu(&neigh_node->list); 1071 hlist_del_rcu(&neigh_node->list);
1032 batadv_neigh_node_free_ref(neigh_node); 1072 batadv_neigh_node_put(neigh_node);
1033 } else { 1073 } else {
1034 /* only necessary if not the whole neighbor is to be 1074 /* only necessary if not the whole neighbor is to be
1035 * deleted, but some interface has been removed. 1075 * deleted, but some interface has been removed.
@@ -1048,7 +1088,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
1048 * @orig_node: orig node which is to be checked 1088 * @orig_node: orig node which is to be checked
1049 * @if_outgoing: the interface for which the metric should be compared 1089 * @if_outgoing: the interface for which the metric should be compared
1050 * 1090 *
1051 * Returns the current best neighbor, with refcount increased. 1091 * Return: the current best neighbor, with refcount increased.
1052 */ 1092 */
1053static struct batadv_neigh_node * 1093static struct batadv_neigh_node *
1054batadv_find_best_neighbor(struct batadv_priv *bat_priv, 1094batadv_find_best_neighbor(struct batadv_priv *bat_priv,
@@ -1064,11 +1104,11 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
1064 best, if_outgoing) <= 0)) 1104 best, if_outgoing) <= 0))
1065 continue; 1105 continue;
1066 1106
1067 if (!atomic_inc_not_zero(&neigh->refcount)) 1107 if (!kref_get_unless_zero(&neigh->refcount))
1068 continue; 1108 continue;
1069 1109
1070 if (best) 1110 if (best)
1071 batadv_neigh_node_free_ref(best); 1111 batadv_neigh_node_put(best);
1072 1112
1073 best = neigh; 1113 best = neigh;
1074 } 1114 }
@@ -1085,7 +1125,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
1085 * This function checks if the orig_node or substructures of it have become 1125 * This function checks if the orig_node or substructures of it have become
1086 * obsolete, and purges this information if that's the case. 1126 * obsolete, and purges this information if that's the case.
1087 * 1127 *
1088 * Returns true if the orig_node is to be removed, false otherwise. 1128 * Return: true if the orig_node is to be removed, false otherwise.
1089 */ 1129 */
1090static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, 1130static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
1091 struct batadv_orig_node *orig_node) 1131 struct batadv_orig_node *orig_node)
@@ -1114,7 +1154,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
1114 batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT, 1154 batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT,
1115 best_neigh_node); 1155 best_neigh_node);
1116 if (best_neigh_node) 1156 if (best_neigh_node)
1117 batadv_neigh_node_free_ref(best_neigh_node); 1157 batadv_neigh_node_put(best_neigh_node);
1118 1158
1119 /* ... then for all other interfaces. */ 1159 /* ... then for all other interfaces. */
1120 rcu_read_lock(); 1160 rcu_read_lock();
@@ -1131,7 +1171,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
1131 batadv_update_route(bat_priv, orig_node, hard_iface, 1171 batadv_update_route(bat_priv, orig_node, hard_iface,
1132 best_neigh_node); 1172 best_neigh_node);
1133 if (best_neigh_node) 1173 if (best_neigh_node)
1134 batadv_neigh_node_free_ref(best_neigh_node); 1174 batadv_neigh_node_put(best_neigh_node);
1135 } 1175 }
1136 rcu_read_unlock(); 1176 rcu_read_unlock();
1137 1177
@@ -1164,7 +1204,7 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv)
1164 batadv_tt_global_del_orig(orig_node->bat_priv, 1204 batadv_tt_global_del_orig(orig_node->bat_priv,
1165 orig_node, -1, 1205 orig_node, -1,
1166 "originator timed out"); 1206 "originator timed out");
1167 batadv_orig_node_free_ref(orig_node); 1207 batadv_orig_node_put(orig_node);
1168 continue; 1208 continue;
1169 } 1209 }
1170 1210
@@ -1210,7 +1250,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
1210 primary_if->net_dev->dev_addr, net_dev->name, 1250 primary_if->net_dev->dev_addr, net_dev->name,
1211 bat_priv->bat_algo_ops->name); 1251 bat_priv->bat_algo_ops->name);
1212 1252
1213 batadv_hardif_free_ref(primary_if); 1253 batadv_hardif_put(primary_if);
1214 1254
1215 if (!bat_priv->bat_algo_ops->bat_orig_print) { 1255 if (!bat_priv->bat_algo_ops->bat_orig_print) {
1216 seq_puts(seq, 1256 seq_puts(seq,
@@ -1230,7 +1270,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
1230 * @seq: debugfs table seq_file struct 1270 * @seq: debugfs table seq_file struct
1231 * @offset: not used 1271 * @offset: not used
1232 * 1272 *
1233 * Returns 0 1273 * Return: 0
1234 */ 1274 */
1235int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) 1275int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
1236{ 1276{
@@ -1266,7 +1306,7 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset)
1266 1306
1267out: 1307out:
1268 if (hard_iface) 1308 if (hard_iface)
1269 batadv_hardif_free_ref(hard_iface); 1309 batadv_hardif_put(hard_iface);
1270 return 0; 1310 return 0;
1271} 1311}
1272 1312
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index cf0730414ed2..4e8b67f11051 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -20,10 +20,10 @@
20 20
21#include "main.h" 21#include "main.h"
22 22
23#include <linux/atomic.h>
24#include <linux/compiler.h> 23#include <linux/compiler.h>
25#include <linux/if_ether.h> 24#include <linux/if_ether.h>
26#include <linux/jhash.h> 25#include <linux/jhash.h>
26#include <linux/kref.h>
27#include <linux/rculist.h> 27#include <linux/rculist.h>
28#include <linux/rcupdate.h> 28#include <linux/rcupdate.h>
29#include <linux/stddef.h> 29#include <linux/stddef.h>
@@ -37,19 +37,19 @@ int batadv_compare_orig(const struct hlist_node *node, const void *data2);
37int batadv_originator_init(struct batadv_priv *bat_priv); 37int batadv_originator_init(struct batadv_priv *bat_priv);
38void batadv_originator_free(struct batadv_priv *bat_priv); 38void batadv_originator_free(struct batadv_priv *bat_priv);
39void batadv_purge_orig_ref(struct batadv_priv *bat_priv); 39void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
40void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node); 40void batadv_orig_node_put(struct batadv_orig_node *orig_node);
41struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, 41struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
42 const u8 *addr); 42 const u8 *addr);
43struct batadv_hardif_neigh_node * 43struct batadv_hardif_neigh_node *
44batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, 44batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
45 const u8 *neigh_addr); 45 const u8 *neigh_addr);
46void 46void
47batadv_hardif_neigh_free_ref(struct batadv_hardif_neigh_node *hardif_neigh); 47batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh);
48struct batadv_neigh_node * 48struct batadv_neigh_node *
49batadv_neigh_node_new(struct batadv_orig_node *orig_node, 49batadv_neigh_node_new(struct batadv_orig_node *orig_node,
50 struct batadv_hard_iface *hard_iface, 50 struct batadv_hard_iface *hard_iface,
51 const u8 *neigh_addr); 51 const u8 *neigh_addr);
52void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); 52void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node);
53struct batadv_neigh_node * 53struct batadv_neigh_node *
54batadv_orig_router_get(struct batadv_orig_node *orig_node, 54batadv_orig_router_get(struct batadv_orig_node *orig_node,
55 const struct batadv_hard_iface *if_outgoing); 55 const struct batadv_hard_iface *if_outgoing);
@@ -59,7 +59,7 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
59struct batadv_neigh_ifinfo * 59struct batadv_neigh_ifinfo *
60batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, 60batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
61 struct batadv_hard_iface *if_outgoing); 61 struct batadv_hard_iface *if_outgoing);
62void batadv_neigh_ifinfo_free_ref(struct batadv_neigh_ifinfo *neigh_ifinfo); 62void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
63 63
64int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset); 64int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset);
65 65
@@ -69,7 +69,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
69struct batadv_orig_ifinfo * 69struct batadv_orig_ifinfo *
70batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, 70batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
71 struct batadv_hard_iface *if_outgoing); 71 struct batadv_hard_iface *if_outgoing);
72void batadv_orig_ifinfo_free_ref(struct batadv_orig_ifinfo *orig_ifinfo); 72void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
73 73
74int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); 74int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
75int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); 75int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
@@ -83,7 +83,7 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
83struct batadv_orig_node_vlan * 83struct batadv_orig_node_vlan *
84batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, 84batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
85 unsigned short vid); 85 unsigned short vid);
86void batadv_orig_node_vlan_free_ref(struct batadv_orig_node_vlan *orig_vlan); 86void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan);
87 87
88/* hash function to choose an entry in a hash table of given size 88/* hash function to choose an entry in a hash table of given size
89 * hash algorithm from http://en.wikipedia.org/wiki/Hash_table 89 * hash algorithm from http://en.wikipedia.org/wiki/Hash_table
@@ -115,7 +115,7 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data)
115 if (!batadv_compare_eth(orig_node, data)) 115 if (!batadv_compare_eth(orig_node, data))
116 continue; 116 continue;
117 117
118 if (!atomic_inc_not_zero(&orig_node->refcount)) 118 if (!kref_get_unless_zero(&orig_node->refcount))
119 continue; 119 continue;
120 120
121 orig_node_tmp = orig_node; 121 orig_node_tmp = orig_node;
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 0558e3237e0e..8a8d7ca1a5cf 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -26,6 +26,8 @@
26 * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV 26 * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV
27 * @BATADV_BCAST: broadcast packets carrying broadcast payload 27 * @BATADV_BCAST: broadcast packets carrying broadcast payload
28 * @BATADV_CODED: network coded packets 28 * @BATADV_CODED: network coded packets
29 * @BATADV_ELP: echo location packets for B.A.T.M.A.N. V
30 * @BATADV_OGM2: originator messages for B.A.T.M.A.N. V
29 * 31 *
30 * @BATADV_UNICAST: unicast packets carrying unicast payload traffic 32 * @BATADV_UNICAST: unicast packets carrying unicast payload traffic
31 * @BATADV_UNICAST_FRAG: unicast packets carrying a fragment of the original 33 * @BATADV_UNICAST_FRAG: unicast packets carrying a fragment of the original
@@ -40,6 +42,8 @@ enum batadv_packettype {
40 BATADV_IV_OGM = 0x00, 42 BATADV_IV_OGM = 0x00,
41 BATADV_BCAST = 0x01, 43 BATADV_BCAST = 0x01,
42 BATADV_CODED = 0x02, 44 BATADV_CODED = 0x02,
45 BATADV_ELP = 0x03,
46 BATADV_OGM2 = 0x04,
43 /* 0x40 - 0x7f: unicast */ 47 /* 0x40 - 0x7f: unicast */
44#define BATADV_UNICAST_MIN 0x40 48#define BATADV_UNICAST_MIN 0x40
45 BATADV_UNICAST = 0x40, 49 BATADV_UNICAST = 0x40,
@@ -158,7 +162,7 @@ enum batadv_tt_client_flags {
158}; 162};
159 163
160/** 164/**
161 * batadv_vlan_flags - flags for the four MSB of any vlan ID field 165 * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field
162 * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not 166 * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
163 */ 167 */
164enum batadv_vlan_flags { 168enum batadv_vlan_flags {
@@ -209,6 +213,11 @@ struct batadv_bla_claim_dst {
209 * @version: batman-adv protocol version, part of the general header 213 * @version: batman-adv protocol version, part of the general header
210 * @ttl: time to live for this packet, part of the general header 214 * @ttl: time to live for this packet, part of the general header
211 * @flags: contains routing relevant flags - see enum batadv_iv_flags 215 * @flags: contains routing relevant flags - see enum batadv_iv_flags
216 * @seqno: sequence identification
217 * @orig: address of the source node
218 * @prev_sender: address of the previous sender
219 * @reserved: reserved byte for alignment
220 * @tq: transmission quality
212 * @tvlv_len: length of tvlv data following the ogm header 221 * @tvlv_len: length of tvlv data following the ogm header
213 */ 222 */
214struct batadv_ogm_packet { 223struct batadv_ogm_packet {
@@ -230,7 +239,52 @@ struct batadv_ogm_packet {
230#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) 239#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
231 240
232/** 241/**
233 * batadv_icmp_header - common members among all the ICMP packets 242 * struct batadv_ogm2_packet - ogm2 (routing protocol) packet
243 * @packet_type: batman-adv packet type, part of the general header
244 * @version: batman-adv protocol version, part of the general header
245 * @ttl: time to live for this packet, part of the general header
 246 * @flags: reserved for routing relevant flags - currently always 0
247 * @seqno: sequence number
248 * @orig: originator mac address
249 * @tvlv_len: length of the appended tvlv buffer (in bytes)
250 * @throughput: the currently flooded path throughput
251 */
252struct batadv_ogm2_packet {
253 u8 packet_type;
254 u8 version;
255 u8 ttl;
256 u8 flags;
257 __be32 seqno;
258 u8 orig[ETH_ALEN];
259 __be16 tvlv_len;
260 __be32 throughput;
261 /* __packed is not needed as the struct size is divisible by 4,
262 * and the largest data type in this struct has a size of 4.
263 */
264};
265
266#define BATADV_OGM2_HLEN sizeof(struct batadv_ogm2_packet)
267
268/**
269 * struct batadv_elp_packet - elp (neighbor discovery) packet
270 * @packet_type: batman-adv packet type, part of the general header
 271 * @version: batman-adv protocol version, part of the general header
272 * @orig: originator mac address
273 * @seqno: sequence number
274 * @elp_interval: currently used ELP sending interval in ms
275 */
276struct batadv_elp_packet {
277 u8 packet_type;
278 u8 version;
279 u8 orig[ETH_ALEN];
280 __be32 seqno;
281 __be32 elp_interval;
282};
283
284#define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet)
285
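The comment inside batadv_ogm2_packet notes that __packed is unnecessary: four single bytes, a 32-bit seqno, a 6-byte MAC address, a 16-bit tvlv_len and a 32-bit throughput land on their natural boundaries and add up to 20 bytes, so the compiler inserts no padding; batadv_elp_packet works out to 16 bytes the same way. A hypothetical compile-time check that would document this assumption (not part of this patch):

    /* hypothetical sanity checks; the sizes follow from the field layout above */
    static inline void batadv_packet_layout_check(void)
    {
            BUILD_BUG_ON(sizeof(struct batadv_ogm2_packet) != 20);
            BUILD_BUG_ON(sizeof(struct batadv_elp_packet) != 16);
    }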
286/**
287 * struct batadv_icmp_header - common members among all the ICMP packets
234 * @packet_type: batman-adv packet type, part of the general header 288 * @packet_type: batman-adv packet type, part of the general header
235 * @version: batman-adv protocol version, part of the general header 289 * @version: batman-adv protocol version, part of the general header
236 * @ttl: time to live for this packet, part of the general header 290 * @ttl: time to live for this packet, part of the general header
@@ -256,7 +310,7 @@ struct batadv_icmp_header {
256}; 310};
257 311
258/** 312/**
259 * batadv_icmp_packet - ICMP packet 313 * struct batadv_icmp_packet - ICMP packet
260 * @packet_type: batman-adv packet type, part of the general header 314 * @packet_type: batman-adv packet type, part of the general header
261 * @version: batman-adv protocol version, part of the general header 315 * @version: batman-adv protocol version, part of the general header
262 * @ttl: time to live for this packet, part of the general header 316 * @ttl: time to live for this packet, part of the general header
@@ -282,7 +336,7 @@ struct batadv_icmp_packet {
282#define BATADV_RR_LEN 16 336#define BATADV_RR_LEN 16
283 337
284/** 338/**
285 * batadv_icmp_packet_rr - ICMP RouteRecord packet 339 * struct batadv_icmp_packet_rr - ICMP RouteRecord packet
286 * @packet_type: batman-adv packet type, part of the general header 340 * @packet_type: batman-adv packet type, part of the general header
287 * @version: batman-adv protocol version, part of the general header 341 * @version: batman-adv protocol version, part of the general header
288 * @ttl: time to live for this packet, part of the general header 342 * @ttl: time to live for this packet, part of the general header
@@ -345,6 +399,7 @@ struct batadv_unicast_packet {
345 * @u: common unicast packet header 399 * @u: common unicast packet header
346 * @src: address of the source 400 * @src: address of the source
347 * @subtype: packet subtype 401 * @subtype: packet subtype
402 * @reserved: reserved byte for alignment
348 */ 403 */
349struct batadv_unicast_4addr_packet { 404struct batadv_unicast_4addr_packet {
350 struct batadv_unicast_packet u; 405 struct batadv_unicast_packet u;
@@ -413,7 +468,6 @@ struct batadv_bcast_packet {
413 * @packet_type: batman-adv packet type, part of the general header 468 * @packet_type: batman-adv packet type, part of the general header
414 * @version: batman-adv protocol version, part of the general header 469 * @version: batman-adv protocol version, part of the general header
415 * @ttl: time to live for this packet, part of the general header 470 * @ttl: time to live for this packet, part of the general header
416 * @reserved: Align following fields to 2-byte boundaries
417 * @first_source: original source of first included packet 471 * @first_source: original source of first included packet
418 * @first_orig_dest: original destination of first included packet 472 * @first_orig_dest: original destination of first included packet
419 * @first_crc: checksum of first included packet 473 * @first_crc: checksum of first included packet
@@ -495,7 +549,7 @@ struct batadv_tvlv_gateway_data {
495 * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container 549 * struct batadv_tvlv_tt_data - tt data propagated through the tt tvlv container
496 * @flags: translation table flags (see batadv_tt_data_flags) 550 * @flags: translation table flags (see batadv_tt_data_flags)
497 * @ttvn: translation table version number 551 * @ttvn: translation table version number
498 * @vlan_num: number of announced VLANs. In the TVLV this struct is followed by 552 * @num_vlan: number of announced VLANs. In the TVLV this struct is followed by
499 * one batadv_tvlv_tt_vlan_data object per announced vlan 553 * one batadv_tvlv_tt_vlan_data object per announced vlan
500 */ 554 */
501struct batadv_tvlv_tt_data { 555struct batadv_tvlv_tt_data {
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index e4f2646d9246..4dd646a52f1a 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -25,6 +25,7 @@
25#include <linux/etherdevice.h> 25#include <linux/etherdevice.h>
26#include <linux/if_ether.h> 26#include <linux/if_ether.h>
27#include <linux/jiffies.h> 27#include <linux/jiffies.h>
28#include <linux/kref.h>
28#include <linux/netdevice.h> 29#include <linux/netdevice.h>
29#include <linux/printk.h> 30#include <linux/printk.h>
30#include <linux/rculist.h> 31#include <linux/rculist.h>
@@ -72,7 +73,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
72 73
73 rcu_read_lock(); 74 rcu_read_lock();
74 curr_router = rcu_dereference(orig_ifinfo->router); 75 curr_router = rcu_dereference(orig_ifinfo->router);
75 if (curr_router && !atomic_inc_not_zero(&curr_router->refcount)) 76 if (curr_router && !kref_get_unless_zero(&curr_router->refcount))
76 curr_router = NULL; 77 curr_router = NULL;
77 rcu_read_unlock(); 78 rcu_read_unlock();
78 79
@@ -97,20 +98,20 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
97 } 98 }
98 99
99 if (curr_router) 100 if (curr_router)
100 batadv_neigh_node_free_ref(curr_router); 101 batadv_neigh_node_put(curr_router);
101 102
102 /* increase refcount of new best neighbor */ 103 /* increase refcount of new best neighbor */
103 if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount)) 104 if (neigh_node && !kref_get_unless_zero(&neigh_node->refcount))
104 neigh_node = NULL; 105 neigh_node = NULL;
105 106
106 spin_lock_bh(&orig_node->neigh_list_lock); 107 spin_lock_bh(&orig_node->neigh_list_lock);
107 rcu_assign_pointer(orig_ifinfo->router, neigh_node); 108 rcu_assign_pointer(orig_ifinfo->router, neigh_node);
108 spin_unlock_bh(&orig_node->neigh_list_lock); 109 spin_unlock_bh(&orig_node->neigh_list_lock);
109 batadv_orig_ifinfo_free_ref(orig_ifinfo); 110 batadv_orig_ifinfo_put(orig_ifinfo);
110 111
111 /* decrease refcount of previous best neighbor */ 112 /* decrease refcount of previous best neighbor */
112 if (curr_router) 113 if (curr_router)
113 batadv_neigh_node_free_ref(curr_router); 114 batadv_neigh_node_put(curr_router);
114} 115}
115 116
116/** 117/**
@@ -137,24 +138,38 @@ void batadv_update_route(struct batadv_priv *bat_priv,
137 138
138out: 139out:
139 if (router) 140 if (router)
140 batadv_neigh_node_free_ref(router); 141 batadv_neigh_node_put(router);
141} 142}
142 143
143/* checks whether the host restarted and is in the protection time. 144/**
144 * returns: 145 * batadv_window_protected - checks whether the host restarted and is in the
145 * 0 if the packet is to be accepted 146 * protection time.
147 * @bat_priv: the bat priv with all the soft interface information
148 * @seq_num_diff: difference between the current/received sequence number and
149 * the last sequence number
150 * @seq_old_max_diff: maximum age of sequence number not considered as restart
151 * @last_reset: jiffies timestamp of the last reset, will be updated when reset
152 * is detected
153 * @protection_started: is set to true if the protection window was started,
154 * doesn't change otherwise.
155 *
156 * Return:
157 * 0 if the packet is to be accepted.
146 * 1 if the packet is to be ignored. 158 * 1 if the packet is to be ignored.
147 */ 159 */
148int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, 160int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
149 unsigned long *last_reset) 161 s32 seq_old_max_diff, unsigned long *last_reset,
162 bool *protection_started)
150{ 163{
151 if (seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE || 164 if (seq_num_diff <= -seq_old_max_diff ||
152 seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) { 165 seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) {
153 if (!batadv_has_timed_out(*last_reset, 166 if (!batadv_has_timed_out(*last_reset,
154 BATADV_RESET_PROTECTION_MS)) 167 BATADV_RESET_PROTECTION_MS))
155 return 1; 168 return 1;
156 169
157 *last_reset = jiffies; 170 *last_reset = jiffies;
171 if (protection_started)
172 *protection_started = true;
158 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 173 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
159 "old packet received, start protection\n"); 174 "old packet received, start protection\n");
160 } 175 }
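batadv_window_protected() now takes the maximum tolerated sequence-number age as a parameter instead of hard-coding BATADV_TQ_LOCAL_WINDOW_SIZE, plus an optional bool that is raised when a protection window is started; callers that do not care pass NULL, as the broadcast receive path further down does. A hedged sketch of a caller that uses the flag (hypothetical fragment with illustrative variable names, not taken from this patch):

    bool started = false;
    s32 seq_diff = ntohl(ogm_packet->seqno) - last_seqno;

    if (batadv_window_protected(bat_priv, seq_diff,
                                BATADV_TQ_LOCAL_WINDOW_SIZE,
                                &orig_ifinfo->batman_seqno_reset, &started))
            return;               /* old packet inside the protection window */

    if (started)
            batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
                       "protection window started, resetting per-interface state\n");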
@@ -198,7 +213,7 @@ bool batadv_check_management_packet(struct sk_buff *skb,
198 * @bat_priv: the bat priv with all the soft interface information 213 * @bat_priv: the bat priv with all the soft interface information
199 * @skb: icmp packet to process 214 * @skb: icmp packet to process
200 * 215 *
201 * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP 216 * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
202 * otherwise. 217 * otherwise.
203 */ 218 */
204static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, 219static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
@@ -254,9 +269,9 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
254 } 269 }
255out: 270out:
256 if (primary_if) 271 if (primary_if)
257 batadv_hardif_free_ref(primary_if); 272 batadv_hardif_put(primary_if);
258 if (orig_node) 273 if (orig_node)
259 batadv_orig_node_free_ref(orig_node); 274 batadv_orig_node_put(orig_node);
260 return ret; 275 return ret;
261} 276}
262 277
@@ -302,9 +317,9 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
302 317
303out: 318out:
304 if (primary_if) 319 if (primary_if)
305 batadv_hardif_free_ref(primary_if); 320 batadv_hardif_put(primary_if);
306 if (orig_node) 321 if (orig_node)
307 batadv_orig_node_free_ref(orig_node); 322 batadv_orig_node_put(orig_node);
308 return ret; 323 return ret;
309} 324}
310 325
@@ -388,7 +403,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
388 403
389out: 404out:
390 if (orig_node) 405 if (orig_node)
391 batadv_orig_node_free_ref(orig_node); 406 batadv_orig_node_put(orig_node);
392 return ret; 407 return ret;
393} 408}
394 409
@@ -398,10 +413,11 @@ out:
398 * @skb: packet to check 413 * @skb: packet to check
399 * @hdr_size: size of header to pull 414 * @hdr_size: size of header to pull
400 * 415 *
401 * Check for short header and bad addresses in given packet. Returns negative 416 * Check for short header and bad addresses in given packet.
402 * value when check fails and 0 otherwise. The negative value depends on the 417 *
403 * reason: -ENODATA for bad header, -EBADR for broadcast destination or source, 418 * Return: negative value when check fails and 0 otherwise. The negative value
404 * and -EREMOTE for non-local (other host) destination. 419 * depends on the reason: -ENODATA for bad header, -EBADR for broadcast
420 * destination or source, and -EREMOTE for non-local (other host) destination.
405 */ 421 */
406static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, 422static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
407 struct sk_buff *skb, int hdr_size) 423 struct sk_buff *skb, int hdr_size)
@@ -435,7 +451,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
435 * @orig_node: the destination node 451 * @orig_node: the destination node
436 * @recv_if: pointer to interface this packet was received on 452 * @recv_if: pointer to interface this packet was received on
437 * 453 *
438 * Returns the router which should be used for this orig_node on 454 * Return: the router which should be used for this orig_node on
439 * this interface, or NULL if not available. 455 * this interface, or NULL if not available.
440 */ 456 */
441struct batadv_neigh_node * 457struct batadv_neigh_node *
@@ -482,14 +498,14 @@ batadv_find_router(struct batadv_priv *bat_priv,
482 498
483 hlist_for_each_entry_rcu(cand, &orig_node->ifinfo_list, list) { 499 hlist_for_each_entry_rcu(cand, &orig_node->ifinfo_list, list) {
484 /* acquire some structures and references ... */ 500 /* acquire some structures and references ... */
485 if (!atomic_inc_not_zero(&cand->refcount)) 501 if (!kref_get_unless_zero(&cand->refcount))
486 continue; 502 continue;
487 503
488 cand_router = rcu_dereference(cand->router); 504 cand_router = rcu_dereference(cand->router);
489 if (!cand_router) 505 if (!cand_router)
490 goto next; 506 goto next;
491 507
492 if (!atomic_inc_not_zero(&cand_router->refcount)) { 508 if (!kref_get_unless_zero(&cand_router->refcount)) {
493 cand_router = NULL; 509 cand_router = NULL;
494 goto next; 510 goto next;
495 } 511 }
@@ -508,8 +524,8 @@ batadv_find_router(struct batadv_priv *bat_priv,
508 524
509 /* mark the first possible candidate */ 525 /* mark the first possible candidate */
510 if (!first_candidate) { 526 if (!first_candidate) {
511 atomic_inc(&cand_router->refcount); 527 kref_get(&cand_router->refcount);
512 atomic_inc(&cand->refcount); 528 kref_get(&cand->refcount);
513 first_candidate = cand; 529 first_candidate = cand;
514 first_candidate_router = cand_router; 530 first_candidate_router = cand_router;
515 } 531 }
@@ -529,16 +545,16 @@ batadv_find_router(struct batadv_priv *bat_priv,
529next: 545next:
530 /* free references */ 546 /* free references */
531 if (cand_router) { 547 if (cand_router) {
532 batadv_neigh_node_free_ref(cand_router); 548 batadv_neigh_node_put(cand_router);
533 cand_router = NULL; 549 cand_router = NULL;
534 } 550 }
535 batadv_orig_ifinfo_free_ref(cand); 551 batadv_orig_ifinfo_put(cand);
536 } 552 }
537 rcu_read_unlock(); 553 rcu_read_unlock();
538 554
539 /* last_bonding_candidate is reset below, remove the old reference. */ 555 /* last_bonding_candidate is reset below, remove the old reference. */
540 if (orig_node->last_bonding_candidate) 556 if (orig_node->last_bonding_candidate)
541 batadv_orig_ifinfo_free_ref(orig_node->last_bonding_candidate); 557 batadv_orig_ifinfo_put(orig_node->last_bonding_candidate);
542 558
543 /* After finding candidates, handle the three cases: 559 /* After finding candidates, handle the three cases:
544 * 1) there is a next candidate, use that 560 * 1) there is a next candidate, use that
@@ -546,17 +562,17 @@ next:
546 * 3) there is no candidate at all, return the default router 562 * 3) there is no candidate at all, return the default router
547 */ 563 */
548 if (next_candidate) { 564 if (next_candidate) {
549 batadv_neigh_node_free_ref(router); 565 batadv_neigh_node_put(router);
550 566
551 /* remove references to first candidate, we don't need it. */ 567 /* remove references to first candidate, we don't need it. */
552 if (first_candidate) { 568 if (first_candidate) {
553 batadv_neigh_node_free_ref(first_candidate_router); 569 batadv_neigh_node_put(first_candidate_router);
554 batadv_orig_ifinfo_free_ref(first_candidate); 570 batadv_orig_ifinfo_put(first_candidate);
555 } 571 }
556 router = next_candidate_router; 572 router = next_candidate_router;
557 orig_node->last_bonding_candidate = next_candidate; 573 orig_node->last_bonding_candidate = next_candidate;
558 } else if (first_candidate) { 574 } else if (first_candidate) {
559 batadv_neigh_node_free_ref(router); 575 batadv_neigh_node_put(router);
560 576
561 /* refcounting has already been done in the loop above. */ 577 /* refcounting has already been done in the loop above. */
562 router = first_candidate_router; 578 router = first_candidate_router;
@@ -633,7 +649,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
633 649
634out: 650out:
635 if (orig_node) 651 if (orig_node)
636 batadv_orig_node_free_ref(orig_node); 652 batadv_orig_node_put(orig_node);
637 return ret; 653 return ret;
638} 654}
639 655
@@ -648,7 +664,7 @@ out:
648 * the new corresponding information (originator address where the destination 664 * the new corresponding information (originator address where the destination
649 * client currently is and its known TTVN) 665 * client currently is and its known TTVN)
650 * 666 *
651 * Returns true if the packet header has been updated, false otherwise 667 * Return: true if the packet header has been updated, false otherwise
652 */ 668 */
653static bool 669static bool
654batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, 670batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
@@ -686,9 +702,9 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
686 ret = true; 702 ret = true;
687out: 703out:
688 if (primary_if) 704 if (primary_if)
689 batadv_hardif_free_ref(primary_if); 705 batadv_hardif_put(primary_if);
690 if (orig_node) 706 if (orig_node)
691 batadv_orig_node_free_ref(orig_node); 707 batadv_orig_node_put(orig_node);
692 708
693 return ret; 709 return ret;
694} 710}
@@ -752,7 +768,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
752 return 0; 768 return 0;
753 769
754 curr_ttvn = (u8)atomic_read(&orig_node->last_ttvn); 770 curr_ttvn = (u8)atomic_read(&orig_node->last_ttvn);
755 batadv_orig_node_free_ref(orig_node); 771 batadv_orig_node_put(orig_node);
756 } 772 }
757 773
758 /* check if the TTVN contained in the packet is fresher than what the 774 /* check if the TTVN contained in the packet is fresher than what the
@@ -792,7 +808,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
792 808
793 ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr); 809 ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
794 810
795 batadv_hardif_free_ref(primary_if); 811 batadv_hardif_put(primary_if);
796 812
797 unicast_packet->ttvn = curr_ttvn; 813 unicast_packet->ttvn = curr_ttvn;
798 814
@@ -805,7 +821,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
805 * @skb: unicast tvlv packet to process 821 * @skb: unicast tvlv packet to process
806 * @recv_if: pointer to interface this packet was received on 822 * @recv_if: pointer to interface this packet was received on
807 * 823 *
808 * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP 824 * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
809 * otherwise. 825 * otherwise.
810 */ 826 */
811int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, 827int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
@@ -892,7 +908,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
892 908
893rx_success: 909rx_success:
894 if (orig_node) 910 if (orig_node)
895 batadv_orig_node_free_ref(orig_node); 911 batadv_orig_node_put(orig_node);
896 912
897 return NET_RX_SUCCESS; 913 return NET_RX_SUCCESS;
898 } 914 }
@@ -904,9 +920,8 @@ rx_success:
904 * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets 920 * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets
905 * @skb: unicast tvlv packet to process 921 * @skb: unicast tvlv packet to process
906 * @recv_if: pointer to interface this packet was received on 922 * @recv_if: pointer to interface this packet was received on
907 * @dst_addr: the payload destination
908 * 923 *
909 * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP 924 * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
910 * otherwise. 925 * otherwise.
911 */ 926 */
912int batadv_recv_unicast_tvlv(struct sk_buff *skb, 927int batadv_recv_unicast_tvlv(struct sk_buff *skb,
@@ -960,7 +975,7 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
960 * the assembled packet will exceed our MTU; 2) Buffer fragment, if we still 975 * the assembled packet will exceed our MTU; 2) Buffer fragment, if we still
961 * lack further fragments; 3) Merge fragments, if we have all needed parts. 976 * lack further fragments; 3) Merge fragments, if we have all needed parts.
962 * 977 *
963 * Return NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise. 978 * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise.
964 */ 979 */
965int batadv_recv_frag_packet(struct sk_buff *skb, 980int batadv_recv_frag_packet(struct sk_buff *skb,
966 struct batadv_hard_iface *recv_if) 981 struct batadv_hard_iface *recv_if)
@@ -1004,7 +1019,7 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
1004 1019
1005out: 1020out:
1006 if (orig_node_src) 1021 if (orig_node_src)
1007 batadv_orig_node_free_ref(orig_node_src); 1022 batadv_orig_node_put(orig_node_src);
1008 1023
1009 return ret; 1024 return ret;
1010} 1025}
@@ -1065,7 +1080,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
1065 1080
1066 /* check whether the packet is old and the host just restarted. */ 1081 /* check whether the packet is old and the host just restarted. */
1067 if (batadv_window_protected(bat_priv, seq_diff, 1082 if (batadv_window_protected(bat_priv, seq_diff,
1068 &orig_node->bcast_seqno_reset)) 1083 BATADV_BCAST_MAX_AGE,
1084 &orig_node->bcast_seqno_reset, NULL))
1069 goto spin_unlock; 1085 goto spin_unlock;
1070 1086
1071 /* mark broadcast in flood history, update window position 1087 /* mark broadcast in flood history, update window position
@@ -1108,6 +1124,6 @@ spin_unlock:
1108 spin_unlock_bh(&orig_node->bcast_seqno_lock); 1124 spin_unlock_bh(&orig_node->bcast_seqno_lock);
1109out: 1125out:
1110 if (orig_node) 1126 if (orig_node)
1111 batadv_orig_node_free_ref(orig_node); 1127 batadv_orig_node_put(orig_node);
1112 return ret; 1128 return ret;
1113} 1129}
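Note on the batadv_window_protected() change in the hunks above: callers now pass the maximum tolerated sequence-number age explicitly and may hand in an optional bool that reports whether protection was just (re)started. Below is a minimal sketch of the broadcast-seqno call-site pattern; only batadv_window_protected(), BATADV_BCAST_MAX_AGE and the bcast_seqno_reset field are taken from the hunk above, the surrounding function and variable names are illustrative.

/* Sketch only: check whether a broadcast falls outside the tolerated
 * sequence-number window (e.g. because the originating host restarted).
 */
static bool example_bcast_too_old(struct batadv_priv *bat_priv,
				  struct batadv_orig_node *orig_node,
				  s32 seq_diff)
{
	/* NULL: this caller does not care whether protection just started */
	return batadv_window_protected(bat_priv, seq_diff,
				       BATADV_BCAST_MAX_AGE,
				       &orig_node->bcast_seqno_reset, NULL);
}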
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 204bbe4952a6..02a5caa84127 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -52,6 +52,7 @@ batadv_find_router(struct batadv_priv *bat_priv,
52 struct batadv_orig_node *orig_node, 52 struct batadv_orig_node *orig_node,
53 struct batadv_hard_iface *recv_if); 53 struct batadv_hard_iface *recv_if);
54int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff, 54int batadv_window_protected(struct batadv_priv *bat_priv, s32 seq_num_diff,
55 unsigned long *last_reset); 55 s32 seq_old_max_diff, unsigned long *last_reset,
56 bool *protection_started);
56 57
57#endif /* _NET_BATMAN_ADV_ROUTING_H_ */ 58#endif /* _NET_BATMAN_ADV_ROUTING_H_ */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 782fa33ec296..3ce06e0a91b1 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -49,16 +49,30 @@
49 49
50static void batadv_send_outstanding_bcast_packet(struct work_struct *work); 50static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
51 51
52/* send out an already prepared packet to the given address via the 52/**
53 * specified batman interface 53 * batadv_send_skb_packet - send an already prepared packet
54 * @skb: the packet to send
55 * @hard_iface: the interface to use to send the broadcast packet
56 * @dst_addr: the payload destination
57 *
58 * Send out an already prepared packet to the given neighbor or broadcast it
59 * using the specified interface. Either hard_iface or neigh_node must not
60 * be NULL.
61 * If neigh_node is NULL, then the packet is broadcasted using hard_iface,
62 * otherwise it is sent as unicast to the given neighbor.
63 *
64 * Return: NET_XMIT_DROP in case of error or the result of dev_queue_xmit(skb)
65 * otherwise
54 */ 66 */
55int batadv_send_skb_packet(struct sk_buff *skb, 67int batadv_send_skb_packet(struct sk_buff *skb,
56 struct batadv_hard_iface *hard_iface, 68 struct batadv_hard_iface *hard_iface,
57 const u8 *dst_addr) 69 const u8 *dst_addr)
58{ 70{
59 struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); 71 struct batadv_priv *bat_priv;
60 struct ethhdr *ethhdr; 72 struct ethhdr *ethhdr;
61 73
74 bat_priv = netdev_priv(hard_iface->soft_iface);
75
62 if (hard_iface->if_status != BATADV_IF_ACTIVE) 76 if (hard_iface->if_status != BATADV_IF_ACTIVE)
63 goto send_skb_err; 77 goto send_skb_err;
64 78
@@ -100,6 +114,35 @@ send_skb_err:
100 return NET_XMIT_DROP; 114 return NET_XMIT_DROP;
101} 115}
102 116
117int batadv_send_broadcast_skb(struct sk_buff *skb,
118 struct batadv_hard_iface *hard_iface)
119{
120 return batadv_send_skb_packet(skb, hard_iface, batadv_broadcast_addr);
121}
122
123int batadv_send_unicast_skb(struct sk_buff *skb,
124 struct batadv_neigh_node *neigh)
125{
126#ifdef CONFIG_BATMAN_ADV_BATMAN_V
127 struct batadv_hardif_neigh_node *hardif_neigh;
128#endif
129 int ret;
130
131 ret = batadv_send_skb_packet(skb, neigh->if_incoming, neigh->addr);
132
133#ifdef CONFIG_BATMAN_ADV_BATMAN_V
134 hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr);
135
136 if ((hardif_neigh) && (ret != NET_XMIT_DROP))
137 hardif_neigh->bat_v.last_unicast_tx = jiffies;
138
139 if (hardif_neigh)
140 batadv_hardif_neigh_put(hardif_neigh);
141#endif
142
143 return ret;
144}
145
103/** 146/**
104 * batadv_send_skb_to_orig - Lookup next-hop and transmit skb. 147 * batadv_send_skb_to_orig - Lookup next-hop and transmit skb.
105 * @skb: Packet to be transmitted. 148 * @skb: Packet to be transmitted.
@@ -111,7 +154,7 @@ send_skb_err:
111 * host, NULL can be passed as recv_if and no interface alternating is 154 * host, NULL can be passed as recv_if and no interface alternating is
112 * attempted. 155 * attempted.
113 * 156 *
114 * Returns NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or 157 * Return: NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or
115 * NET_XMIT_POLICED if the skb is buffered for later transmit. 158 * NET_XMIT_POLICED if the skb is buffered for later transmit.
116 */ 159 */
117int batadv_send_skb_to_orig(struct sk_buff *skb, 160int batadv_send_skb_to_orig(struct sk_buff *skb,
@@ -146,14 +189,13 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
146 if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) { 189 if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) {
147 ret = NET_XMIT_POLICED; 190 ret = NET_XMIT_POLICED;
148 } else { 191 } else {
149 batadv_send_skb_packet(skb, neigh_node->if_incoming, 192 batadv_send_unicast_skb(skb, neigh_node);
150 neigh_node->addr);
151 ret = NET_XMIT_SUCCESS; 193 ret = NET_XMIT_SUCCESS;
152 } 194 }
153 195
154out: 196out:
155 if (neigh_node) 197 if (neigh_node)
156 batadv_neigh_node_free_ref(neigh_node); 198 batadv_neigh_node_put(neigh_node);
157 199
158 return ret; 200 return ret;
159} 201}
@@ -165,7 +207,7 @@ out:
165 * @hdr_size: amount of bytes to push at the beginning of the skb 207 * @hdr_size: amount of bytes to push at the beginning of the skb
166 * @orig_node: the destination node 208 * @orig_node: the destination node
167 * 209 *
168 * Returns false if the buffer extension was not possible or true otherwise. 210 * Return: false if the buffer extension was not possible or true otherwise.
169 */ 211 */
170static bool 212static bool
171batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size, 213batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
@@ -196,7 +238,7 @@ batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
196 * @skb: the skb containing the payload to encapsulate 238 * @skb: the skb containing the payload to encapsulate
197 * @orig_node: the destination node 239 * @orig_node: the destination node
198 * 240 *
199 * Returns false if the payload could not be encapsulated or true otherwise. 241 * Return: false if the payload could not be encapsulated or true otherwise.
200 */ 242 */
201static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb, 243static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb,
202 struct batadv_orig_node *orig_node) 244 struct batadv_orig_node *orig_node)
@@ -211,10 +253,10 @@ static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb,
211 * unicast 4addr header 253 * unicast 4addr header
212 * @bat_priv: the bat priv with all the soft interface information 254 * @bat_priv: the bat priv with all the soft interface information
213 * @skb: the skb containing the payload to encapsulate 255 * @skb: the skb containing the payload to encapsulate
214 * @orig_node: the destination node 256 * @orig: the destination node
215 * @packet_subtype: the unicast 4addr packet subtype to use 257 * @packet_subtype: the unicast 4addr packet subtype to use
216 * 258 *
217 * Returns false if the payload could not be encapsulated or true otherwise. 259 * Return: false if the payload could not be encapsulated or true otherwise.
218 */ 260 */
219bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, 261bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
220 struct sk_buff *skb, 262 struct sk_buff *skb,
@@ -246,7 +288,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
246 ret = true; 288 ret = true;
247out: 289out:
248 if (primary_if) 290 if (primary_if)
249 batadv_hardif_free_ref(primary_if); 291 batadv_hardif_put(primary_if);
250 return ret; 292 return ret;
251} 293}
252 294
@@ -265,7 +307,7 @@ out:
265 * as packet_type. Then send this frame to the given orig_node and release a 307 * as packet_type. Then send this frame to the given orig_node and release a
266 * reference to this orig_node. 308 * reference to this orig_node.
267 * 309 *
268 * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. 310 * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
269 */ 311 */
270int batadv_send_skb_unicast(struct batadv_priv *bat_priv, 312int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
271 struct sk_buff *skb, int packet_type, 313 struct sk_buff *skb, int packet_type,
@@ -317,7 +359,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
317 359
318out: 360out:
319 if (orig_node) 361 if (orig_node)
320 batadv_orig_node_free_ref(orig_node); 362 batadv_orig_node_put(orig_node);
321 if (ret == NET_XMIT_DROP) 363 if (ret == NET_XMIT_DROP)
322 kfree_skb(skb); 364 kfree_skb(skb);
323 return ret; 365 return ret;
@@ -339,7 +381,7 @@ out:
339 * BATADV_UNICAST_4ADDR was supplied as packet_type. Then send this frame 381 * BATADV_UNICAST_4ADDR was supplied as packet_type. Then send this frame
340 * to the according destination node. 382 * to the according destination node.
341 * 383 *
342 * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. 384 * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
343 */ 385 */
344int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, 386int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
345 struct sk_buff *skb, int packet_type, 387 struct sk_buff *skb, int packet_type,
@@ -373,7 +415,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
373 * Look up the currently selected gateway. Wrap the given skb into a batman-adv 415 * Look up the currently selected gateway. Wrap the given skb into a batman-adv
374 * unicast header and send this frame to this gateway node. 416 * unicast header and send this frame to this gateway node.
375 * 417 *
376 * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. 418 * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
377 */ 419 */
378int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, 420int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
379 unsigned short vid) 421 unsigned short vid)
@@ -409,9 +451,9 @@ static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
409{ 451{
410 kfree_skb(forw_packet->skb); 452 kfree_skb(forw_packet->skb);
411 if (forw_packet->if_incoming) 453 if (forw_packet->if_incoming)
412 batadv_hardif_free_ref(forw_packet->if_incoming); 454 batadv_hardif_put(forw_packet->if_incoming);
413 if (forw_packet->if_outgoing) 455 if (forw_packet->if_outgoing)
414 batadv_hardif_free_ref(forw_packet->if_outgoing); 456 batadv_hardif_put(forw_packet->if_outgoing);
415 kfree(forw_packet); 457 kfree(forw_packet);
416} 458}
417 459
@@ -430,14 +472,19 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
430 send_time); 472 send_time);
431} 473}
432 474
433/* add a broadcast packet to the queue and setup timers. broadcast packets 475/**
434 * are sent multiple times to increase probability for being received. 476 * batadv_add_bcast_packet_to_list - queue broadcast packet for multiple sends
477 * @bat_priv: the bat priv with all the soft interface information
478 * @skb: broadcast packet to add
479 * @delay: number of jiffies to wait before sending
435 * 480 *
436 * This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on 481 * Add a broadcast packet to the queue and set up timers. Broadcast packets
437 * errors. 482 * are sent multiple times to increase the probability of being received.
438 * 483 *
439 * The skb is not consumed, so the caller should make sure that the 484 * The skb is not consumed, so the caller should make sure that the
440 * skb is freed. 485 * skb is freed.
486 *
487 * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
441 */ 488 */
442int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, 489int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
443 const struct sk_buff *skb, 490 const struct sk_buff *skb,
@@ -492,7 +539,7 @@ out_and_inc:
492 atomic_inc(&bat_priv->bcast_queue_left); 539 atomic_inc(&bat_priv->bcast_queue_left);
493out: 540out:
494 if (primary_if) 541 if (primary_if)
495 batadv_hardif_free_ref(primary_if); 542 batadv_hardif_put(primary_if);
496 return NETDEV_TX_BUSY; 543 return NETDEV_TX_BUSY;
497} 544}
498 545
@@ -533,8 +580,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
533 /* send a copy of the saved skb */ 580 /* send a copy of the saved skb */
534 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); 581 skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
535 if (skb1) 582 if (skb1)
536 batadv_send_skb_packet(skb1, hard_iface, 583 batadv_send_broadcast_skb(skb1, hard_iface);
537 batadv_broadcast_addr);
538 } 584 }
539 rcu_read_unlock(); 585 rcu_read_unlock();
540 586
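The two wrappers introduced above replace open-coded calls to batadv_send_skb_packet(). A rough usage sketch follows; the wrapper signatures are the ones declared in send.h below, while the caller name and the unicast/broadcast decision are purely hypothetical, and the batman-adv headers are assumed to be included.

/* Sketch only: pick the appropriate transmit wrapper. */
static int example_xmit(struct sk_buff *skb,
			struct batadv_neigh_node *neigh_node,
			struct batadv_hard_iface *hard_iface)
{
	if (neigh_node)
		/* uses neigh_node->if_incoming and neigh_node->addr and,
		 * with CONFIG_BATMAN_ADV_BATMAN_V, records the time of the
		 * last unicast transmission
		 */
		return batadv_send_unicast_skb(skb, neigh_node);

	/* otherwise flood the frame on the given interface */
	return batadv_send_broadcast_skb(skb, hard_iface);
}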
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 82059f259e46..6fd7270d8ce6 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -28,12 +28,16 @@
28struct sk_buff; 28struct sk_buff;
29struct work_struct; 29struct work_struct;
30 30
31int batadv_send_skb_packet(struct sk_buff *skb,
32 struct batadv_hard_iface *hard_iface,
33 const u8 *dst_addr);
34int batadv_send_skb_to_orig(struct sk_buff *skb, 31int batadv_send_skb_to_orig(struct sk_buff *skb,
35 struct batadv_orig_node *orig_node, 32 struct batadv_orig_node *orig_node,
36 struct batadv_hard_iface *recv_if); 33 struct batadv_hard_iface *recv_if);
34int batadv_send_skb_packet(struct sk_buff *skb,
35 struct batadv_hard_iface *hard_iface,
36 const u8 *dst_addr);
37int batadv_send_broadcast_skb(struct sk_buff *skb,
38 struct batadv_hard_iface *hard_iface);
39int batadv_send_unicast_skb(struct sk_buff *skb,
40 struct batadv_neigh_node *neigh_node);
37void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface); 41void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface);
38int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, 42int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
39 const struct sk_buff *skb, 43 const struct sk_buff *skb,
@@ -69,7 +73,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
69 * header via the translation table. Wrap the given skb into a batman-adv 73 * header via the translation table. Wrap the given skb into a batman-adv
70 * unicast header. Then send this frame to the according destination node. 74 * unicast header. Then send this frame to the according destination node.
71 * 75 *
72 * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. 76 * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
73 */ 77 */
74static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv, 78static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
75 struct sk_buff *skb, u8 *dst_hint, 79 struct sk_buff *skb, u8 *dst_hint,
@@ -92,7 +96,7 @@ static inline int batadv_send_skb_via_tt(struct batadv_priv *bat_priv,
92 * unicast-4addr header. Then send this frame to the according destination 96 * unicast-4addr header. Then send this frame to the according destination
93 * node. 97 * node.
94 * 98 *
95 * Returns NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. 99 * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
96 */ 100 */
97static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv, 101static inline int batadv_send_skb_via_tt_4addr(struct batadv_priv *bat_priv,
98 struct sk_buff *skb, 102 struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index ac4d08de5df4..0710379491bf 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -30,6 +30,7 @@
30#include <linux/if_vlan.h> 30#include <linux/if_vlan.h>
31#include <linux/jiffies.h> 31#include <linux/jiffies.h>
32#include <linux/kernel.h> 32#include <linux/kernel.h>
33#include <linux/kref.h>
33#include <linux/list.h> 34#include <linux/list.h>
34#include <linux/lockdep.h> 35#include <linux/lockdep.h>
35#include <linux/netdevice.h> 36#include <linux/netdevice.h>
@@ -376,7 +377,7 @@ dropped_freed:
376 batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED); 377 batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
377end: 378end:
378 if (primary_if) 379 if (primary_if)
379 batadv_hardif_free_ref(primary_if); 380 batadv_hardif_put(primary_if);
380 return NETDEV_TX_OK; 381 return NETDEV_TX_OK;
381} 382}
382 383
@@ -478,22 +479,34 @@ out:
478} 479}
479 480
480/** 481/**
481 * batadv_softif_vlan_free_ref - decrease the vlan object refcounter and 482 * batadv_softif_vlan_release - release vlan from lists and queue for free after
482 * possibly free it 483 * rcu grace period
483 * @softif_vlan: the vlan object to release 484 * @ref: kref pointer of the vlan object
484 */ 485 */
485void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan) 486static void batadv_softif_vlan_release(struct kref *ref)
487{
488 struct batadv_softif_vlan *vlan;
489
490 vlan = container_of(ref, struct batadv_softif_vlan, refcount);
491
492 spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock);
493 hlist_del_rcu(&vlan->list);
494 spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock);
495
496 kfree_rcu(vlan, rcu);
497}
498
499/**
500 * batadv_softif_vlan_put - decrease the vlan object refcounter and
501 * possibly release it
502 * @vlan: the vlan object to release
503 */
504void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
486{ 505{
487 if (!vlan) 506 if (!vlan)
488 return; 507 return;
489 508
490 if (atomic_dec_and_test(&vlan->refcount)) { 509 kref_put(&vlan->refcount, batadv_softif_vlan_release);
491 spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock);
492 hlist_del_rcu(&vlan->list);
493 spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock);
494
495 kfree_rcu(vlan, rcu);
496 }
497} 510}
498 511
499/** 512/**
@@ -501,7 +514,7 @@ void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan)
501 * @bat_priv: the bat priv with all the soft interface information 514 * @bat_priv: the bat priv with all the soft interface information
502 * @vid: the identifier of the vlan object to retrieve 515 * @vid: the identifier of the vlan object to retrieve
503 * 516 *
504 * Returns the private data of the vlan matching the vid passed as argument or 517 * Return: the private data of the vlan matching the vid passed as argument or
505 * NULL otherwise. The refcounter of the returned object is incremented by 1. 518 * NULL otherwise. The refcounter of the returned object is incremented by 1.
506 */ 519 */
507struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, 520struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
@@ -514,7 +527,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
514 if (vlan_tmp->vid != vid) 527 if (vlan_tmp->vid != vid)
515 continue; 528 continue;
516 529
517 if (!atomic_inc_not_zero(&vlan_tmp->refcount)) 530 if (!kref_get_unless_zero(&vlan_tmp->refcount))
518 continue; 531 continue;
519 532
520 vlan = vlan_tmp; 533 vlan = vlan_tmp;
@@ -530,7 +543,7 @@ struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
530 * @bat_priv: the bat priv with all the soft interface information 543 * @bat_priv: the bat priv with all the soft interface information
531 * @vid: the VLAN identifier 544 * @vid: the VLAN identifier
532 * 545 *
533 * Returns 0 on success, a negative error otherwise. 546 * Return: 0 on success, a negative error otherwise.
534 */ 547 */
535int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) 548int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
536{ 549{
@@ -539,7 +552,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
539 552
540 vlan = batadv_softif_vlan_get(bat_priv, vid); 553 vlan = batadv_softif_vlan_get(bat_priv, vid);
541 if (vlan) { 554 if (vlan) {
542 batadv_softif_vlan_free_ref(vlan); 555 batadv_softif_vlan_put(vlan);
543 return -EEXIST; 556 return -EEXIST;
544 } 557 }
545 558
@@ -549,7 +562,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
549 562
550 vlan->bat_priv = bat_priv; 563 vlan->bat_priv = bat_priv;
551 vlan->vid = vid; 564 vlan->vid = vid;
552 atomic_set(&vlan->refcount, 1); 565 kref_init(&vlan->refcount);
553 566
554 atomic_set(&vlan->ap_isolation, 0); 567 atomic_set(&vlan->ap_isolation, 0);
555 568
@@ -588,18 +601,19 @@ static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv,
588 vlan->vid, "vlan interface destroyed", false); 601 vlan->vid, "vlan interface destroyed", false);
589 602
590 batadv_sysfs_del_vlan(bat_priv, vlan); 603 batadv_sysfs_del_vlan(bat_priv, vlan);
591 batadv_softif_vlan_free_ref(vlan); 604 batadv_softif_vlan_put(vlan);
592} 605}
593 606
594/** 607/**
595 * batadv_interface_add_vid - ndo_add_vid API implementation 608 * batadv_interface_add_vid - ndo_add_vid API implementation
596 * @dev: the netdev of the mesh interface 609 * @dev: the netdev of the mesh interface
610 * @proto: protocol of the vlan id
597 * @vid: identifier of the new vlan 611 * @vid: identifier of the new vlan
598 * 612 *
599 * Set up all the internal structures for handling the new vlan on top of the 613 * Set up all the internal structures for handling the new vlan on top of the
600 * mesh interface 614 * mesh interface
601 * 615 *
602 * Returns 0 on success or a negative error code in case of failure. 616 * Return: 0 on success or a negative error code in case of failure.
603 */ 617 */
604static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, 618static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
605 unsigned short vid) 619 unsigned short vid)
@@ -632,7 +646,7 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
632 if (!vlan->kobj) { 646 if (!vlan->kobj) {
633 ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); 647 ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan);
634 if (ret) { 648 if (ret) {
635 batadv_softif_vlan_free_ref(vlan); 649 batadv_softif_vlan_put(vlan);
636 return ret; 650 return ret;
637 } 651 }
638 } 652 }
@@ -651,12 +665,13 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
651/** 665/**
652 * batadv_interface_kill_vid - ndo_kill_vid API implementation 666 * batadv_interface_kill_vid - ndo_kill_vid API implementation
653 * @dev: the netdev of the mesh interface 667 * @dev: the netdev of the mesh interface
668 * @proto: protocol of the vlan id
654 * @vid: identifier of the deleted vlan 669 * @vid: identifier of the deleted vlan
655 * 670 *
656 * Destroy all the internal structures used to handle the vlan identified by vid 671 * Destroy all the internal structures used to handle the vlan identified by vid
657 * on top of the mesh interface 672 * on top of the mesh interface
658 * 673 *
659 * Returns 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q 674 * Return: 0 on success, -EINVAL if the specified prototype is not ETH_P_8021Q
660 * or -ENOENT if the specified vlan id wasn't registered. 675 * or -ENOENT if the specified vlan id wasn't registered.
661 */ 676 */
662static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, 677static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
@@ -678,7 +693,7 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
678 batadv_softif_destroy_vlan(bat_priv, vlan); 693 batadv_softif_destroy_vlan(bat_priv, vlan);
679 694
680 /* finally free the vlan object */ 695 /* finally free the vlan object */
681 batadv_softif_vlan_free_ref(vlan); 696 batadv_softif_vlan_put(vlan);
682 697
683 return 0; 698 return 0;
684} 699}
@@ -734,7 +749,7 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
734 vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); 749 vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
735 if (vlan) { 750 if (vlan) {
736 batadv_softif_destroy_vlan(bat_priv, vlan); 751 batadv_softif_destroy_vlan(bat_priv, vlan);
737 batadv_softif_vlan_free_ref(vlan); 752 batadv_softif_vlan_put(vlan);
738 } 753 }
739 754
740 batadv_sysfs_del_meshif(soft_iface); 755 batadv_sysfs_del_meshif(soft_iface);
@@ -745,7 +760,7 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
745 * batadv_softif_init_late - late stage initialization of soft interface 760 * batadv_softif_init_late - late stage initialization of soft interface
746 * @dev: registered network device to modify 761 * @dev: registered network device to modify
747 * 762 *
748 * Returns error code on failures 763 * Return: error code on failures
749 */ 764 */
750static int batadv_softif_init_late(struct net_device *dev) 765static int batadv_softif_init_late(struct net_device *dev)
751{ 766{
@@ -847,7 +862,7 @@ free_bat_counters:
847 * @dev: batadv_soft_interface used as master interface 862 * @dev: batadv_soft_interface used as master interface
848 * @slave_dev: net_device which should become the slave interface 863 * @slave_dev: net_device which should become the slave interface
849 * 864 *
850 * Return 0 if successful or error otherwise. 865 * Return: 0 if successful or error otherwise.
851 */ 866 */
852static int batadv_softif_slave_add(struct net_device *dev, 867static int batadv_softif_slave_add(struct net_device *dev,
853 struct net_device *slave_dev) 868 struct net_device *slave_dev)
@@ -863,7 +878,7 @@ static int batadv_softif_slave_add(struct net_device *dev,
863 878
864out: 879out:
865 if (hard_iface) 880 if (hard_iface)
866 batadv_hardif_free_ref(hard_iface); 881 batadv_hardif_put(hard_iface);
867 return ret; 882 return ret;
868} 883}
869 884
@@ -872,7 +887,7 @@ out:
872 * @dev: batadv_soft_interface used as master interface 887 * @dev: batadv_soft_interface used as master interface
873 * @slave_dev: net_device which should be removed from the master interface 888 * @slave_dev: net_device which should be removed from the master interface
874 * 889 *
875 * Return 0 if successful or error otherwise. 890 * Return: 0 if successful or error otherwise.
876 */ 891 */
877static int batadv_softif_slave_del(struct net_device *dev, 892static int batadv_softif_slave_del(struct net_device *dev,
878 struct net_device *slave_dev) 893 struct net_device *slave_dev)
@@ -890,7 +905,7 @@ static int batadv_softif_slave_del(struct net_device *dev,
890 905
891out: 906out:
892 if (hard_iface) 907 if (hard_iface)
893 batadv_hardif_free_ref(hard_iface); 908 batadv_hardif_put(hard_iface);
894 return ret; 909 return ret;
895} 910}
896 911
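The vlan refcount is converted from atomic_t to struct kref above: the list removal moves into a release callback that only runs when the last reference is dropped, and the put helper shrinks to a single kref_put(). A self-contained sketch of the same pattern, using hypothetical names rather than batman-adv code:

#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_vlan {
	struct kref refcount;
	struct rcu_head rcu;
};

/* Runs exactly once, when the last reference is dropped. */
static void example_vlan_release(struct kref *ref)
{
	struct example_vlan *vlan = container_of(ref, struct example_vlan,
						 refcount);

	/* unlink from lists here, then free after an RCU grace period */
	kfree_rcu(vlan, rcu);
}

static void example_vlan_put(struct example_vlan *vlan)
{
	kref_put(&vlan->refcount, example_vlan_release);
}

static struct example_vlan *example_vlan_new(void)
{
	struct example_vlan *vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC);

	if (!vlan)
		return NULL;

	kref_init(&vlan->refcount);	/* refcount starts at 1 */
	return vlan;
}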
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 8e82176f40b1..9ae265703d23 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -34,7 +34,7 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface);
34int batadv_softif_is_valid(const struct net_device *net_dev); 34int batadv_softif_is_valid(const struct net_device *net_dev);
35extern struct rtnl_link_ops batadv_link_ops; 35extern struct rtnl_link_ops batadv_link_ops;
36int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); 36int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid);
37void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan); 37void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan);
38struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, 38struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
39 unsigned short vid); 39 unsigned short vid);
40 40
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index fe87777fda8a..e7cf51333a36 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
@@ -25,6 +25,7 @@
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/if.h> 26#include <linux/if.h>
27#include <linux/if_vlan.h> 27#include <linux/if_vlan.h>
28#include <linux/kref.h>
28#include <linux/kernel.h> 29#include <linux/kernel.h>
29#include <linux/netdevice.h> 30#include <linux/netdevice.h>
30#include <linux/printk.h> 31#include <linux/printk.h>
@@ -64,7 +65,7 @@ static struct batadv_priv *batadv_kobj_to_batpriv(struct kobject *obj)
64 * batadv_vlan_kobj_to_batpriv - convert a vlan kobj in the associated batpriv 65 * batadv_vlan_kobj_to_batpriv - convert a vlan kobj in the associated batpriv
65 * @obj: kobject to convert 66 * @obj: kobject to convert
66 * 67 *
67 * Returns the associated batadv_priv struct. 68 * Return: the associated batadv_priv struct.
68 */ 69 */
69static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj) 70static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj)
70{ 71{
@@ -82,9 +83,10 @@ static struct batadv_priv *batadv_vlan_kobj_to_batpriv(struct kobject *obj)
82 83
83/** 84/**
84 * batadv_kobj_to_vlan - convert a kobj in the associated softif_vlan struct 85 * batadv_kobj_to_vlan - convert a kobj in the associated softif_vlan struct
86 * @bat_priv: the bat priv with all the soft interface information
85 * @obj: kobject to convert 87 * @obj: kobject to convert
86 * 88 *
87 * Returns the associated softif_vlan struct if found, NULL otherwise. 89 * Return: the associated softif_vlan struct if found, NULL otherwise.
88 */ 90 */
89static struct batadv_softif_vlan * 91static struct batadv_softif_vlan *
90batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj) 92batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj)
@@ -96,7 +98,7 @@ batadv_kobj_to_vlan(struct batadv_priv *bat_priv, struct kobject *obj)
96 if (vlan_tmp->kobj != obj) 98 if (vlan_tmp->kobj != obj)
97 continue; 99 continue;
98 100
99 if (!atomic_inc_not_zero(&vlan_tmp->refcount)) 101 if (!kref_get_unless_zero(&vlan_tmp->refcount))
100 continue; 102 continue;
101 103
102 vlan = vlan_tmp; 104 vlan = vlan_tmp;
@@ -214,7 +216,7 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \
214 attr, &vlan->_name, \ 216 attr, &vlan->_name, \
215 bat_priv->soft_iface); \ 217 bat_priv->soft_iface); \
216 \ 218 \
217 batadv_softif_vlan_free_ref(vlan); \ 219 batadv_softif_vlan_put(vlan); \
218 return res; \ 220 return res; \
219} 221}
220 222
@@ -229,7 +231,7 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \
229 atomic_read(&vlan->_name) == 0 ? \ 231 atomic_read(&vlan->_name) == 0 ? \
230 "disabled" : "enabled"); \ 232 "disabled" : "enabled"); \
231 \ 233 \
232 batadv_softif_vlan_free_ref(vlan); \ 234 batadv_softif_vlan_put(vlan); \
233 return res; \ 235 return res; \
234} 236}
235 237
@@ -240,6 +242,55 @@ ssize_t batadv_show_vlan_##_name(struct kobject *kobj, \
240 static BATADV_ATTR_VLAN(_name, _mode, batadv_show_vlan_##_name, \ 242 static BATADV_ATTR_VLAN(_name, _mode, batadv_show_vlan_##_name, \
241 batadv_store_vlan_##_name) 243 batadv_store_vlan_##_name)
242 244
245#define BATADV_ATTR_HIF_STORE_UINT(_name, _var, _min, _max, _post_func) \
246ssize_t batadv_store_##_name(struct kobject *kobj, \
247 struct attribute *attr, char *buff, \
248 size_t count) \
249{ \
250 struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
251 struct batadv_hard_iface *hard_iface; \
252 ssize_t length; \
253 \
254 hard_iface = batadv_hardif_get_by_netdev(net_dev); \
255 if (!hard_iface) \
256 return 0; \
257 \
258 length = __batadv_store_uint_attr(buff, count, _min, _max, \
259 _post_func, attr, \
260 &hard_iface->_var, net_dev); \
261 \
262 batadv_hardif_put(hard_iface); \
263 return length; \
264}
265
266#define BATADV_ATTR_HIF_SHOW_UINT(_name, _var) \
267ssize_t batadv_show_##_name(struct kobject *kobj, \
268 struct attribute *attr, char *buff) \
269{ \
270 struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \
271 struct batadv_hard_iface *hard_iface; \
272 ssize_t length; \
273 \
274 hard_iface = batadv_hardif_get_by_netdev(net_dev); \
275 if (!hard_iface) \
276 return 0; \
277 \
278 length = sprintf(buff, "%i\n", atomic_read(&hard_iface->_var)); \
279 \
280 batadv_hardif_put(hard_iface); \
281 return length; \
282}
283
284/* Use this, if you are going to set [name] in hard_iface to an
285 * unsigned integer value
286 */
287#define BATADV_ATTR_HIF_UINT(_name, _var, _mode, _min, _max, _post_func)\
288 static BATADV_ATTR_HIF_STORE_UINT(_name, _var, _min, \
289 _max, _post_func) \
290 static BATADV_ATTR_HIF_SHOW_UINT(_name, _var) \
291 static BATADV_ATTR(_name, _mode, batadv_show_##_name, \
292 batadv_store_##_name)
293
243static int batadv_store_bool_attr(char *buff, size_t count, 294static int batadv_store_bool_attr(char *buff, size_t count,
244 struct net_device *net_dev, 295 struct net_device *net_dev,
245 const char *attr_name, atomic_t *attr, 296 const char *attr_name, atomic_t *attr,
@@ -491,7 +542,7 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj,
491 * @attr: the batman-adv attribute the user is interacting with 542 * @attr: the batman-adv attribute the user is interacting with
492 * @buff: the buffer that will contain the data to send back to the user 543 * @buff: the buffer that will contain the data to send back to the user
493 * 544 *
494 * Returns the number of bytes written into 'buff' on success or a negative 545 * Return: the number of bytes written into 'buff' on success or a negative
495 * error code in case of failure 546 * error code in case of failure
496 */ 547 */
497static ssize_t batadv_show_isolation_mark(struct kobject *kobj, 548static ssize_t batadv_show_isolation_mark(struct kobject *kobj,
@@ -511,7 +562,7 @@ static ssize_t batadv_show_isolation_mark(struct kobject *kobj,
511 * @buff: the buffer containing the user data 562 * @buff: the buffer containing the user data
512 * @count: number of bytes in the buffer 563 * @count: number of bytes in the buffer
513 * 564 *
514 * Returns 'count' on success or a negative error code in case of failure 565 * Return: 'count' on success or a negative error code in case of failure
515 */ 566 */
516static ssize_t batadv_store_isolation_mark(struct kobject *kobj, 567static ssize_t batadv_store_isolation_mark(struct kobject *kobj,
517 struct attribute *attr, char *buff, 568 struct attribute *attr, char *buff,
@@ -620,9 +671,7 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
620 671
621BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); 672BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
622 673
623/** 674/* array of vlan specific sysfs attributes */
624 * batadv_vlan_attrs - array of vlan specific sysfs attributes
625 */
626static struct batadv_attribute *batadv_vlan_attrs[] = { 675static struct batadv_attribute *batadv_vlan_attrs[] = {
627 &batadv_attr_vlan_ap_isolation, 676 &batadv_attr_vlan_ap_isolation,
628 NULL, 677 NULL,
@@ -683,7 +732,7 @@ void batadv_sysfs_del_meshif(struct net_device *dev)
683 * @dev: netdev of the mesh interface 732 * @dev: netdev of the mesh interface
684 * @vlan: private data of the newly added VLAN interface 733 * @vlan: private data of the newly added VLAN interface
685 * 734 *
686 * Returns 0 on success and -ENOMEM if any of the structure allocations fails. 735 * Return: 0 on success and -ENOMEM if any of the structure allocations fails.
687 */ 736 */
688int batadv_sysfs_add_vlan(struct net_device *dev, 737int batadv_sysfs_add_vlan(struct net_device *dev,
689 struct batadv_softif_vlan *vlan) 738 struct batadv_softif_vlan *vlan)
@@ -771,7 +820,7 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj,
771 820
772 length = sprintf(buff, "%s\n", ifname); 821 length = sprintf(buff, "%s\n", ifname);
773 822
774 batadv_hardif_free_ref(hard_iface); 823 batadv_hardif_put(hard_iface);
775 824
776 return length; 825 return length;
777} 826}
@@ -795,7 +844,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
795 if (strlen(buff) >= IFNAMSIZ) { 844 if (strlen(buff) >= IFNAMSIZ) {
796 pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", 845 pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
797 buff); 846 buff);
798 batadv_hardif_free_ref(hard_iface); 847 batadv_hardif_put(hard_iface);
799 return -EINVAL; 848 return -EINVAL;
800 } 849 }
801 850
@@ -829,7 +878,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
829unlock: 878unlock:
830 rtnl_unlock(); 879 rtnl_unlock();
831out: 880out:
832 batadv_hardif_free_ref(hard_iface); 881 batadv_hardif_put(hard_iface);
833 return ret; 882 return ret;
834} 883}
835 884
@@ -863,18 +912,99 @@ static ssize_t batadv_show_iface_status(struct kobject *kobj,
863 break; 912 break;
864 } 913 }
865 914
866 batadv_hardif_free_ref(hard_iface); 915 batadv_hardif_put(hard_iface);
867 916
868 return length; 917 return length;
869} 918}
870 919
920#ifdef CONFIG_BATMAN_ADV_BATMAN_V
921
922/**
923 * batadv_store_throughput_override - parse and store throughput override
924 * entered by the user
925 * @kobj: kobject representing the private mesh sysfs directory
926 * @attr: the batman-adv attribute the user is interacting with
927 * @buff: the buffer containing the user data
928 * @count: number of bytes in the buffer
929 *
930 * Return: 'count' on success or a negative error code in case of failure
931 */
932static ssize_t batadv_store_throughput_override(struct kobject *kobj,
933 struct attribute *attr,
934 char *buff, size_t count)
935{
936 struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
937 struct batadv_hard_iface *hard_iface;
938 u32 tp_override;
939 u32 old_tp_override;
940 bool ret;
941
942 hard_iface = batadv_hardif_get_by_netdev(net_dev);
943 if (!hard_iface)
944 return -EINVAL;
945
946 if (buff[count - 1] == '\n')
947 buff[count - 1] = '\0';
948
949 ret = batadv_parse_throughput(net_dev, buff, "throughput_override",
950 &tp_override);
951 if (!ret)
952 goto out;
953
954 old_tp_override = atomic_read(&hard_iface->bat_v.throughput_override);
955 if (old_tp_override == tp_override)
956 goto out;
957
958 batadv_info(net_dev, "%s: Changing from: %u.%u MBit to: %u.%u MBit\n",
959 "throughput_override",
960 old_tp_override / 10, old_tp_override % 10,
961 tp_override / 10, tp_override % 10);
962
963 atomic_set(&hard_iface->bat_v.throughput_override, tp_override);
964
965out:
966 batadv_hardif_put(hard_iface);
967 return count;
968}
969
970static ssize_t batadv_show_throughput_override(struct kobject *kobj,
971 struct attribute *attr,
972 char *buff)
973{
974 struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
975 struct batadv_hard_iface *hard_iface;
976 u32 tp_override;
977
978 hard_iface = batadv_hardif_get_by_netdev(net_dev);
979 if (!hard_iface)
980 return -EINVAL;
981
982 tp_override = atomic_read(&hard_iface->bat_v.throughput_override);
983
984 return sprintf(buff, "%u.%u MBit\n", tp_override / 10,
985 tp_override % 10);
986}
987
988#endif
989
871static BATADV_ATTR(mesh_iface, S_IRUGO | S_IWUSR, batadv_show_mesh_iface, 990static BATADV_ATTR(mesh_iface, S_IRUGO | S_IWUSR, batadv_show_mesh_iface,
872 batadv_store_mesh_iface); 991 batadv_store_mesh_iface);
873static BATADV_ATTR(iface_status, S_IRUGO, batadv_show_iface_status, NULL); 992static BATADV_ATTR(iface_status, S_IRUGO, batadv_show_iface_status, NULL);
993#ifdef CONFIG_BATMAN_ADV_BATMAN_V
994BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, S_IRUGO | S_IWUSR,
995 2 * BATADV_JITTER, INT_MAX, NULL);
996static BATADV_ATTR(throughput_override, S_IRUGO | S_IWUSR,
997 batadv_show_throughput_override,
998 batadv_store_throughput_override);
999#endif
874 1000
875static struct batadv_attribute *batadv_batman_attrs[] = { 1001static struct batadv_attribute *batadv_batman_attrs[] = {
876 &batadv_attr_mesh_iface, 1002 &batadv_attr_mesh_iface,
877 &batadv_attr_iface_status, 1003 &batadv_attr_iface_status,
1004#ifdef CONFIG_BATMAN_ADV_BATMAN_V
1005 &batadv_attr_elp_interval,
1006 &batadv_attr_throughput_override,
1007#endif
878 NULL, 1008 NULL,
879}; 1009};
880 1010
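Two additions above are worth spelling out. First, BATADV_ATTR_HIF_UINT declares bounded per-hard-interface attributes, as the elp_interval declaration shows. Second, the throughput override appears to be stored in units of 0.1 Mbit/s, which is why the show and store handlers divide and take the remainder by 10. A tiny sketch of the latter; the helper name is hypothetical and not part of the patch.

#include <linux/printk.h>
#include <linux/types.h>

/* Hypothetical helper: print a value kept in 0.1 Mbit/s units the same
 * way batadv_show_throughput_override() does.
 */
static void example_print_throughput(u32 tp_override)
{
	/* e.g. tp_override == 105 prints "10.5 MBit" */
	pr_info("%u.%u MBit\n", tp_override / 10, tp_override % 10);
}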
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index 61974428a7af..c76021b4e198 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2010-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner 3 * Marek Lindner
4 * 4 *
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index cdfc85fa2743..0b43e86328a5 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich, Antonio Quartulli 3 * Marek Lindner, Simon Wunderlich, Antonio Quartulli
4 * 4 *
@@ -31,6 +31,7 @@
31#include <linux/jhash.h> 31#include <linux/jhash.h>
32#include <linux/jiffies.h> 32#include <linux/jiffies.h>
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/kref.h>
34#include <linux/list.h> 35#include <linux/list.h>
35#include <linux/lockdep.h> 36#include <linux/lockdep.h>
36#include <linux/netdevice.h> 37#include <linux/netdevice.h>
@@ -68,7 +69,15 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
68 unsigned short vid, const char *message, 69 unsigned short vid, const char *message,
69 bool roaming); 70 bool roaming);
70 71
71/* returns 1 if they are the same mac addr and vid */ 72/**
73 * batadv_compare_tt - check if two TT entries are the same
74 * @node: the list element pointer of the first TT entry
75 * @data2: pointer to the tt_common_entry of the second TT entry
76 *
77 * Compare the MAC address and the VLAN ID of the two TT entries and check if
78 * they are the same TT client.
79 * Return: 1 if the two TT clients are the same, 0 otherwise
80 */
72static int batadv_compare_tt(const struct hlist_node *node, const void *data2) 81static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
73{ 82{
74 const void *data1 = container_of(node, struct batadv_tt_common_entry, 83 const void *data1 = container_of(node, struct batadv_tt_common_entry,
@@ -84,7 +93,7 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
84 * @data: pointer to the tt_common_entry object to map 93 * @data: pointer to the tt_common_entry object to map
85 * @size: the size of the hash table 94 * @size: the size of the hash table
86 * 95 *
87 * Returns the hash index where the object represented by 'data' should be 96 * Return: the hash index where the object represented by 'data' should be
88 * stored at. 97 * stored at.
89 */ 98 */
90static inline u32 batadv_choose_tt(const void *data, u32 size) 99static inline u32 batadv_choose_tt(const void *data, u32 size)
@@ -105,7 +114,7 @@ static inline u32 batadv_choose_tt(const void *data, u32 size)
105 * @addr: the mac address of the client to look for 114 * @addr: the mac address of the client to look for
106 * @vid: VLAN identifier 115 * @vid: VLAN identifier
107 * 116 *
108 * Returns a pointer to the tt_common struct belonging to the searched client if 117 * Return: a pointer to the tt_common struct belonging to the searched client if
109 * found, NULL otherwise. 118 * found, NULL otherwise.
110 */ 119 */
111static struct batadv_tt_common_entry * 120static struct batadv_tt_common_entry *
@@ -133,7 +142,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr,
133 if (tt->vid != vid) 142 if (tt->vid != vid)
134 continue; 143 continue;
135 144
136 if (!atomic_inc_not_zero(&tt->refcount)) 145 if (!kref_get_unless_zero(&tt->refcount))
137 continue; 146 continue;
138 147
139 tt_tmp = tt; 148 tt_tmp = tt;
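batadv_tt_hash_find() above now takes its reference with kref_get_unless_zero(), so an entry whose refcount has already reached zero is skipped instead of being resurrected. A self-contained sketch of that lookup-and-hold idiom with hypothetical names; only the kref and RCU calls match the hunk above.

#include <linux/kref.h>
#include <linux/rculist.h>

struct example_entry {
	struct hlist_node hash_entry;
	struct kref refcount;
};

/* Return the first live entry in @head with one reference held, or NULL. */
static struct example_entry *example_get_first(struct hlist_head *head)
{
	struct example_entry *entry, *found = NULL;

	rcu_read_lock();
	hlist_for_each_entry_rcu(entry, head, hash_entry) {
		/* refcount already zero: the entry is being freed, skip it */
		if (!kref_get_unless_zero(&entry->refcount))
			continue;

		found = entry;
		break;
	}
	rcu_read_unlock();

	return found;
}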
@@ -150,7 +159,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr,
150 * @addr: the mac address of the client to look for 159 * @addr: the mac address of the client to look for
151 * @vid: VLAN identifier 160 * @vid: VLAN identifier
152 * 161 *
153 * Returns a pointer to the corresponding tt_local_entry struct if the client is 162 * Return: a pointer to the corresponding tt_local_entry struct if the client is
154 * found, NULL otherwise. 163 * found, NULL otherwise.
155 */ 164 */
156static struct batadv_tt_local_entry * 165static struct batadv_tt_local_entry *
@@ -175,7 +184,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
175 * @addr: the mac address of the client to look for 184 * @addr: the mac address of the client to look for
176 * @vid: VLAN identifier 185 * @vid: VLAN identifier
177 * 186 *
178 * Returns a pointer to the corresponding tt_global_entry struct if the client 187 * Return: a pointer to the corresponding tt_global_entry struct if the client
179 * is found, NULL otherwise. 188 * is found, NULL otherwise.
180 */ 189 */
181static struct batadv_tt_global_entry * 190static struct batadv_tt_global_entry *
@@ -194,34 +203,68 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
194 return tt_global_entry; 203 return tt_global_entry;
195} 204}
196 205
206/**
207 * batadv_tt_local_entry_release - release tt_local_entry from lists and queue
208 * for free after rcu grace period
209 * @ref: kref pointer of the tt_local_entry
210 */
211static void batadv_tt_local_entry_release(struct kref *ref)
212{
213 struct batadv_tt_local_entry *tt_local_entry;
214
215 tt_local_entry = container_of(ref, struct batadv_tt_local_entry,
216 common.refcount);
217
218 kfree_rcu(tt_local_entry, common.rcu);
219}
220
221/**
222 * batadv_tt_local_entry_put - decrement the tt_local_entry refcounter and
223 * possibly release it
224 * @tt_local_entry: tt_local_entry to be free'd
225 */
197static void 226static void
198batadv_tt_local_entry_free_ref(struct batadv_tt_local_entry *tt_local_entry) 227batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
199{ 228{
200 if (atomic_dec_and_test(&tt_local_entry->common.refcount)) 229 kref_put(&tt_local_entry->common.refcount,
201 kfree_rcu(tt_local_entry, common.rcu); 230 batadv_tt_local_entry_release);
202} 231}
203 232
204/** 233/**
205 * batadv_tt_global_entry_free_ref - decrement the refcounter for a 234 * batadv_tt_global_entry_release - release tt_global_entry from lists and queue
206 * tt_global_entry and possibly free it 235 * for free after rcu grace period
207 * @tt_global_entry: the object to free 236 * @ref: kref pointer of the nc_node
237 */
238static void batadv_tt_global_entry_release(struct kref *ref)
239{
240 struct batadv_tt_global_entry *tt_global_entry;
241
242 tt_global_entry = container_of(ref, struct batadv_tt_global_entry,
243 common.refcount);
244
245 batadv_tt_global_del_orig_list(tt_global_entry);
246 kfree_rcu(tt_global_entry, common.rcu);
247}
248
249/**
250 * batadv_tt_global_entry_put - decrement the tt_global_entry refcounter and
251 * possibly release it
252 * @tt_global_entry: tt_global_entry to be free'd
208 */ 253 */
209static void 254static void
210batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry) 255batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
211{ 256{
212 if (atomic_dec_and_test(&tt_global_entry->common.refcount)) { 257 kref_put(&tt_global_entry->common.refcount,
213 batadv_tt_global_del_orig_list(tt_global_entry); 258 batadv_tt_global_entry_release);
214 kfree_rcu(tt_global_entry, common.rcu);
215 }
216} 259}
217 260
218/** 261/**
219 * batadv_tt_global_hash_count - count the number of orig entries 262 * batadv_tt_global_hash_count - count the number of orig entries
220 * @hash: hash table containing the tt entries 263 * @bat_priv: the bat priv with all the soft interface information
221 * @addr: the mac address of the client to count entries for 264 * @addr: the mac address of the client to count entries for
222 * @vid: VLAN identifier 265 * @vid: VLAN identifier
223 * 266 *
224 * Return the number of originators advertising the given address/data 267 * Return: the number of originators advertising the given address/data
225 * (excluding ourself). 268 * (excluding ourself).
226 */ 269 */
227int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, 270int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
@@ -235,7 +278,7 @@ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
235 return 0; 278 return 0;
236 279
237 count = atomic_read(&tt_global_entry->orig_list_count); 280 count = atomic_read(&tt_global_entry->orig_list_count);
238 batadv_tt_global_entry_free_ref(tt_global_entry); 281 batadv_tt_global_entry_put(tt_global_entry);
239 282
240 return count; 283 return count;
241} 284}
@@ -258,7 +301,7 @@ static void batadv_tt_local_size_mod(struct batadv_priv *bat_priv,
258 301
259 atomic_add(v, &vlan->tt.num_entries); 302 atomic_add(v, &vlan->tt.num_entries);
260 303
261 batadv_softif_vlan_free_ref(vlan); 304 batadv_softif_vlan_put(vlan);
262} 305}
263 306
264/** 307/**
@@ -286,9 +329,9 @@ static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv,
286} 329}
287 330
288/** 331/**
289 * batadv_tt_global_size_mod - change the size by v of the local table 332 * batadv_tt_global_size_mod - change the size by v of the global table
290 * identified by vid 333 * for orig_node identified by vid
291 * @bat_priv: the bat priv with all the soft interface information 334 * @orig_node: the originator for which the table has to be modified
292 * @vid: the VLAN identifier 335 * @vid: the VLAN identifier
293 * @v: the amount to sum to the global table size 336 * @v: the amount to sum to the global table size
294 */ 337 */
@@ -303,12 +346,14 @@ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node,
303 346
304 if (atomic_add_return(v, &vlan->tt.num_entries) == 0) { 347 if (atomic_add_return(v, &vlan->tt.num_entries) == 0) {
305 spin_lock_bh(&orig_node->vlan_list_lock); 348 spin_lock_bh(&orig_node->vlan_list_lock);
306 hlist_del_init_rcu(&vlan->list); 349 if (!hlist_unhashed(&vlan->list)) {
350 hlist_del_init_rcu(&vlan->list);
351 batadv_orig_node_vlan_put(vlan);
352 }
307 spin_unlock_bh(&orig_node->vlan_list_lock); 353 spin_unlock_bh(&orig_node->vlan_list_lock);
308 batadv_orig_node_vlan_free_ref(vlan);
309 } 354 }
310 355
311 batadv_orig_node_vlan_free_ref(vlan); 356 batadv_orig_node_vlan_put(vlan);
312} 357}
313 358
314/** 359/**
@@ -338,22 +383,28 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node,
338/** 383/**
339 * batadv_tt_orig_list_entry_release - release tt orig entry from lists and 384 * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
340 * queue for free after rcu grace period 385 * queue for free after rcu grace period
341 * @orig_entry: tt orig entry to be free'd 386 * @ref: kref pointer of the tt orig entry
342 */ 387 */
343static void 388static void batadv_tt_orig_list_entry_release(struct kref *ref)
344batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry)
345{ 389{
346 batadv_orig_node_free_ref(orig_entry->orig_node); 390 struct batadv_tt_orig_list_entry *orig_entry;
391
392 orig_entry = container_of(ref, struct batadv_tt_orig_list_entry,
393 refcount);
394
395 batadv_orig_node_put(orig_entry->orig_node);
347 kfree_rcu(orig_entry, rcu); 396 kfree_rcu(orig_entry, rcu);
348} 397}
349 398
399/**
400 * batadv_tt_orig_list_entry_put - decrement the tt orig entry refcounter and
401 * possibly release it
402 * @orig_entry: tt orig entry to be free'd
403 */
350static void 404static void
351batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) 405batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry)
352{ 406{
353 if (!atomic_dec_and_test(&orig_entry->refcount)) 407 kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release);
354 return;
355
356 batadv_tt_orig_list_entry_release(orig_entry);
357} 408}
358 409
359/** 410/**
@@ -435,7 +486,7 @@ unlock:
435 * batadv_tt_len - compute length in bytes of given number of tt changes 486 * batadv_tt_len - compute length in bytes of given number of tt changes
436 * @changes_num: number of tt changes 487 * @changes_num: number of tt changes
437 * 488 *
438 * Returns computed length in bytes. 489 * Return: computed length in bytes.
439 */ 490 */
440static int batadv_tt_len(int changes_num) 491static int batadv_tt_len(int changes_num)
441{ 492{
@@ -446,7 +497,7 @@ static int batadv_tt_len(int changes_num)
446 * batadv_tt_entries - compute the number of entries fitting in tt_len bytes 497 * batadv_tt_entries - compute the number of entries fitting in tt_len bytes
447 * @tt_len: available space 498 * @tt_len: available space
448 * 499 *
449 * Returns the number of entries. 500 * Return: the number of entries.
450 */ 501 */
451static u16 batadv_tt_entries(u16 tt_len) 502static u16 batadv_tt_entries(u16 tt_len)
452{ 503{
@@ -458,7 +509,7 @@ static u16 batadv_tt_entries(u16 tt_len)
458 * size when transmitted over the air 509 * size when transmitted over the air
459 * @bat_priv: the bat priv with all the soft interface information 510 * @bat_priv: the bat priv with all the soft interface information
460 * 511 *
461 * Returns local translation table size in bytes. 512 * Return: local translation table size in bytes.
462 */ 513 */
463static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv) 514static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv)
464{ 515{
@@ -510,7 +561,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
510 561
511 batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, 562 batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
512 batadv_choose_tt, &tt_global->common); 563 batadv_choose_tt, &tt_global->common);
513 batadv_tt_global_entry_free_ref(tt_global); 564 batadv_tt_global_entry_put(tt_global);
514} 565}
515 566
516/** 567/**
@@ -524,7 +575,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
524 * @mark: the value contained in the skb->mark field of the received packet (if 575 * @mark: the value contained in the skb->mark field of the received packet (if
525 * any) 576 * any)
526 * 577 *
527 * Returns true if the client was successfully added, false otherwise. 578 * Return: true if the client was successfully added, false otherwise.
528 */ 579 */
529bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, 580bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
530 unsigned short vid, int ifindex, u32 mark) 581 unsigned short vid, int ifindex, u32 mark)
@@ -618,7 +669,8 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
618 tt_local->common.vid = vid; 669 tt_local->common.vid = vid;
619 if (batadv_is_wifi_netdev(in_dev)) 670 if (batadv_is_wifi_netdev(in_dev))
620 tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; 671 tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
621 atomic_set(&tt_local->common.refcount, 2); 672 kref_init(&tt_local->common.refcount);
673 kref_get(&tt_local->common.refcount);
622 tt_local->last_seen = jiffies; 674 tt_local->last_seen = jiffies;
623 tt_local->common.added_at = tt_local->last_seen; 675 tt_local->common.added_at = tt_local->last_seen;
624 676
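The old atomic_set(&refcount, 2) becomes kref_init() plus kref_get(): kref_init() starts the counter at 1 for the reference the caller keeps in tt_local, and kref_get() takes a second reference on behalf of the hash table, which the next hunk drops again if the hash insertion fails. A sketch of the idea, where struct entry, entry_hash_add() and entry_put() are hypothetical stand-ins:

#include <linux/errno.h>
#include <linux/kref.h>

struct entry {
	struct kref refcount;
};

/* hypothetical stand-ins for the real hash-insert and put helpers */
int entry_hash_add(struct entry *entry);
void entry_put(struct entry *entry);

static int entry_insert(struct entry *entry)
{
	/* one reference for the caller ... */
	kref_init(&entry->refcount);
	/* ... and one for the hash table the entry is inserted into */
	kref_get(&entry->refcount);

	if (entry_hash_add(entry)) {
		/* insertion failed: give back the hash table's reference */
		entry_put(entry);
		return -ENOMEM;
	}

	return 0;
}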
@@ -635,8 +687,8 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
635 687
636 if (unlikely(hash_added != 0)) { 688 if (unlikely(hash_added != 0)) {
637 /* remove the reference for the hash */ 689 /* remove the reference for the hash */
638 batadv_tt_local_entry_free_ref(tt_local); 690 batadv_tt_local_entry_put(tt_local);
639 batadv_softif_vlan_free_ref(vlan); 691 batadv_softif_vlan_put(vlan);
640 goto out; 692 goto out;
641 } 693 }
642 694
@@ -702,9 +754,9 @@ out:
702 if (in_dev) 754 if (in_dev)
703 dev_put(in_dev); 755 dev_put(in_dev);
704 if (tt_local) 756 if (tt_local)
705 batadv_tt_local_entry_free_ref(tt_local); 757 batadv_tt_local_entry_put(tt_local);
706 if (tt_global) 758 if (tt_global)
707 batadv_tt_global_entry_free_ref(tt_global); 759 batadv_tt_global_entry_put(tt_global);
708 return ret; 760 return ret;
709} 761}
710 762
@@ -719,12 +771,11 @@ out:
719 * function reserves the amount of space needed to send the entire global TT 771 * function reserves the amount of space needed to send the entire global TT
720 * table. In case of success the value is updated with the real amount of 772 * table. In case of success the value is updated with the real amount of
721 * reserved bytes 773 * reserved bytes
722
723 * Allocate the needed amount of memory for the entire TT TVLV and write its 774 * Allocate the needed amount of memory for the entire TT TVLV and write its
724 * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data 775 * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data
725 * objects, one per active VLAN served by the originator node. 776 * objects, one per active VLAN served by the originator node.
726 * 777 *
727 * Return the size of the allocated buffer or 0 in case of failure. 778 * Return: the size of the allocated buffer or 0 in case of failure.
728 */ 779 */
729static u16 780static u16
730batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, 781batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
@@ -798,7 +849,7 @@ out:
798 * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data 849 * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data
799 * objects, one per active VLAN. 850 * objects, one per active VLAN.
800 * 851 *
801 * Return the size of the allocated buffer or 0 in case of failure. 852 * Return: the size of the allocated buffer or 0 in case of failure.
802 */ 853 */
803static u16 854static u16
804batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, 855batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
@@ -1003,13 +1054,13 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
1003 no_purge ? 0 : last_seen_msecs, 1054 no_purge ? 0 : last_seen_msecs,
1004 vlan->tt.crc); 1055 vlan->tt.crc);
1005 1056
1006 batadv_softif_vlan_free_ref(vlan); 1057 batadv_softif_vlan_put(vlan);
1007 } 1058 }
1008 rcu_read_unlock(); 1059 rcu_read_unlock();
1009 } 1060 }
1010out: 1061out:
1011 if (primary_if) 1062 if (primary_if)
1012 batadv_hardif_free_ref(primary_if); 1063 batadv_hardif_put(primary_if);
1013 return 0; 1064 return 0;
1014} 1065}
1015 1066
@@ -1040,7 +1091,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
1040 * @message: message to append to the log on deletion 1091 * @message: message to append to the log on deletion
1041 * @roaming: true if the deletion is due to a roaming event 1092 * @roaming: true if the deletion is due to a roaming event
1042 * 1093 *
1043 * Returns the flags assigned to the local entry before being deleted 1094 * Return: the flags assigned to the local entry before being deleted
1044 */ 1095 */
1045u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, 1096u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
1046 unsigned short vid, const char *message, 1097 unsigned short vid, const char *message,
@@ -1086,19 +1137,19 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
1086 goto out; 1137 goto out;
1087 1138
1088 /* extra call to free the local tt entry */ 1139 /* extra call to free the local tt entry */
1089 batadv_tt_local_entry_free_ref(tt_local_entry); 1140 batadv_tt_local_entry_put(tt_local_entry);
1090 1141
1091 /* decrease the reference held for this vlan */ 1142 /* decrease the reference held for this vlan */
1092 vlan = batadv_softif_vlan_get(bat_priv, vid); 1143 vlan = batadv_softif_vlan_get(bat_priv, vid);
1093 if (!vlan) 1144 if (!vlan)
1094 goto out; 1145 goto out;
1095 1146
1096 batadv_softif_vlan_free_ref(vlan); 1147 batadv_softif_vlan_put(vlan);
1097 batadv_softif_vlan_free_ref(vlan); 1148 batadv_softif_vlan_put(vlan);
1098 1149
1099out: 1150out:
1100 if (tt_local_entry) 1151 if (tt_local_entry)
1101 batadv_tt_local_entry_free_ref(tt_local_entry); 1152 batadv_tt_local_entry_put(tt_local_entry);
1102 1153
1103 return curr_flags; 1154 return curr_flags;
1104} 1155}
@@ -1194,11 +1245,11 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
1194 vlan = batadv_softif_vlan_get(bat_priv, 1245 vlan = batadv_softif_vlan_get(bat_priv,
1195 tt_common_entry->vid); 1246 tt_common_entry->vid);
1196 if (vlan) { 1247 if (vlan) {
1197 batadv_softif_vlan_free_ref(vlan); 1248 batadv_softif_vlan_put(vlan);
1198 batadv_softif_vlan_free_ref(vlan); 1249 batadv_softif_vlan_put(vlan);
1199 } 1250 }
1200 1251
1201 batadv_tt_local_entry_free_ref(tt_local); 1252 batadv_tt_local_entry_put(tt_local);
1202 } 1253 }
1203 spin_unlock_bh(list_lock); 1254 spin_unlock_bh(list_lock);
1204 } 1255 }
@@ -1240,10 +1291,16 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv)
1240 spin_unlock_bh(&bat_priv->tt.changes_list_lock); 1291 spin_unlock_bh(&bat_priv->tt.changes_list_lock);
1241} 1292}
1242 1293
1243/* retrieves the orig_tt_list_entry belonging to orig_node from the 1294/**
1295 * batadv_tt_global_orig_entry_find - find a TT orig_list_entry
1296 * @entry: the TT global entry where the orig_list_entry has to be
1297 * extracted from
1298 * @orig_node: the originator for which the orig_list_entry has to be found
1299 *
1300 * retrieve the orig_tt_list_entry belonging to orig_node from the
1244 * batadv_tt_global_entry list 1301 * batadv_tt_global_entry list
1245 * 1302 *
1246 * returns it with an increased refcounter, NULL if not found 1303 * Return: it with an increased refcounter, NULL if not found
1247 */ 1304 */
1248static struct batadv_tt_orig_list_entry * 1305static struct batadv_tt_orig_list_entry *
1249batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, 1306batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
@@ -1257,7 +1314,7 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
1257 hlist_for_each_entry_rcu(tmp_orig_entry, head, list) { 1314 hlist_for_each_entry_rcu(tmp_orig_entry, head, list) {
1258 if (tmp_orig_entry->orig_node != orig_node) 1315 if (tmp_orig_entry->orig_node != orig_node)
1259 continue; 1316 continue;
1260 if (!atomic_inc_not_zero(&tmp_orig_entry->refcount)) 1317 if (!kref_get_unless_zero(&tmp_orig_entry->refcount))
1261 continue; 1318 continue;
1262 1319
1263 orig_entry = tmp_orig_entry; 1320 orig_entry = tmp_orig_entry;
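The lookup above walks an RCU-protected list, and atomic_inc_not_zero() becomes kref_get_unless_zero(): under RCU an entry can still be visible while its last reference is already gone, so a new reference may only be taken if the counter has not yet reached zero. A compact sketch of that lookup pattern (struct item and item_find() are illustrative names, and the real code additionally matches on orig_node):

#include <linux/kref.h>
#include <linux/rculist.h>

struct item {
	struct hlist_node list;
	struct kref refcount;
};

/* Return the first item we can still take a reference on. Entries whose
 * refcount already reached zero are about to be freed and must be
 * skipped; kref_get_unless_zero() is the kref counterpart of
 * atomic_inc_not_zero().
 */
static struct item *item_find(struct hlist_head *head)
{
	struct item *tmp, *found = NULL;

	rcu_read_lock();
	hlist_for_each_entry_rcu(tmp, head, list) {
		if (!kref_get_unless_zero(&tmp->refcount))
			continue;

		found = tmp;
		break;
	}
	rcu_read_unlock();

	return found;
}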
@@ -1268,8 +1325,15 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
1268 return orig_entry; 1325 return orig_entry;
1269} 1326}
1270 1327
1271/* find out if an orig_node is already in the list of a tt_global_entry. 1328/**
1272 * returns true if found, false otherwise 1329 * batadv_tt_global_entry_has_orig - check if a TT global entry is also handled
1330 * by a given originator
1331 * @entry: the TT global entry to check
1332 * @orig_node: the originator to search in the list
1333 *
1334 * find out if an orig_node is already in the list of a tt_global_entry.
1335 *
1336 * Return: true if found, false otherwise
1273 */ 1337 */
1274static bool 1338static bool
1275batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, 1339batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
@@ -1281,7 +1345,7 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
1281 orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node); 1345 orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node);
1282 if (orig_entry) { 1346 if (orig_entry) {
1283 found = true; 1347 found = true;
1284 batadv_tt_orig_list_entry_free_ref(orig_entry); 1348 batadv_tt_orig_list_entry_put(orig_entry);
1285 } 1349 }
1286 1350
1287 return found; 1351 return found;
@@ -1307,11 +1371,12 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
1307 goto out; 1371 goto out;
1308 1372
1309 INIT_HLIST_NODE(&orig_entry->list); 1373 INIT_HLIST_NODE(&orig_entry->list);
1310 atomic_inc(&orig_node->refcount); 1374 kref_get(&orig_node->refcount);
1311 batadv_tt_global_size_inc(orig_node, tt_global->common.vid); 1375 batadv_tt_global_size_inc(orig_node, tt_global->common.vid);
1312 orig_entry->orig_node = orig_node; 1376 orig_entry->orig_node = orig_node;
1313 orig_entry->ttvn = ttvn; 1377 orig_entry->ttvn = ttvn;
1314 atomic_set(&orig_entry->refcount, 2); 1378 kref_init(&orig_entry->refcount);
1379 kref_get(&orig_entry->refcount);
1315 1380
1316 spin_lock_bh(&tt_global->list_lock); 1381 spin_lock_bh(&tt_global->list_lock);
1317 hlist_add_head_rcu(&orig_entry->list, 1382 hlist_add_head_rcu(&orig_entry->list,
@@ -1321,7 +1386,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
1321 1386
1322out: 1387out:
1323 if (orig_entry) 1388 if (orig_entry)
1324 batadv_tt_orig_list_entry_free_ref(orig_entry); 1389 batadv_tt_orig_list_entry_put(orig_entry);
1325} 1390}
1326 1391
1327/** 1392/**
@@ -1341,7 +1406,7 @@ out:
1341 * 1406 *
1342 * The caller must hold orig_node refcount. 1407 * The caller must hold orig_node refcount.
1343 * 1408 *
1344 * Return true if the new entry has been added, false otherwise 1409 * Return: true if the new entry has been added, false otherwise
1345 */ 1410 */
1346static bool batadv_tt_global_add(struct batadv_priv *bat_priv, 1411static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
1347 struct batadv_orig_node *orig_node, 1412 struct batadv_orig_node *orig_node,
@@ -1387,7 +1452,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
1387 */ 1452 */
1388 if (flags & BATADV_TT_CLIENT_ROAM) 1453 if (flags & BATADV_TT_CLIENT_ROAM)
1389 tt_global_entry->roam_at = jiffies; 1454 tt_global_entry->roam_at = jiffies;
1390 atomic_set(&common->refcount, 2); 1455 kref_init(&common->refcount);
1456 kref_get(&common->refcount);
1391 common->added_at = jiffies; 1457 common->added_at = jiffies;
1392 1458
1393 INIT_HLIST_HEAD(&tt_global_entry->orig_list); 1459 INIT_HLIST_HEAD(&tt_global_entry->orig_list);
@@ -1401,7 +1467,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
1401 1467
1402 if (unlikely(hash_added != 0)) { 1468 if (unlikely(hash_added != 0)) {
1403 /* remove the reference for the hash */ 1469 /* remove the reference for the hash */
1404 batadv_tt_global_entry_free_ref(tt_global_entry); 1470 batadv_tt_global_entry_put(tt_global_entry);
1405 goto out_remove; 1471 goto out_remove;
1406 } 1472 }
1407 } else { 1473 } else {
@@ -1487,9 +1553,9 @@ out_remove:
1487 1553
1488out: 1554out:
1489 if (tt_global_entry) 1555 if (tt_global_entry)
1490 batadv_tt_global_entry_free_ref(tt_global_entry); 1556 batadv_tt_global_entry_put(tt_global_entry);
1491 if (tt_local_entry) 1557 if (tt_local_entry)
1492 batadv_tt_local_entry_free_ref(tt_local_entry); 1558 batadv_tt_local_entry_put(tt_local_entry);
1493 return ret; 1559 return ret;
1494} 1560}
1495 1561
@@ -1499,7 +1565,7 @@ out:
1499 * @tt_global_entry: global translation table entry to be analyzed 1565 * @tt_global_entry: global translation table entry to be analyzed
1500 * 1566 *
1501 * This functon assumes the caller holds rcu_read_lock(). 1567 * This functon assumes the caller holds rcu_read_lock().
1502 * Returns best originator list entry or NULL on errors. 1568 * Return: best originator list entry or NULL on errors.
1503 */ 1569 */
1504static struct batadv_tt_orig_list_entry * 1570static struct batadv_tt_orig_list_entry *
1505batadv_transtable_best_orig(struct batadv_priv *bat_priv, 1571batadv_transtable_best_orig(struct batadv_priv *bat_priv,
@@ -1520,20 +1586,20 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
1520 if (best_router && 1586 if (best_router &&
1521 bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT, 1587 bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT,
1522 best_router, BATADV_IF_DEFAULT) <= 0) { 1588 best_router, BATADV_IF_DEFAULT) <= 0) {
1523 batadv_neigh_node_free_ref(router); 1589 batadv_neigh_node_put(router);
1524 continue; 1590 continue;
1525 } 1591 }
1526 1592
1527 /* release the refcount for the "old" best */ 1593 /* release the refcount for the "old" best */
1528 if (best_router) 1594 if (best_router)
1529 batadv_neigh_node_free_ref(best_router); 1595 batadv_neigh_node_put(best_router);
1530 1596
1531 best_entry = orig_entry; 1597 best_entry = orig_entry;
1532 best_router = router; 1598 best_router = router;
1533 } 1599 }
1534 1600
1535 if (best_router) 1601 if (best_router)
1536 batadv_neigh_node_free_ref(best_router); 1602 batadv_neigh_node_put(best_router);
1537 1603
1538 return best_entry; 1604 return best_entry;
1539} 1605}
@@ -1586,7 +1652,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
1586 ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), 1652 ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
1587 ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.')); 1653 ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.'));
1588 1654
1589 batadv_orig_node_vlan_free_ref(vlan); 1655 batadv_orig_node_vlan_put(vlan);
1590 } 1656 }
1591 1657
1592print_list: 1658print_list:
@@ -1618,7 +1684,7 @@ print_list:
1618 ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'), 1684 ((flags & BATADV_TT_CLIENT_ISOLA) ? 'I' : '.'),
1619 ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.')); 1685 ((flags & BATADV_TT_CLIENT_TEMP) ? 'T' : '.'));
1620 1686
1621 batadv_orig_node_vlan_free_ref(vlan); 1687 batadv_orig_node_vlan_put(vlan);
1622 } 1688 }
1623} 1689}
1624 1690
@@ -1659,7 +1725,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
1659 } 1725 }
1660out: 1726out:
1661 if (primary_if) 1727 if (primary_if)
1662 batadv_hardif_free_ref(primary_if); 1728 batadv_hardif_put(primary_if);
1663 return 0; 1729 return 0;
1664} 1730}
1665 1731
@@ -1687,7 +1753,7 @@ _batadv_tt_global_del_orig_entry(struct batadv_tt_global_entry *tt_global_entry,
1687 * being part of a list 1753 * being part of a list
1688 */ 1754 */
1689 hlist_del_rcu(&orig_entry->list); 1755 hlist_del_rcu(&orig_entry->list);
1690 batadv_tt_orig_list_entry_free_ref(orig_entry); 1756 batadv_tt_orig_list_entry_put(orig_entry);
1691} 1757}
1692 1758
1693/* deletes the orig list of a tt_global_entry */ 1759/* deletes the orig list of a tt_global_entry */
@@ -1843,9 +1909,9 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
1843 1909
1844out: 1910out:
1845 if (tt_global_entry) 1911 if (tt_global_entry)
1846 batadv_tt_global_entry_free_ref(tt_global_entry); 1912 batadv_tt_global_entry_put(tt_global_entry);
1847 if (local_entry) 1913 if (local_entry)
1848 batadv_tt_local_entry_free_ref(local_entry); 1914 batadv_tt_local_entry_put(local_entry);
1849} 1915}
1850 1916
1851/** 1917/**
@@ -1899,7 +1965,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
1899 tt_global->common.addr, 1965 tt_global->common.addr,
1900 BATADV_PRINT_VID(vid), message); 1966 BATADV_PRINT_VID(vid), message);
1901 hlist_del_rcu(&tt_common_entry->hash_entry); 1967 hlist_del_rcu(&tt_common_entry->hash_entry);
1902 batadv_tt_global_entry_free_ref(tt_global); 1968 batadv_tt_global_entry_put(tt_global);
1903 } 1969 }
1904 } 1970 }
1905 spin_unlock_bh(list_lock); 1971 spin_unlock_bh(list_lock);
@@ -1962,7 +2028,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
1962 2028
1963 hlist_del_rcu(&tt_common->hash_entry); 2029 hlist_del_rcu(&tt_common->hash_entry);
1964 2030
1965 batadv_tt_global_entry_free_ref(tt_global); 2031 batadv_tt_global_entry_put(tt_global);
1966 } 2032 }
1967 spin_unlock_bh(list_lock); 2033 spin_unlock_bh(list_lock);
1968 } 2034 }
@@ -1994,7 +2060,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv)
1994 tt_global = container_of(tt_common_entry, 2060 tt_global = container_of(tt_common_entry,
1995 struct batadv_tt_global_entry, 2061 struct batadv_tt_global_entry,
1996 common); 2062 common);
1997 batadv_tt_global_entry_free_ref(tt_global); 2063 batadv_tt_global_entry_put(tt_global);
1998 } 2064 }
1999 spin_unlock_bh(list_lock); 2065 spin_unlock_bh(list_lock);
2000 } 2066 }
@@ -2029,7 +2095,7 @@ _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry,
2029 * @addr: mac address of the destination client 2095 * @addr: mac address of the destination client
2030 * @vid: VLAN identifier 2096 * @vid: VLAN identifier
2031 * 2097 *
2032 * Returns a pointer to the originator that was selected as destination in the 2098 * Return: a pointer to the originator that was selected as destination in the
2033 * mesh for contacting the client 'addr', NULL otherwise. 2099 * mesh for contacting the client 'addr', NULL otherwise.
2034 * In case of multiple originators serving the same client, the function returns 2100 * In case of multiple originators serving the same client, the function returns
2035 * the best one (best in terms of metric towards the destination node). 2101 * the best one (best in terms of metric towards the destination node).
@@ -2069,15 +2135,15 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
2069 /* found anything? */ 2135 /* found anything? */
2070 if (best_entry) 2136 if (best_entry)
2071 orig_node = best_entry->orig_node; 2137 orig_node = best_entry->orig_node;
2072 if (orig_node && !atomic_inc_not_zero(&orig_node->refcount)) 2138 if (orig_node && !kref_get_unless_zero(&orig_node->refcount))
2073 orig_node = NULL; 2139 orig_node = NULL;
2074 rcu_read_unlock(); 2140 rcu_read_unlock();
2075 2141
2076out: 2142out:
2077 if (tt_global_entry) 2143 if (tt_global_entry)
2078 batadv_tt_global_entry_free_ref(tt_global_entry); 2144 batadv_tt_global_entry_put(tt_global_entry);
2079 if (tt_local_entry) 2145 if (tt_local_entry)
2080 batadv_tt_local_entry_free_ref(tt_local_entry); 2146 batadv_tt_local_entry_put(tt_local_entry);
2081 2147
2082 return orig_node; 2148 return orig_node;
2083} 2149}
@@ -2104,7 +2170,7 @@ out:
2104 * because the XOR operation can combine them all while trying to reduce the 2170 * because the XOR operation can combine them all while trying to reduce the
2105 * noise as much as possible. 2171 * noise as much as possible.
2106 * 2172 *
2107 * Returns the checksum of the global table of a given originator. 2173 * Return: the checksum of the global table of a given originator.
2108 */ 2174 */
2109static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, 2175static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
2110 struct batadv_orig_node *orig_node, 2176 struct batadv_orig_node *orig_node,
@@ -2181,7 +2247,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
2181 * For details about the computation, please refer to the documentation for 2247 * For details about the computation, please refer to the documentation for
2182 * batadv_tt_global_crc(). 2248 * batadv_tt_global_crc().
2183 * 2249 *
2184 * Returns the checksum of the local table 2250 * Return: the checksum of the local table
2185 */ 2251 */
2186static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv, 2252static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv,
2187 unsigned short vid) 2253 unsigned short vid)
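The two kernel-doc blocks above describe how the global and local table checksums are built: a checksum is computed per client entry and the per-entry values are combined with XOR, so the final CRC does not depend on the order in which the hash buckets are visited. The batman-adv code also folds entry flags into the calculation and skips certain entry types; the rough sketch below only illustrates the order-independent XOR combination (struct tt_entry and tt_table_crc() are made-up names):

#include <linux/crc32c.h>
#include <linux/if_ether.h>

struct tt_entry {
	u8 addr[ETH_ALEN];
	unsigned short vid;
};

/* combine per-entry CRCs with XOR so the result is order-independent */
static u32 tt_table_crc(const struct tt_entry *entries, unsigned int num)
{
	u32 crc = 0, crc_tmp;
	unsigned int i;

	for (i = 0; i < num; i++) {
		crc_tmp = crc32c(0, &entries[i].vid, sizeof(entries[i].vid));
		crc_tmp = crc32c(crc_tmp, entries[i].addr, ETH_ALEN);
		crc ^= crc_tmp;
	}

	return crc;
}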
@@ -2287,7 +2353,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv)
2287 * @bat_priv: the bat priv with all the soft interface information 2353 * @bat_priv: the bat priv with all the soft interface information
2288 * @orig_node: orig node this request is being issued for 2354 * @orig_node: orig node this request is being issued for
2289 * 2355 *
2290 * Returns the pointer to the new tt_req_node struct if no request 2356 * Return: the pointer to the new tt_req_node struct if no request
2291 * has already been issued for this orig_node, NULL otherwise. 2357 * has already been issued for this orig_node, NULL otherwise.
2292 */ 2358 */
2293static struct batadv_tt_req_node * 2359static struct batadv_tt_req_node *
@@ -2322,7 +2388,7 @@ unlock:
2322 * @entry_ptr: to be checked local tt entry 2388 * @entry_ptr: to be checked local tt entry
2323 * @data_ptr: not used but definition required to satisfy the callback prototype 2389 * @data_ptr: not used but definition required to satisfy the callback prototype
2324 * 2390 *
2325 * Returns 1 if the entry is a valid, 0 otherwise. 2391 * Return: 1 if the entry is a valid, 0 otherwise.
2326 */ 2392 */
2327static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) 2393static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr)
2328{ 2394{
@@ -2406,9 +2472,8 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
2406 * @orig_node: originator for which the CRCs have to be checked 2472 * @orig_node: originator for which the CRCs have to be checked
2407 * @tt_vlan: pointer to the first tvlv VLAN entry 2473 * @tt_vlan: pointer to the first tvlv VLAN entry
2408 * @num_vlan: number of tvlv VLAN entries 2474 * @num_vlan: number of tvlv VLAN entries
2409 * @create: if true, create VLAN objects if not found
2410 * 2475 *
2411 * Return true if all the received CRCs match the locally stored ones, false 2476 * Return: true if all the received CRCs match the locally stored ones, false
2412 * otherwise 2477 * otherwise
2413 */ 2478 */
2414static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, 2479static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
@@ -2438,7 +2503,7 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
2438 return false; 2503 return false;
2439 2504
2440 crc = vlan->tt.crc; 2505 crc = vlan->tt.crc;
2441 batadv_orig_node_vlan_free_ref(vlan); 2506 batadv_orig_node_vlan_put(vlan);
2442 2507
2443 if (crc != ntohl(tt_vlan_tmp->crc)) 2508 if (crc != ntohl(tt_vlan_tmp->crc))
2444 return false; 2509 return false;
@@ -2511,6 +2576,8 @@ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv,
2511 * @num_vlan: number of tvlv VLAN entries 2576 * @num_vlan: number of tvlv VLAN entries
2512 * @full_table: ask for the entire translation table if true, while only for the 2577 * @full_table: ask for the entire translation table if true, while only for the
2513 * last TT diff otherwise 2578 * last TT diff otherwise
2579 *
2580 * Return: true if the TT Request was sent, false otherwise
2514 */ 2581 */
2515static int batadv_send_tt_request(struct batadv_priv *bat_priv, 2582static int batadv_send_tt_request(struct batadv_priv *bat_priv,
2516 struct batadv_orig_node *dst_orig_node, 2583 struct batadv_orig_node *dst_orig_node,
@@ -2571,7 +2638,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
2571 2638
2572out: 2639out:
2573 if (primary_if) 2640 if (primary_if)
2574 batadv_hardif_free_ref(primary_if); 2641 batadv_hardif_put(primary_if);
2575 if (ret && tt_req_node) { 2642 if (ret && tt_req_node) {
2576 spin_lock_bh(&bat_priv->tt.req_list_lock); 2643 spin_lock_bh(&bat_priv->tt.req_list_lock);
2577 /* hlist_del_init() verifies tt_req_node still is in the list */ 2644 /* hlist_del_init() verifies tt_req_node still is in the list */
@@ -2591,7 +2658,7 @@ out:
2591 * @req_src: mac address of tt request sender 2658 * @req_src: mac address of tt request sender
2592 * @req_dst: mac address of tt request recipient 2659 * @req_dst: mac address of tt request recipient
2593 * 2660 *
2594 * Returns true if tt request reply was sent, false otherwise. 2661 * Return: true if tt request reply was sent, false otherwise.
2595 */ 2662 */
2596static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, 2663static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
2597 struct batadv_tvlv_tt_data *tt_data, 2664 struct batadv_tvlv_tt_data *tt_data,
@@ -2709,9 +2776,9 @@ unlock:
2709 2776
2710out: 2777out:
2711 if (res_dst_orig_node) 2778 if (res_dst_orig_node)
2712 batadv_orig_node_free_ref(res_dst_orig_node); 2779 batadv_orig_node_put(res_dst_orig_node);
2713 if (req_dst_orig_node) 2780 if (req_dst_orig_node)
2714 batadv_orig_node_free_ref(req_dst_orig_node); 2781 batadv_orig_node_put(req_dst_orig_node);
2715 kfree(tvlv_tt_data); 2782 kfree(tvlv_tt_data);
2716 return ret; 2783 return ret;
2717} 2784}
@@ -2723,7 +2790,7 @@ out:
2723 * @tt_data: tt data containing the tt request information 2790 * @tt_data: tt data containing the tt request information
2724 * @req_src: mac address of tt request sender 2791 * @req_src: mac address of tt request sender
2725 * 2792 *
2726 * Returns true if tt request reply was sent, false otherwise. 2793 * Return: true if tt request reply was sent, false otherwise.
2727 */ 2794 */
2728static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, 2795static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv,
2729 struct batadv_tvlv_tt_data *tt_data, 2796 struct batadv_tvlv_tt_data *tt_data,
@@ -2826,9 +2893,9 @@ unlock:
2826out: 2893out:
2827 spin_unlock_bh(&bat_priv->tt.commit_lock); 2894 spin_unlock_bh(&bat_priv->tt.commit_lock);
2828 if (orig_node) 2895 if (orig_node)
2829 batadv_orig_node_free_ref(orig_node); 2896 batadv_orig_node_put(orig_node);
2830 if (primary_if) 2897 if (primary_if)
2831 batadv_hardif_free_ref(primary_if); 2898 batadv_hardif_put(primary_if);
2832 kfree(tvlv_tt_data); 2899 kfree(tvlv_tt_data);
2833 /* The packet was for this host, so it doesn't need to be re-routed */ 2900 /* The packet was for this host, so it doesn't need to be re-routed */
2834 return true; 2901 return true;
@@ -2841,7 +2908,7 @@ out:
2841 * @req_src: mac address of tt request sender 2908 * @req_src: mac address of tt request sender
2842 * @req_dst: mac address of tt request recipient 2909 * @req_dst: mac address of tt request recipient
2843 * 2910 *
2844 * Returns true if tt request reply was sent, false otherwise. 2911 * Return: true if tt request reply was sent, false otherwise.
2845 */ 2912 */
2846static bool batadv_send_tt_response(struct batadv_priv *bat_priv, 2913static bool batadv_send_tt_response(struct batadv_priv *bat_priv,
2847 struct batadv_tvlv_tt_data *tt_data, 2914 struct batadv_tvlv_tt_data *tt_data,
@@ -2914,7 +2981,7 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
2914 2981
2915out: 2982out:
2916 if (orig_node) 2983 if (orig_node)
2917 batadv_orig_node_free_ref(orig_node); 2984 batadv_orig_node_put(orig_node);
2918} 2985}
2919 2986
2920static void batadv_tt_update_changes(struct batadv_priv *bat_priv, 2987static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
@@ -2936,7 +3003,7 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
2936 * @addr: the mac address of the client to check 3003 * @addr: the mac address of the client to check
2937 * @vid: VLAN identifier 3004 * @vid: VLAN identifier
2938 * 3005 *
2939 * Returns true if the client is served by this node, false otherwise. 3006 * Return: true if the client is served by this node, false otherwise.
2940 */ 3007 */
2941bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr, 3008bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
2942 unsigned short vid) 3009 unsigned short vid)
@@ -2956,7 +3023,7 @@ bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
2956 ret = true; 3023 ret = true;
2957out: 3024out:
2958 if (tt_local_entry) 3025 if (tt_local_entry)
2959 batadv_tt_local_entry_free_ref(tt_local_entry); 3026 batadv_tt_local_entry_put(tt_local_entry);
2960 return ret; 3027 return ret;
2961} 3028}
2962 3029
@@ -3020,7 +3087,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
3020 spin_unlock_bh(&bat_priv->tt.req_list_lock); 3087 spin_unlock_bh(&bat_priv->tt.req_list_lock);
3021out: 3088out:
3022 if (orig_node) 3089 if (orig_node)
3023 batadv_orig_node_free_ref(orig_node); 3090 batadv_orig_node_put(orig_node);
3024} 3091}
3025 3092
3026static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv) 3093static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv)
@@ -3053,11 +3120,16 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv)
3053 spin_unlock_bh(&bat_priv->tt.roam_list_lock); 3120 spin_unlock_bh(&bat_priv->tt.roam_list_lock);
3054} 3121}
3055 3122
3056/* This function checks whether the client already reached the 3123/**
3124 * batadv_tt_check_roam_count - check if a client has roamed too frequently
3125 * @bat_priv: the bat priv with all the soft interface information
3126 * @client: mac address of the roaming client
3127 *
3128 * This function checks whether the client already reached the
3057 * maximum number of possible roaming phases. In this case the ROAMING_ADV 3129 * maximum number of possible roaming phases. In this case the ROAMING_ADV
3058 * will not be sent. 3130 * will not be sent.
3059 * 3131 *
3060 * returns true if the ROAMING_ADV can be sent, false otherwise 3132 * Return: true if the ROAMING_ADV can be sent, false otherwise
3061 */ 3133 */
3062static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) 3134static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client)
3063{ 3135{
@@ -3146,7 +3218,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client,
3146 3218
3147out: 3219out:
3148 if (primary_if) 3220 if (primary_if)
3149 batadv_hardif_free_ref(primary_if); 3221 batadv_hardif_put(primary_if);
3150} 3222}
3151 3223
3152static void batadv_tt_purge(struct work_struct *work) 3224static void batadv_tt_purge(struct work_struct *work)
@@ -3270,11 +3342,11 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
3270 /* decrease the reference held for this vlan */ 3342 /* decrease the reference held for this vlan */
3271 vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid); 3343 vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid);
3272 if (vlan) { 3344 if (vlan) {
3273 batadv_softif_vlan_free_ref(vlan); 3345 batadv_softif_vlan_put(vlan);
3274 batadv_softif_vlan_free_ref(vlan); 3346 batadv_softif_vlan_put(vlan);
3275 } 3347 }
3276 3348
3277 batadv_tt_local_entry_free_ref(tt_local); 3349 batadv_tt_local_entry_put(tt_local);
3278 } 3350 }
3279 spin_unlock_bh(list_lock); 3351 spin_unlock_bh(list_lock);
3280 } 3352 }
@@ -3357,11 +3429,11 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
3357 ret = true; 3429 ret = true;
3358 3430
3359out: 3431out:
3360 batadv_softif_vlan_free_ref(vlan); 3432 batadv_softif_vlan_put(vlan);
3361 if (tt_global_entry) 3433 if (tt_global_entry)
3362 batadv_tt_global_entry_free_ref(tt_global_entry); 3434 batadv_tt_global_entry_put(tt_global_entry);
3363 if (tt_local_entry) 3435 if (tt_local_entry)
3364 batadv_tt_local_entry_free_ref(tt_local_entry); 3436 batadv_tt_local_entry_put(tt_local_entry);
3365 return ret; 3437 return ret;
3366} 3438}
3367 3439
@@ -3369,13 +3441,12 @@ out:
3369 * batadv_tt_update_orig - update global translation table with new tt 3441 * batadv_tt_update_orig - update global translation table with new tt
3370 * information received via ogms 3442 * information received via ogms
3371 * @bat_priv: the bat priv with all the soft interface information 3443 * @bat_priv: the bat priv with all the soft interface information
3372 * @orig: the orig_node of the ogm 3444 * @orig_node: the orig_node of the ogm
3373 * @tt_vlan: pointer to the first tvlv VLAN entry 3445 * @tt_buff: pointer to the first tvlv VLAN entry
3374 * @tt_num_vlan: number of tvlv VLAN entries 3446 * @tt_num_vlan: number of tvlv VLAN entries
3375 * @tt_change: pointer to the first entry in the TT buffer 3447 * @tt_change: pointer to the first entry in the TT buffer
3376 * @tt_num_changes: number of tt changes inside the tt buffer 3448 * @tt_num_changes: number of tt changes inside the tt buffer
3377 * @ttvn: translation table version number of this changeset 3449 * @ttvn: translation table version number of this changeset
3378 * @tt_crc: crc32 checksum of orig node's translation table
3379 */ 3450 */
3380static void batadv_tt_update_orig(struct batadv_priv *bat_priv, 3451static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
3381 struct batadv_orig_node *orig_node, 3452 struct batadv_orig_node *orig_node,
@@ -3457,7 +3528,7 @@ request_table:
3457 * @addr: the mac address of the client to check 3528 * @addr: the mac address of the client to check
3458 * @vid: VLAN identifier 3529 * @vid: VLAN identifier
3459 * 3530 *
3460 * Returns true if we know that the client has moved from its old originator 3531 * Return: true if we know that the client has moved from its old originator
3461 * to another one. This entry is still kept for consistency purposes and will be 3532 * to another one. This entry is still kept for consistency purposes and will be
3462 * deleted later by a DEL or because of timeout 3533 * deleted later by a DEL or because of timeout
3463 */ 3534 */
@@ -3472,7 +3543,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
3472 goto out; 3543 goto out;
3473 3544
3474 ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM; 3545 ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM;
3475 batadv_tt_global_entry_free_ref(tt_global_entry); 3546 batadv_tt_global_entry_put(tt_global_entry);
3476out: 3547out:
3477 return ret; 3548 return ret;
3478} 3549}
@@ -3483,7 +3554,7 @@ out:
3483 * @addr: the mac address of the local client to query 3554 * @addr: the mac address of the local client to query
3484 * @vid: VLAN identifier 3555 * @vid: VLAN identifier
3485 * 3556 *
3486 * Returns true if the local client is known to be roaming (it is not served by 3557 * Return: true if the local client is known to be roaming (it is not served by
3487 * this node anymore) or not. If yes, the client is still present in the table 3558 * this node anymore) or not. If yes, the client is still present in the table
3488 * to keep the latter consistent with the node TTVN 3559 * to keep the latter consistent with the node TTVN
3489 */ 3560 */
@@ -3498,7 +3569,7 @@ bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
3498 goto out; 3569 goto out;
3499 3570
3500 ret = tt_local_entry->common.flags & BATADV_TT_CLIENT_ROAM; 3571 ret = tt_local_entry->common.flags & BATADV_TT_CLIENT_ROAM;
3501 batadv_tt_local_entry_free_ref(tt_local_entry); 3572 batadv_tt_local_entry_put(tt_local_entry);
3502out: 3573out:
3503 return ret; 3574 return ret;
3504} 3575}
@@ -3612,7 +3683,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
3612 * @tvlv_value: tvlv buffer containing the tt data 3683 * @tvlv_value: tvlv buffer containing the tt data
3613 * @tvlv_value_len: tvlv buffer length 3684 * @tvlv_value_len: tvlv buffer length
3614 * 3685 *
3615 * Returns NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS 3686 * Return: NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS
3616 * otherwise. 3687 * otherwise.
3617 */ 3688 */
3618static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, 3689static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
@@ -3693,7 +3764,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
3693 * @tvlv_value: tvlv buffer containing the tt data 3764 * @tvlv_value: tvlv buffer containing the tt data
3694 * @tvlv_value_len: tvlv buffer length 3765 * @tvlv_value_len: tvlv buffer length
3695 * 3766 *
3696 * Returns NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS 3767 * Return: NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS
3697 * otherwise. 3768 * otherwise.
3698 */ 3769 */
3699static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, 3770static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
@@ -3731,7 +3802,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
3731 3802
3732out: 3803out:
3733 if (orig_node) 3804 if (orig_node)
3734 batadv_orig_node_free_ref(orig_node); 3805 batadv_orig_node_put(orig_node);
3735 return NET_RX_SUCCESS; 3806 return NET_RX_SUCCESS;
3736} 3807}
3737 3808
@@ -3739,7 +3810,7 @@ out:
3739 * batadv_tt_init - initialise the translation table internals 3810 * batadv_tt_init - initialise the translation table internals
3740 * @bat_priv: the bat priv with all the soft interface information 3811 * @bat_priv: the bat priv with all the soft interface information
3741 * 3812 *
3742 * Return 0 on success or negative error number in case of failure. 3813 * Return: 0 on success or negative error number in case of failure.
3743 */ 3814 */
3744int batadv_tt_init(struct batadv_priv *bat_priv) 3815int batadv_tt_init(struct batadv_priv *bat_priv)
3745{ 3816{
@@ -3777,7 +3848,7 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
3777 * @addr: the mac address of the client 3848 * @addr: the mac address of the client
3778 * @vid: the identifier of the VLAN where this client is connected 3849 * @vid: the identifier of the VLAN where this client is connected
3779 * 3850 *
3780 * Returns true if the client is marked with the TT_CLIENT_ISOLA flag, false 3851 * Return: true if the client is marked with the TT_CLIENT_ISOLA flag, false
3781 * otherwise 3852 * otherwise
3782 */ 3853 */
3783bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, 3854bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
@@ -3792,7 +3863,7 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
3792 3863
3793 ret = tt->common.flags & BATADV_TT_CLIENT_ISOLA; 3864 ret = tt->common.flags & BATADV_TT_CLIENT_ISOLA;
3794 3865
3795 batadv_tt_global_entry_free_ref(tt); 3866 batadv_tt_global_entry_put(tt);
3796 3867
3797 return ret; 3868 return ret;
3798} 3869}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index abd8e116e5fb..7c7e2c006bfe 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich, Antonio Quartulli 3 * Marek Lindner, Simon Wunderlich, Antonio Quartulli
4 * 4 *
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 3437b667a2cd..9abfb3e73c34 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,4 +1,4 @@
1/* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: 1/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors:
2 * 2 *
3 * Marek Lindner, Simon Wunderlich 3 * Marek Lindner, Simon Wunderlich
4 * 4 *
@@ -22,9 +22,11 @@
22#error only "main.h" can be included directly 22#error only "main.h" can be included directly
23#endif 23#endif
24 24
25#include <linux/average.h>
25#include <linux/bitops.h> 26#include <linux/bitops.h>
26#include <linux/compiler.h> 27#include <linux/compiler.h>
27#include <linux/if_ether.h> 28#include <linux/if_ether.h>
29#include <linux/kref.h>
28#include <linux/netdevice.h> 30#include <linux/netdevice.h>
29#include <linux/sched.h> /* for linux/wait.h */ 31#include <linux/sched.h> /* for linux/wait.h */
30#include <linux/spinlock.h> 32#include <linux/spinlock.h>
@@ -73,7 +75,7 @@ enum batadv_dhcp_recipient {
73#define BATADV_TT_SYNC_MASK 0x00F0 75#define BATADV_TT_SYNC_MASK 0x00F0
74 76
75/** 77/**
76 * struct batadv_hard_iface_bat_iv - per hard interface B.A.T.M.A.N. IV data 78 * struct batadv_hard_iface_bat_iv - per hard-interface B.A.T.M.A.N. IV data
77 * @ogm_buff: buffer holding the OGM packet 79 * @ogm_buff: buffer holding the OGM packet
78 * @ogm_buff_len: length of the OGM packet buffer 80 * @ogm_buff_len: length of the OGM packet buffer
79 * @ogm_seqno: OGM sequence number - used to identify each OGM 81 * @ogm_seqno: OGM sequence number - used to identify each OGM
@@ -85,6 +87,36 @@ struct batadv_hard_iface_bat_iv {
85}; 87};
86 88
87/** 89/**
90 * enum batadv_v_hard_iface_flags - interface flags useful to B.A.T.M.A.N. V
91 * @BATADV_FULL_DUPLEX: tells if the connection over this link is full-duplex
92 * @BATADV_WARNING_DEFAULT: tells whether we have warned the user that no
93 * throughput data is available for this interface and that default values are
94 * assumed.
95 */
96enum batadv_v_hard_iface_flags {
97 BATADV_FULL_DUPLEX = BIT(0),
98 BATADV_WARNING_DEFAULT = BIT(1),
99};
100
101/**
102 * struct batadv_hard_iface_bat_v - per hard-interface B.A.T.M.A.N. V data
103 * @elp_interval: time interval between two ELP transmissions
104 * @elp_seqno: current ELP sequence number
105 * @elp_skb: base skb containing the ELP message to send
106 * @elp_wq: workqueue used to schedule ELP transmissions
107 * @throughput_override: throughput override to disable link auto-detection
108 * @flags: interface specific flags
109 */
110struct batadv_hard_iface_bat_v {
111 atomic_t elp_interval;
112 atomic_t elp_seqno;
113 struct sk_buff *elp_skb;
114 struct delayed_work elp_wq;
115 atomic_t throughput_override;
116 u8 flags;
117};
118
119/**
88 * struct batadv_hard_iface - network device known to batman-adv 120 * struct batadv_hard_iface - network device known to batman-adv
89 * @list: list node for batadv_hardif_list 121 * @list: list node for batadv_hardif_list
90 * @if_num: identificator of the interface 122 * @if_num: identificator of the interface
@@ -97,8 +129,9 @@ struct batadv_hard_iface_bat_iv {
97 * batman-adv for this interface 129 * batman-adv for this interface
98 * @soft_iface: the batman-adv interface which uses this network interface 130 * @soft_iface: the batman-adv interface which uses this network interface
99 * @rcu: struct used for freeing in an RCU-safe manner 131 * @rcu: struct used for freeing in an RCU-safe manner
100 * @bat_iv: BATMAN IV specific per hard interface data 132 * @bat_iv: per hard-interface B.A.T.M.A.N. IV data
101 * @cleanup_work: work queue callback item for hard interface deinit 133 * @bat_v: per hard-interface B.A.T.M.A.N. V data
134 * @cleanup_work: work queue callback item for hard-interface deinit
102 * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs 135 * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
103 * @neigh_list: list of unique single hop neighbors via this interface 136 * @neigh_list: list of unique single hop neighbors via this interface
104 * @neigh_list_lock: lock protecting neigh_list 137 * @neigh_list_lock: lock protecting neigh_list
@@ -110,11 +143,14 @@ struct batadv_hard_iface {
110 struct net_device *net_dev; 143 struct net_device *net_dev;
111 u8 num_bcasts; 144 u8 num_bcasts;
112 struct kobject *hardif_obj; 145 struct kobject *hardif_obj;
113 atomic_t refcount; 146 struct kref refcount;
114 struct packet_type batman_adv_ptype; 147 struct packet_type batman_adv_ptype;
115 struct net_device *soft_iface; 148 struct net_device *soft_iface;
116 struct rcu_head rcu; 149 struct rcu_head rcu;
117 struct batadv_hard_iface_bat_iv bat_iv; 150 struct batadv_hard_iface_bat_iv bat_iv;
151#ifdef CONFIG_BATMAN_ADV_BATMAN_V
152 struct batadv_hard_iface_bat_v bat_v;
153#endif
118 struct work_struct cleanup_work; 154 struct work_struct cleanup_work;
119 struct dentry *debug_dir; 155 struct dentry *debug_dir;
120 struct hlist_head neigh_list; 156 struct hlist_head neigh_list;
@@ -125,10 +161,11 @@ struct batadv_hard_iface {
125/** 161/**
126 * struct batadv_orig_ifinfo - originator info per outgoing interface 162 * struct batadv_orig_ifinfo - originator info per outgoing interface
127 * @list: list node for orig_node::ifinfo_list 163 * @list: list node for orig_node::ifinfo_list
128 * @if_outgoing: pointer to outgoing hard interface 164 * @if_outgoing: pointer to outgoing hard-interface
129 * @router: router that should be used to reach this originator 165 * @router: router that should be used to reach this originator
130 * @last_real_seqno: last and best known sequence number 166 * @last_real_seqno: last and best known sequence number
131 * @last_ttl: ttl of last received packet 167 * @last_ttl: ttl of last received packet
168 * @last_seqno_forwarded: seqno of the OGM which was forwarded last
132 * @batman_seqno_reset: time when the batman seqno window was reset 169 * @batman_seqno_reset: time when the batman seqno window was reset
133 * @refcount: number of contexts the object is used 170 * @refcount: number of contexts the object is used
134 * @rcu: struct used for freeing in an RCU-safe manner 171 * @rcu: struct used for freeing in an RCU-safe manner
@@ -139,8 +176,9 @@ struct batadv_orig_ifinfo {
139 struct batadv_neigh_node __rcu *router; /* rcu protected pointer */ 176 struct batadv_neigh_node __rcu *router; /* rcu protected pointer */
140 u32 last_real_seqno; 177 u32 last_real_seqno;
141 u8 last_ttl; 178 u8 last_ttl;
179 u32 last_seqno_forwarded;
142 unsigned long batman_seqno_reset; 180 unsigned long batman_seqno_reset;
143 atomic_t refcount; 181 struct kref refcount;
144 struct rcu_head rcu; 182 struct rcu_head rcu;
145}; 183};
146 184
@@ -196,13 +234,13 @@ struct batadv_orig_node_vlan {
196 unsigned short vid; 234 unsigned short vid;
197 struct batadv_vlan_tt tt; 235 struct batadv_vlan_tt tt;
198 struct hlist_node list; 236 struct hlist_node list;
199 atomic_t refcount; 237 struct kref refcount;
200 struct rcu_head rcu; 238 struct rcu_head rcu;
201}; 239};
202 240
203/** 241/**
204 * struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members 242 * struct batadv_orig_bat_iv - B.A.T.M.A.N. IV private orig_node members
205 * @bcast_own: set of bitfields (one per hard interface) where each one counts 243 * @bcast_own: set of bitfields (one per hard-interface) where each one counts
206 * the number of our OGMs this orig_node rebroadcasted "back" to us (relative 244 * the number of our OGMs this orig_node rebroadcasted "back" to us (relative
207 * to last_real_seqno). Every bitfield is BATADV_TQ_LOCAL_WINDOW_SIZE bits long. 245 * to last_real_seqno). Every bitfield is BATADV_TQ_LOCAL_WINDOW_SIZE bits long.
208 * @bcast_own_sum: sum of bcast_own 246 * @bcast_own_sum: sum of bcast_own
@@ -298,7 +336,7 @@ struct batadv_orig_node {
298 struct batadv_priv *bat_priv; 336 struct batadv_priv *bat_priv;
299 /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */ 337 /* bcast_seqno_lock protects: bcast_bits & last_bcast_seqno */
300 spinlock_t bcast_seqno_lock; 338 spinlock_t bcast_seqno_lock;
301 atomic_t refcount; 339 struct kref refcount;
302 struct rcu_head rcu; 340 struct rcu_head rcu;
303#ifdef CONFIG_BATMAN_ADV_NC 341#ifdef CONFIG_BATMAN_ADV_NC
304 struct list_head in_coding_list; 342 struct list_head in_coding_list;
@@ -341,15 +379,36 @@ struct batadv_gw_node {
341 struct batadv_orig_node *orig_node; 379 struct batadv_orig_node *orig_node;
342 u32 bandwidth_down; 380 u32 bandwidth_down;
343 u32 bandwidth_up; 381 u32 bandwidth_up;
344 atomic_t refcount; 382 struct kref refcount;
345 struct rcu_head rcu; 383 struct rcu_head rcu;
346}; 384};
347 385
386DECLARE_EWMA(throughput, 1024, 8)
387
388/**
389 * struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. V private neighbor
390 * information
391 * @throughput: ewma link throughput towards this neighbor
392 * @elp_interval: time interval between two ELP transmissions
393 * @elp_latest_seqno: latest and best known ELP sequence number
394 * @last_unicast_tx: when the last unicast packet has been sent to this neighbor
395 * @metric_work: work queue callback item for metric update
396 */
397struct batadv_hardif_neigh_node_bat_v {
398 struct ewma_throughput throughput;
399 u32 elp_interval;
400 u32 elp_latest_seqno;
401 unsigned long last_unicast_tx;
402 struct work_struct metric_work;
403};
404
348/** 405/**
349 * batadv_hardif_neigh_node - unique neighbor per hard interface 406 * struct batadv_hardif_neigh_node - unique neighbor per hard-interface
350 * @list: list node for batadv_hard_iface::neigh_list 407 * @list: list node for batadv_hard_iface::neigh_list
351 * @addr: the MAC address of the neighboring interface 408 * @addr: the MAC address of the neighboring interface
352 * @if_incoming: pointer to incoming hard interface 409 * @if_incoming: pointer to incoming hard-interface
410 * @last_seen: when last packet via this neighbor was received
411 * @bat_v: B.A.T.M.A.N. V private data
353 * @refcount: number of contexts the object is used 412 * @refcount: number of contexts the object is used
354 * @rcu: struct used for freeing in a RCU-safe manner 413 * @rcu: struct used for freeing in a RCU-safe manner
355 */ 414 */
@@ -358,7 +417,10 @@ struct batadv_hardif_neigh_node {
358 u8 addr[ETH_ALEN]; 417 u8 addr[ETH_ALEN];
359 struct batadv_hard_iface *if_incoming; 418 struct batadv_hard_iface *if_incoming;
360 unsigned long last_seen; 419 unsigned long last_seen;
361 atomic_t refcount; 420#ifdef CONFIG_BATMAN_ADV_BATMAN_V
421 struct batadv_hardif_neigh_node_bat_v bat_v;
422#endif
423 struct kref refcount;
362 struct rcu_head rcu; 424 struct rcu_head rcu;
363}; 425};
364 426
@@ -369,7 +431,7 @@ struct batadv_hardif_neigh_node {
369 * @addr: the MAC address of the neighboring interface 431 * @addr: the MAC address of the neighboring interface
370 * @ifinfo_list: list for routing metrics per outgoing interface 432 * @ifinfo_list: list for routing metrics per outgoing interface
371 * @ifinfo_lock: lock protecting private ifinfo members and list 433 * @ifinfo_lock: lock protecting private ifinfo members and list
372 * @if_incoming: pointer to incoming hard interface 434 * @if_incoming: pointer to incoming hard-interface
373 * @last_seen: when last packet via this neighbor was received 435 * @last_seen: when last packet via this neighbor was received
374 * @refcount: number of contexts the object is used 436 * @refcount: number of contexts the object is used
375 * @rcu: struct used for freeing in an RCU-safe manner 437 * @rcu: struct used for freeing in an RCU-safe manner
@@ -382,13 +444,13 @@ struct batadv_neigh_node {
382 spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */ 444 spinlock_t ifinfo_lock; /* protects ifinfo_list and its members */
383 struct batadv_hard_iface *if_incoming; 445 struct batadv_hard_iface *if_incoming;
384 unsigned long last_seen; 446 unsigned long last_seen;
385 atomic_t refcount; 447 struct kref refcount;
386 struct rcu_head rcu; 448 struct rcu_head rcu;
387}; 449};
388 450
389/** 451/**
390 * struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing 452 * struct batadv_neigh_ifinfo_bat_iv - neighbor information per outgoing
391 * interface for BATMAN IV 453 * interface for B.A.T.M.A.N. IV
392 * @tq_recv: ring buffer of received TQ values from this neigh node 454 * @tq_recv: ring buffer of received TQ values from this neigh node
393 * @tq_index: ring buffer index 455 * @tq_index: ring buffer index
394 * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv) 456 * @tq_avg: averaged tq of all tq values in the ring buffer (tq_recv)
@@ -405,10 +467,22 @@ struct batadv_neigh_ifinfo_bat_iv {
405}; 467};
406 468
407/** 469/**
470 * struct batadv_neigh_ifinfo_bat_v - neighbor information per outgoing
471 * interface for B.A.T.M.A.N. V
472 * @throughput: last throughput metric received from originator via this neigh
473 * @last_seqno: last sequence number known for this neighbor
474 */
475struct batadv_neigh_ifinfo_bat_v {
476 u32 throughput;
477 u32 last_seqno;
478};
479
480/**
408 * struct batadv_neigh_ifinfo - neighbor information per outgoing interface 481 * struct batadv_neigh_ifinfo - neighbor information per outgoing interface
409 * @list: list node for batadv_neigh_node::ifinfo_list 482 * @list: list node for batadv_neigh_node::ifinfo_list
410 * @if_outgoing: pointer to outgoing hard interface 483 * @if_outgoing: pointer to outgoing hard-interface
411 * @bat_iv: B.A.T.M.A.N. IV private structure 484 * @bat_iv: B.A.T.M.A.N. IV private structure
485 * @bat_v: B.A.T.M.A.N. V private data
412 * @last_ttl: last received ttl from this neigh node 486 * @last_ttl: last received ttl from this neigh node
413 * @refcount: number of contexts the object is used 487 * @refcount: number of contexts the object is used
414 * @rcu: struct used for freeing in a RCU-safe manner 488 * @rcu: struct used for freeing in a RCU-safe manner
@@ -417,8 +491,11 @@ struct batadv_neigh_ifinfo {
417 struct hlist_node list; 491 struct hlist_node list;
418 struct batadv_hard_iface *if_outgoing; 492 struct batadv_hard_iface *if_outgoing;
419 struct batadv_neigh_ifinfo_bat_iv bat_iv; 493 struct batadv_neigh_ifinfo_bat_iv bat_iv;
494#ifdef CONFIG_BATMAN_ADV_BATMAN_V
495 struct batadv_neigh_ifinfo_bat_v bat_v;
496#endif
420 u8 last_ttl; 497 u8 last_ttl;
421 atomic_t refcount; 498 struct kref refcount;
422 struct rcu_head rcu; 499 struct rcu_head rcu;
423}; 500};
424 501
@@ -744,11 +821,25 @@ struct batadv_softif_vlan {
744 atomic_t ap_isolation; /* boolean */ 821 atomic_t ap_isolation; /* boolean */
745 struct batadv_vlan_tt tt; 822 struct batadv_vlan_tt tt;
746 struct hlist_node list; 823 struct hlist_node list;
747 atomic_t refcount; 824 struct kref refcount;
748 struct rcu_head rcu; 825 struct rcu_head rcu;
749}; 826};
750 827
751/** 828/**
829 * struct batadv_priv_bat_v - B.A.T.M.A.N. V per soft-interface private data
830 * @ogm_buff: buffer holding the OGM packet
831 * @ogm_buff_len: length of the OGM packet buffer
832 * @ogm_seqno: OGM sequence number - used to identify each OGM
833 * @ogm_wq: workqueue used to schedule OGM transmissions
834 */
835struct batadv_priv_bat_v {
836 unsigned char *ogm_buff;
837 int ogm_buff_len;
838 atomic_t ogm_seqno;
839 struct delayed_work ogm_wq;
840};
841
842/**
752 * struct batadv_priv - per mesh interface data 843 * struct batadv_priv - per mesh interface data
753 * @mesh_state: current status of the mesh (inactive/active/deactivating) 844 * @mesh_state: current status of the mesh (inactive/active/deactivating)
754 * @soft_iface: net device which holds this struct as private data 845 * @soft_iface: net device which holds this struct as private data
@@ -771,6 +862,9 @@ struct batadv_softif_vlan {
771 * @orig_interval: OGM broadcast interval in milliseconds 862 * @orig_interval: OGM broadcast interval in milliseconds
772 * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop 863 * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop
773 * @log_level: configured log level (see batadv_dbg_level) 864 * @log_level: configured log level (see batadv_dbg_level)
865 * @isolation_mark: the skb->mark value used to match packets for AP isolation
866 * @isolation_mark_mask: bitmask identifying the bits in skb->mark to be used
867 * for the isolation mark
774 * @bcast_seqno: last sent broadcast packet sequence number 868 * @bcast_seqno: last sent broadcast packet sequence number
775 * @bcast_queue_left: number of remaining buffered broadcast packet slots 869 * @bcast_queue_left: number of remaining buffered broadcast packet slots
776 * @batman_queue_left: number of remaining OGM packet slots 870 * @batman_queue_left: number of remaining OGM packet slots
@@ -783,8 +877,8 @@ struct batadv_softif_vlan {
783 * @forw_bat_list_lock: lock protecting forw_bat_list 877 * @forw_bat_list_lock: lock protecting forw_bat_list
784 * @forw_bcast_list_lock: lock protecting forw_bcast_list 878 * @forw_bcast_list_lock: lock protecting forw_bcast_list
785 * @orig_work: work queue callback item for orig node purging 879 * @orig_work: work queue callback item for orig node purging
786 * @cleanup_work: work queue callback item for soft interface deinit 880 * @cleanup_work: work queue callback item for soft-interface deinit
787 * @primary_if: one of the hard interfaces assigned to this mesh interface 881 * @primary_if: one of the hard-interfaces assigned to this mesh interface
788 * becomes the primary interface 882 * becomes the primary interface
789 * @bat_algo_ops: routing algorithm used by this mesh interface 883 * @bat_algo_ops: routing algorithm used by this mesh interface
790 * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top 884 * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top
@@ -799,6 +893,7 @@ struct batadv_softif_vlan {
799 * @mcast: multicast data 893 * @mcast: multicast data
800 * @network_coding: bool indicating whether network coding is enabled 894 * @network_coding: bool indicating whether network coding is enabled
801 * @nc: network coding data 895 * @nc: network coding data
896 * @bat_v: B.A.T.M.A.N. V per soft-interface private data
802 */ 897 */
803struct batadv_priv { 898struct batadv_priv {
804 atomic_t mesh_state; 899 atomic_t mesh_state;
@@ -864,6 +959,9 @@ struct batadv_priv {
864 atomic_t network_coding; 959 atomic_t network_coding;
865 struct batadv_priv_nc nc; 960 struct batadv_priv_nc nc;
866#endif /* CONFIG_BATMAN_ADV_NC */ 961#endif /* CONFIG_BATMAN_ADV_NC */
962#ifdef CONFIG_BATMAN_ADV_BATMAN_V
963 struct batadv_priv_bat_v bat_v;
964#endif
867}; 965};
868 966
869/** 967/**
@@ -925,7 +1023,7 @@ struct batadv_bla_backbone_gw {
925 atomic_t request_sent; 1023 atomic_t request_sent;
926 u16 crc; 1024 u16 crc;
927 spinlock_t crc_lock; /* protects crc */ 1025 spinlock_t crc_lock; /* protects crc */
928 atomic_t refcount; 1026 struct kref refcount;
929 struct rcu_head rcu; 1027 struct rcu_head rcu;
930}; 1028};
931 1029
@@ -946,7 +1044,7 @@ struct batadv_bla_claim {
946 unsigned long lasttime; 1044 unsigned long lasttime;
947 struct hlist_node hash_entry; 1045 struct hlist_node hash_entry;
948 struct rcu_head rcu; 1046 struct rcu_head rcu;
949 atomic_t refcount; 1047 struct kref refcount;
950}; 1048};
951#endif 1049#endif
952 1050
@@ -967,7 +1065,7 @@ struct batadv_tt_common_entry {
967 struct hlist_node hash_entry; 1065 struct hlist_node hash_entry;
968 u16 flags; 1066 u16 flags;
969 unsigned long added_at; 1067 unsigned long added_at;
970 atomic_t refcount; 1068 struct kref refcount;
971 struct rcu_head rcu; 1069 struct rcu_head rcu;
972}; 1070};
973 1071
@@ -1009,7 +1107,7 @@ struct batadv_tt_orig_list_entry {
1009 struct batadv_orig_node *orig_node; 1107 struct batadv_orig_node *orig_node;
1010 u8 ttvn; 1108 u8 ttvn;
1011 struct hlist_node list; 1109 struct hlist_node list;
1012 atomic_t refcount; 1110 struct kref refcount;
1013 struct rcu_head rcu; 1111 struct rcu_head rcu;
1014}; 1112};
1015 1113
@@ -1062,7 +1160,7 @@ struct batadv_tt_roam_node {
1062struct batadv_nc_node { 1160struct batadv_nc_node {
1063 struct list_head list; 1161 struct list_head list;
1064 u8 addr[ETH_ALEN]; 1162 u8 addr[ETH_ALEN];
1065 atomic_t refcount; 1163 struct kref refcount;
1066 struct rcu_head rcu; 1164 struct rcu_head rcu;
1067 struct batadv_orig_node *orig_node; 1165 struct batadv_orig_node *orig_node;
1068 unsigned long last_seen; 1166 unsigned long last_seen;
@@ -1082,7 +1180,7 @@ struct batadv_nc_node {
1082struct batadv_nc_path { 1180struct batadv_nc_path {
1083 struct hlist_node hash_entry; 1181 struct hlist_node hash_entry;
1084 struct rcu_head rcu; 1182 struct rcu_head rcu;
1085 atomic_t refcount; 1183 struct kref refcount;
1086 struct list_head packet_list; 1184 struct list_head packet_list;
1087 spinlock_t packet_list_lock; /* Protects packet_list */ 1185 spinlock_t packet_list_lock; /* Protects packet_list */
1088 u8 next_hop[ETH_ALEN]; 1186 u8 next_hop[ETH_ALEN];
@@ -1225,7 +1323,7 @@ struct batadv_dat_entry {
1225 unsigned short vid; 1323 unsigned short vid;
1226 unsigned long last_update; 1324 unsigned long last_update;
1227 struct hlist_node hash_entry; 1325 struct hlist_node hash_entry;
1228 atomic_t refcount; 1326 struct kref refcount;
1229 struct rcu_head rcu; 1327 struct rcu_head rcu;
1230}; 1328};
1231 1329
@@ -1261,7 +1359,7 @@ struct batadv_dat_candidate {
1261struct batadv_tvlv_container { 1359struct batadv_tvlv_container {
1262 struct hlist_node list; 1360 struct hlist_node list;
1263 struct batadv_tvlv_hdr tvlv_hdr; 1361 struct batadv_tvlv_hdr tvlv_hdr;
1264 atomic_t refcount; 1362 struct kref refcount;
1265}; 1363};
1266 1364
1267/** 1365/**
@@ -1288,7 +1386,7 @@ struct batadv_tvlv_handler {
1288 u8 type; 1386 u8 type;
1289 u8 version; 1387 u8 version;
1290 u8 flags; 1388 u8 flags;
1291 atomic_t refcount; 1389 struct kref refcount;
1292 struct rcu_head rcu; 1390 struct rcu_head rcu;
1293}; 1391};
1294 1392
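The batman-adv hunks above convert every per-object reference counter from atomic_t to struct kref. Purely as an illustration of the pattern being adopted (this snippet is not part of the series, and the object name is a hypothetical stand-in for any of the converted structures), the kref life cycle typically looks like this:

#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical object standing in for one of the converted structures */
struct example_obj {
	struct kref refcount;
	struct rcu_head rcu;
};

/* Release callback invoked by kref_put() once the count drops to zero */
static void example_obj_release(struct kref *ref)
{
	struct example_obj *obj = container_of(ref, struct example_obj,
					       refcount);

	kfree_rcu(obj, rcu);	/* free after an RCU grace period */
}

static struct example_obj *example_obj_new(void)
{
	struct example_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (obj)
		kref_init(&obj->refcount);	/* reference count starts at 1 */
	return obj;
}

static void example_obj_get(struct example_obj *obj)
{
	kref_get(&obj->refcount);
}

static void example_obj_put(struct example_obj *obj)
{
	kref_put(&obj->refcount, example_obj_release);
}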
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index d040365ba98e..8a4cc2f7f0db 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -307,6 +307,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
307 307
308 /* check that it's our buffer */ 308 /* check that it's our buffer */
309 if (lowpan_is_ipv6(*skb_network_header(skb))) { 309 if (lowpan_is_ipv6(*skb_network_header(skb))) {
310 /* Pull off the 1-byte 6lowpan header. */
311 skb_pull(skb, 1);
312
310 /* Copy the packet so that the IPv6 header is 313 /* Copy the packet so that the IPv6 header is
311 * properly aligned. 314 * properly aligned.
312 */ 315 */
@@ -317,6 +320,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
317 320
318 local_skb->protocol = htons(ETH_P_IPV6); 321 local_skb->protocol = htons(ETH_P_IPV6);
319 local_skb->pkt_type = PACKET_HOST; 322 local_skb->pkt_type = PACKET_HOST;
323 local_skb->dev = dev;
320 324
321 skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); 325 skb_set_transport_header(local_skb, sizeof(struct ipv6hdr));
322 326
@@ -335,6 +339,8 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
335 if (!local_skb) 339 if (!local_skb)
336 goto drop; 340 goto drop;
337 341
342 local_skb->dev = dev;
343
338 ret = iphc_decompress(local_skb, dev, chan); 344 ret = iphc_decompress(local_skb, dev, chan);
339 if (ret < 0) { 345 if (ret < 0) {
340 kfree_skb(local_skb); 346 kfree_skb(local_skb);
@@ -343,7 +349,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev,
343 349
344 local_skb->protocol = htons(ETH_P_IPV6); 350 local_skb->protocol = htons(ETH_P_IPV6);
345 local_skb->pkt_type = PACKET_HOST; 351 local_skb->pkt_type = PACKET_HOST;
346 local_skb->dev = dev;
347 352
348 if (give_skb_to_upper(local_skb, dev) 353 if (give_skb_to_upper(local_skb, dev)
349 != NET_RX_SUCCESS) { 354 != NET_RX_SUCCESS) {
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 95d1a66ba03a..06c31b9a68b0 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -69,6 +69,15 @@ config BT_6LOWPAN
69 help 69 help
70 IPv6 compression over Bluetooth Low Energy. 70 IPv6 compression over Bluetooth Low Energy.
71 71
72config BT_LEDS
73 bool "Enable LED triggers"
74 depends on BT
75 depends on LEDS_CLASS
76 select LEDS_TRIGGERS
77 help
78 This option selects a few LED triggers for different
79 Bluetooth events.
80
72config BT_SELFTEST 81config BT_SELFTEST
73 bool "Bluetooth self testing support" 82 bool "Bluetooth self testing support"
74 depends on BT && DEBUG_KERNEL 83 depends on BT && DEBUG_KERNEL
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 2b15ae8c1def..b3ff12eb9b6d 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -17,6 +17,7 @@ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
17 17
18bluetooth-$(CONFIG_BT_BREDR) += sco.o 18bluetooth-$(CONFIG_BT_BREDR) += sco.o
19bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o 19bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
20bluetooth-$(CONFIG_BT_LEDS) += leds.o
20bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o 21bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o
21bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o 22bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o
22 23
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 32575b49f4a0..bf9f8a801a2e 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -719,6 +719,13 @@ done:
719 hci_dev_unlock(hdev); 719 hci_dev_unlock(hdev);
720} 720}
721 721
722static bool conn_use_rpa(struct hci_conn *conn)
723{
724 struct hci_dev *hdev = conn->hdev;
725
726 return hci_dev_test_flag(hdev, HCI_PRIVACY);
727}
728
722static void hci_req_add_le_create_conn(struct hci_request *req, 729static void hci_req_add_le_create_conn(struct hci_request *req,
723 struct hci_conn *conn) 730 struct hci_conn *conn)
724{ 731{
@@ -726,14 +733,15 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
726 struct hci_dev *hdev = conn->hdev; 733 struct hci_dev *hdev = conn->hdev;
727 u8 own_addr_type; 734 u8 own_addr_type;
728 735
729 memset(&cp, 0, sizeof(cp));
730
731 /* Update random address, but set require_privacy to false so 736 /* Update random address, but set require_privacy to false so
732 * that we never connect with a non-resolvable address. 737 * that we never connect with a non-resolvable address.
733 */ 738 */
734 if (hci_update_random_address(req, false, &own_addr_type)) 739 if (hci_update_random_address(req, false, conn_use_rpa(conn),
740 &own_addr_type))
735 return; 741 return;
736 742
743 memset(&cp, 0, sizeof(cp));
744
737 /* Set window to be the same value as the interval to enable 745 /* Set window to be the same value as the interval to enable
738 * continuous scanning. 746 * continuous scanning.
739 */ 747 */
@@ -774,7 +782,8 @@ static void hci_req_directed_advertising(struct hci_request *req,
774 /* Set require_privacy to false so that the remote device has a 782 /* Set require_privacy to false so that the remote device has a
775 * chance of identifying us. 783 * chance of identifying us.
776 */ 784 */
777 if (hci_update_random_address(req, false, &own_addr_type) < 0) 785 if (hci_update_random_address(req, false, conn_use_rpa(conn),
786 &own_addr_type) < 0)
778 return; 787 return;
779 788
780 memset(&cp, 0, sizeof(cp)); 789 memset(&cp, 0, sizeof(cp));
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 47bcef754796..2713fc86e85a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -40,6 +40,7 @@
40#include "hci_request.h" 40#include "hci_request.h"
41#include "hci_debugfs.h" 41#include "hci_debugfs.h"
42#include "smp.h" 42#include "smp.h"
43#include "leds.h"
43 44
44static void hci_rx_work(struct work_struct *work); 45static void hci_rx_work(struct work_struct *work);
45static void hci_cmd_work(struct work_struct *work); 46static void hci_cmd_work(struct work_struct *work);
@@ -1395,6 +1396,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
1395 hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); 1396 hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
1396 set_bit(HCI_UP, &hdev->flags); 1397 set_bit(HCI_UP, &hdev->flags);
1397 hci_sock_dev_event(hdev, HCI_DEV_UP); 1398 hci_sock_dev_event(hdev, HCI_DEV_UP);
1399 hci_leds_update_powered(hdev, true);
1398 if (!hci_dev_test_flag(hdev, HCI_SETUP) && 1400 if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
1399 !hci_dev_test_flag(hdev, HCI_CONFIG) && 1401 !hci_dev_test_flag(hdev, HCI_CONFIG) &&
1400 !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && 1402 !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
@@ -1532,6 +1534,8 @@ int hci_dev_do_close(struct hci_dev *hdev)
1532 return 0; 1534 return 0;
1533 } 1535 }
1534 1536
1537 hci_leds_update_powered(hdev, false);
1538
1535 /* Flush RX and TX works */ 1539 /* Flush RX and TX works */
1536 flush_work(&hdev->tx_work); 1540 flush_work(&hdev->tx_work);
1537 flush_work(&hdev->rx_work); 1541 flush_work(&hdev->rx_work);
@@ -2017,6 +2021,7 @@ static void hci_power_on(struct work_struct *work)
2017 if (test_bit(HCI_UP, &hdev->flags) && 2021 if (test_bit(HCI_UP, &hdev->flags) &&
2018 hci_dev_test_flag(hdev, HCI_MGMT) && 2022 hci_dev_test_flag(hdev, HCI_MGMT) &&
2019 hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { 2023 hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) {
2024 cancel_delayed_work(&hdev->power_off);
2020 hci_req_sync_lock(hdev); 2025 hci_req_sync_lock(hdev);
2021 err = __hci_req_hci_power_on(hdev); 2026 err = __hci_req_hci_power_on(hdev);
2022 hci_req_sync_unlock(hdev); 2027 hci_req_sync_unlock(hdev);
@@ -3067,6 +3072,8 @@ int hci_register_dev(struct hci_dev *hdev)
3067 if (error < 0) 3072 if (error < 0)
3068 goto err_wqueue; 3073 goto err_wqueue;
3069 3074
3075 hci_leds_init(hdev);
3076
3070 hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev, 3077 hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev,
3071 RFKILL_TYPE_BLUETOOTH, &hci_rfkill_ops, 3078 RFKILL_TYPE_BLUETOOTH, &hci_rfkill_ops,
3072 hdev); 3079 hdev);
@@ -4112,8 +4119,10 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
4112 break; 4119 break;
4113 } 4120 }
4114 4121
4115 *req_complete = bt_cb(skb)->hci.req_complete; 4122 if (bt_cb(skb)->hci.req_flags & HCI_REQ_SKB)
4116 *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; 4123 *req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
4124 else
4125 *req_complete = bt_cb(skb)->hci.req_complete;
4117 kfree_skb(skb); 4126 kfree_skb(skb);
4118 } 4127 }
4119 spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); 4128 spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 41b5f3813f02..6e125d76df0d 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -688,21 +688,29 @@ static u8 update_white_list(struct hci_request *req)
688 * command to remove it from the controller. 688 * command to remove it from the controller.
689 */ 689 */
690 list_for_each_entry(b, &hdev->le_white_list, list) { 690 list_for_each_entry(b, &hdev->le_white_list, list) {
691 struct hci_cp_le_del_from_white_list cp; 691 /* If the device is neither in pend_le_conns nor
692 * pend_le_reports then remove it from the whitelist.
693 */
694 if (!hci_pend_le_action_lookup(&hdev->pend_le_conns,
695 &b->bdaddr, b->bdaddr_type) &&
696 !hci_pend_le_action_lookup(&hdev->pend_le_reports,
697 &b->bdaddr, b->bdaddr_type)) {
698 struct hci_cp_le_del_from_white_list cp;
699
700 cp.bdaddr_type = b->bdaddr_type;
701 bacpy(&cp.bdaddr, &b->bdaddr);
692 702
693 if (hci_pend_le_action_lookup(&hdev->pend_le_conns, 703 hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST,
694 &b->bdaddr, b->bdaddr_type) || 704 sizeof(cp), &cp);
695 hci_pend_le_action_lookup(&hdev->pend_le_reports,
696 &b->bdaddr, b->bdaddr_type)) {
697 white_list_entries++;
698 continue; 705 continue;
699 } 706 }
700 707
701 cp.bdaddr_type = b->bdaddr_type; 708 if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
702 bacpy(&cp.bdaddr, &b->bdaddr); 709 /* White list can not be used with RPAs */
710 return 0x00;
711 }
703 712
704 hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, 713 white_list_entries++;
705 sizeof(cp), &cp);
706 } 714 }
707 715
708 /* Since all no longer valid white list entries have been 716 /* Since all no longer valid white list entries have been
@@ -763,6 +771,11 @@ static u8 update_white_list(struct hci_request *req)
763 return 0x01; 771 return 0x01;
764} 772}
765 773
774static bool scan_use_rpa(struct hci_dev *hdev)
775{
776 return hci_dev_test_flag(hdev, HCI_PRIVACY);
777}
778
766void hci_req_add_le_passive_scan(struct hci_request *req) 779void hci_req_add_le_passive_scan(struct hci_request *req)
767{ 780{
768 struct hci_cp_le_set_scan_param param_cp; 781 struct hci_cp_le_set_scan_param param_cp;
@@ -777,7 +790,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
777 * advertising with our address will be correctly reported 790 * advertising with our address will be correctly reported
778 * by the controller. 791 * by the controller.
779 */ 792 */
780 if (hci_update_random_address(req, false, &own_addr_type)) 793 if (hci_update_random_address(req, false, scan_use_rpa(hdev),
794 &own_addr_type))
781 return; 795 return;
782 796
783 /* Adding or removing entries from the white list must 797 /* Adding or removing entries from the white list must
@@ -858,6 +872,11 @@ static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance)
858 if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) 872 if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
859 flags |= MGMT_ADV_FLAG_CONNECTABLE; 873 flags |= MGMT_ADV_FLAG_CONNECTABLE;
860 874
875 if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE))
876 flags |= MGMT_ADV_FLAG_LIMITED_DISCOV;
877 else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
878 flags |= MGMT_ADV_FLAG_DISCOV;
879
861 return flags; 880 return flags;
862 } 881 }
863 882
@@ -870,6 +889,29 @@ static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance)
870 return adv_instance->flags; 889 return adv_instance->flags;
871} 890}
872 891
892static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags)
893{
894 /* If privacy is not enabled don't use RPA */
895 if (!hci_dev_test_flag(hdev, HCI_PRIVACY))
896 return false;
897
898 /* If basic privacy mode is enabled use RPA */
899 if (!hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY))
900 return true;
901
902 /* If limited privacy mode is enabled don't use RPA if we're
903 * both discoverable and bondable.
904 */
905 if ((flags & MGMT_ADV_FLAG_DISCOV) &&
906 hci_dev_test_flag(hdev, HCI_BONDABLE))
907 return false;
908
909 /* We're not both bondable and discoverable in the limited
910 * privacy mode, therefore use RPA.
911 */
912 return true;
913}
914
873void __hci_req_enable_advertising(struct hci_request *req) 915void __hci_req_enable_advertising(struct hci_request *req)
874{ 916{
875 struct hci_dev *hdev = req->hdev; 917 struct hci_dev *hdev = req->hdev;
@@ -903,7 +945,9 @@ void __hci_req_enable_advertising(struct hci_request *req)
903 * advertising is used. In that case it is fine to use a 945 * advertising is used. In that case it is fine to use a
904 * non-resolvable private address. 946 * non-resolvable private address.
905 */ 947 */
906 if (hci_update_random_address(req, !connectable, &own_addr_type) < 0) 948 if (hci_update_random_address(req, !connectable,
949 adv_use_rpa(hdev, flags),
950 &own_addr_type) < 0)
907 return; 951 return;
908 952
909 memset(&cp, 0, sizeof(cp)); 953 memset(&cp, 0, sizeof(cp));
@@ -1317,7 +1361,7 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
1317} 1361}
1318 1362
1319int hci_update_random_address(struct hci_request *req, bool require_privacy, 1363int hci_update_random_address(struct hci_request *req, bool require_privacy,
1320 u8 *own_addr_type) 1364 bool use_rpa, u8 *own_addr_type)
1321{ 1365{
1322 struct hci_dev *hdev = req->hdev; 1366 struct hci_dev *hdev = req->hdev;
1323 int err; 1367 int err;
@@ -1326,7 +1370,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
1326 * current RPA has expired or there is something other than 1370 * current RPA has expired or there is something other than
1327 * the current RPA in use, then generate a new one. 1371 * the current RPA in use, then generate a new one.
1328 */ 1372 */
1329 if (hci_dev_test_flag(hdev, HCI_PRIVACY)) { 1373 if (use_rpa) {
1330 int to; 1374 int to;
1331 1375
1332 *own_addr_type = ADDR_LE_DEV_RANDOM; 1376 *own_addr_type = ADDR_LE_DEV_RANDOM;
@@ -1588,9 +1632,16 @@ static int discoverable_update(struct hci_request *req, unsigned long opt)
1588 /* Advertising instances don't use the global discoverable setting, so 1632 /* Advertising instances don't use the global discoverable setting, so
1589 * only update AD if advertising was enabled using Set Advertising. 1633 * only update AD if advertising was enabled using Set Advertising.
1590 */ 1634 */
1591 if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) 1635 if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
1592 __hci_req_update_adv_data(req, 0x00); 1636 __hci_req_update_adv_data(req, 0x00);
1593 1637
1638 /* Discoverable mode affects the local advertising
1639 * address in limited privacy mode.
1640 */
1641 if (hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY))
1642 __hci_req_enable_advertising(req);
1643 }
1644
1594 hci_dev_unlock(hdev); 1645 hci_dev_unlock(hdev);
1595 1646
1596 return 0; 1647 return 0;
@@ -1933,7 +1984,8 @@ static int active_scan(struct hci_request *req, unsigned long opt)
1933 * address (when privacy feature has been enabled) or non-resolvable 1984 * address (when privacy feature has been enabled) or non-resolvable
1934 * private address. 1985 * private address.
1935 */ 1986 */
1936 err = hci_update_random_address(req, true, &own_addr_type); 1987 err = hci_update_random_address(req, true, scan_use_rpa(hdev),
1988 &own_addr_type);
1937 if (err < 0) 1989 if (err < 0)
1938 own_addr_type = ADDR_LE_DEV_PUBLIC; 1990 own_addr_type = ADDR_LE_DEV_PUBLIC;
1939 1991
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 64ff8c040d50..b2d044bdc732 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -89,7 +89,7 @@ static inline void hci_req_update_scan(struct hci_dev *hdev)
89void __hci_req_update_scan(struct hci_request *req); 89void __hci_req_update_scan(struct hci_request *req);
90 90
91int hci_update_random_address(struct hci_request *req, bool require_privacy, 91int hci_update_random_address(struct hci_request *req, bool require_privacy,
92 u8 *own_addr_type); 92 bool use_rpa, u8 *own_addr_type);
93 93
94int hci_abort_conn(struct hci_conn *conn, u8 reason); 94int hci_abort_conn(struct hci_conn *conn, u8 reason);
95void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, 95void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn,
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 39a5149f3010..eb4f5f24cbe3 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -197,10 +197,20 @@ int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm)
197 chan->sport = psm; 197 chan->sport = psm;
198 err = 0; 198 err = 0;
199 } else { 199 } else {
200 u16 p; 200 u16 p, start, end, incr;
201
202 if (chan->src_type == BDADDR_BREDR) {
203 start = L2CAP_PSM_DYN_START;
204 end = L2CAP_PSM_AUTO_END;
205 incr = 2;
206 } else {
207 start = L2CAP_PSM_LE_DYN_START;
208 end = L2CAP_PSM_LE_DYN_END;
209 incr = 1;
210 }
201 211
202 err = -EINVAL; 212 err = -EINVAL;
203 for (p = 0x1001; p < 0x1100; p += 2) 213 for (p = start; p <= end; p += incr)
204 if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src)) { 214 if (!__l2cap_global_chan_by_addr(cpu_to_le16(p), src)) {
205 chan->psm = cpu_to_le16(p); 215 chan->psm = cpu_to_le16(p);
206 chan->sport = cpu_to_le16(p); 216 chan->sport = cpu_to_le16(p);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 1bb551527044..e4cae72895a7 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -58,7 +58,7 @@ static int l2cap_validate_bredr_psm(u16 psm)
58 return -EINVAL; 58 return -EINVAL;
59 59
60 /* Restrict usage of well-known PSMs */ 60 /* Restrict usage of well-known PSMs */
61 if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) 61 if (psm < L2CAP_PSM_DYN_START && !capable(CAP_NET_BIND_SERVICE))
62 return -EACCES; 62 return -EACCES;
63 63
64 return 0; 64 return 0;
@@ -67,11 +67,11 @@ static int l2cap_validate_bredr_psm(u16 psm)
67static int l2cap_validate_le_psm(u16 psm) 67static int l2cap_validate_le_psm(u16 psm)
68{ 68{
69 /* Valid LE_PSM ranges are defined only until 0x00ff */ 69 /* Valid LE_PSM ranges are defined only until 0x00ff */
70 if (psm > 0x00ff) 70 if (psm > L2CAP_PSM_LE_DYN_END)
71 return -EINVAL; 71 return -EINVAL;
72 72
73 /* Restrict fixed, SIG assigned PSM values to CAP_NET_BIND_SERVICE */ 73 /* Restrict fixed, SIG assigned PSM values to CAP_NET_BIND_SERVICE */
74 if (psm <= 0x007f && !capable(CAP_NET_BIND_SERVICE)) 74 if (psm < L2CAP_PSM_LE_DYN_START && !capable(CAP_NET_BIND_SERVICE))
75 return -EACCES; 75 return -EACCES;
76 76
77 return 0; 77 return 0;
@@ -125,6 +125,9 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
125 goto done; 125 goto done;
126 } 126 }
127 127
128 bacpy(&chan->src, &la.l2_bdaddr);
129 chan->src_type = la.l2_bdaddr_type;
130
128 if (la.l2_cid) 131 if (la.l2_cid)
129 err = l2cap_add_scid(chan, __le16_to_cpu(la.l2_cid)); 132 err = l2cap_add_scid(chan, __le16_to_cpu(la.l2_cid));
130 else 133 else
@@ -156,9 +159,6 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
156 break; 159 break;
157 } 160 }
158 161
159 bacpy(&chan->src, &la.l2_bdaddr);
160 chan->src_type = la.l2_bdaddr_type;
161
162 if (chan->psm && bdaddr_type_is_le(chan->src_type)) 162 if (chan->psm && bdaddr_type_is_le(chan->src_type))
163 chan->mode = L2CAP_MODE_LE_FLOWCTL; 163 chan->mode = L2CAP_MODE_LE_FLOWCTL;
164 164
diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c
new file mode 100644
index 000000000000..8319c8440c89
--- /dev/null
+++ b/net/bluetooth/leds.c
@@ -0,0 +1,74 @@
1/*
2 * Copyright 2015, Heiner Kallweit <hkallweit1@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <net/bluetooth/bluetooth.h>
10#include <net/bluetooth/hci_core.h>
11
12#include "leds.h"
13
14struct hci_basic_led_trigger {
15 struct led_trigger led_trigger;
16 struct hci_dev *hdev;
17};
18
19#define to_hci_basic_led_trigger(arg) container_of(arg, \
20 struct hci_basic_led_trigger, led_trigger)
21
22void hci_leds_update_powered(struct hci_dev *hdev, bool enabled)
23{
24 if (hdev->power_led)
25 led_trigger_event(hdev->power_led,
26 enabled ? LED_FULL : LED_OFF);
27}
28
29static void power_activate(struct led_classdev *led_cdev)
30{
31 struct hci_basic_led_trigger *htrig;
32 bool powered;
33
34 htrig = to_hci_basic_led_trigger(led_cdev->trigger);
35 powered = test_bit(HCI_UP, &htrig->hdev->flags);
36
37 led_trigger_event(led_cdev->trigger, powered ? LED_FULL : LED_OFF);
38}
39
40static struct led_trigger *led_allocate_basic(struct hci_dev *hdev,
41 void (*activate)(struct led_classdev *led_cdev),
42 const char *name)
43{
44 struct hci_basic_led_trigger *htrig;
45
46 htrig = devm_kzalloc(&hdev->dev, sizeof(*htrig), GFP_KERNEL);
47 if (!htrig)
48 return NULL;
49
50 htrig->hdev = hdev;
51 htrig->led_trigger.activate = activate;
52 htrig->led_trigger.name = devm_kasprintf(&hdev->dev, GFP_KERNEL,
53 "%s-%s", hdev->name,
54 name);
55 if (!htrig->led_trigger.name)
56 goto err_alloc;
57
58 if (devm_led_trigger_register(&hdev->dev, &htrig->led_trigger))
59 goto err_register;
60
61 return &htrig->led_trigger;
62
63err_register:
64 devm_kfree(&hdev->dev, (void *)htrig->led_trigger.name);
65err_alloc:
66 devm_kfree(&hdev->dev, htrig);
67 return NULL;
68}
69
70void hci_leds_init(struct hci_dev *hdev)
71{
72 /* initialize power_led */
73 hdev->power_led = led_allocate_basic(hdev, power_activate, "power");
74}
diff --git a/net/bluetooth/leds.h b/net/bluetooth/leds.h
new file mode 100644
index 000000000000..a9c4d6ea01cf
--- /dev/null
+++ b/net/bluetooth/leds.h
@@ -0,0 +1,16 @@
1/*
2 * Copyright 2015, Heiner Kallweit <hkallweit1@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#if IS_ENABLED(CONFIG_BT_LEDS)
10void hci_leds_update_powered(struct hci_dev *hdev, bool enabled);
11void hci_leds_init(struct hci_dev *hdev);
12#else
13static inline void hci_leds_update_powered(struct hci_dev *hdev,
14 bool enabled) {}
15static inline void hci_leds_init(struct hci_dev *hdev) {}
16#endif
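With CONFIG_BT_LEDS enabled, leds.c registers one trigger per controller, named "<hci name>-power" (built by the devm_kasprintf() call above). As a hedged illustration only, a board file could bind an LED class device to that trigger by default; the GPIO number and the "hci0" controller name here are assumptions, not part of this patch:

#include <linux/leds.h>

/* Hypothetical platform LED following the Bluetooth power state.
 * "hci0-power" assumes the controller registers as hci0; any LED can
 * also be pointed at the trigger at run time through
 * /sys/class/leds/<led>/trigger.
 */
static struct gpio_led bt_power_led = {
	.name		 = "bt:blue:power",
	.default_trigger = "hci0-power",
	.gpio		 = 42,	/* assumed GPIO line */
};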
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 5a5089cb6570..9e4b931588cf 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -38,7 +38,7 @@
38#include "mgmt_util.h" 38#include "mgmt_util.h"
39 39
40#define MGMT_VERSION 1 40#define MGMT_VERSION 1
41#define MGMT_REVISION 11 41#define MGMT_REVISION 12
42 42
43static const u16 mgmt_commands[] = { 43static const u16 mgmt_commands[] = {
44 MGMT_OP_READ_INDEX_LIST, 44 MGMT_OP_READ_INDEX_LIST,
@@ -1382,8 +1382,19 @@ static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data,
1382 if (err < 0) 1382 if (err < 0)
1383 goto unlock; 1383 goto unlock;
1384 1384
1385 if (changed) 1385 if (changed) {
1386 /* In limited privacy mode the change of bondable mode
1387 * may affect the local advertising address.
1388 */
1389 if (hdev_is_powered(hdev) &&
1390 hci_dev_test_flag(hdev, HCI_ADVERTISING) &&
1391 hci_dev_test_flag(hdev, HCI_DISCOVERABLE) &&
1392 hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY))
1393 queue_work(hdev->req_workqueue,
1394 &hdev->discoverable_update);
1395
1386 err = new_settings(hdev, sk); 1396 err = new_settings(hdev, sk);
1397 }
1387 1398
1388unlock: 1399unlock:
1389 hci_dev_unlock(hdev); 1400 hci_dev_unlock(hdev);
@@ -4423,7 +4434,7 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
4423 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, 4434 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY,
4424 MGMT_STATUS_NOT_SUPPORTED); 4435 MGMT_STATUS_NOT_SUPPORTED);
4425 4436
4426 if (cp->privacy != 0x00 && cp->privacy != 0x01) 4437 if (cp->privacy != 0x00 && cp->privacy != 0x01 && cp->privacy != 0x02)
4427 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY, 4438 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY,
4428 MGMT_STATUS_INVALID_PARAMS); 4439 MGMT_STATUS_INVALID_PARAMS);
4429 4440
@@ -4442,10 +4453,15 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
4442 changed = !hci_dev_test_and_set_flag(hdev, HCI_PRIVACY); 4453 changed = !hci_dev_test_and_set_flag(hdev, HCI_PRIVACY);
4443 memcpy(hdev->irk, cp->irk, sizeof(hdev->irk)); 4454 memcpy(hdev->irk, cp->irk, sizeof(hdev->irk));
4444 hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); 4455 hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
4456 if (cp->privacy == 0x02)
4457 hci_dev_set_flag(hdev, HCI_LIMITED_PRIVACY);
4458 else
4459 hci_dev_clear_flag(hdev, HCI_LIMITED_PRIVACY);
4445 } else { 4460 } else {
4446 changed = hci_dev_test_and_clear_flag(hdev, HCI_PRIVACY); 4461 changed = hci_dev_test_and_clear_flag(hdev, HCI_PRIVACY);
4447 memset(hdev->irk, 0, sizeof(hdev->irk)); 4462 memset(hdev->irk, 0, sizeof(hdev->irk));
4448 hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED); 4463 hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED);
4464 hci_dev_clear_flag(hdev, HCI_LIMITED_PRIVACY);
4449 } 4465 }
4450 4466
4451 err = send_settings_rsp(sk, MGMT_OP_SET_PRIVACY, hdev); 4467 err = send_settings_rsp(sk, MGMT_OP_SET_PRIVACY, hdev);
@@ -5979,6 +5995,10 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
5979 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, 5995 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
5980 MGMT_STATUS_INVALID_PARAMS); 5996 MGMT_STATUS_INVALID_PARAMS);
5981 5997
5998 if (data_len != sizeof(*cp) + cp->adv_data_len + cp->scan_rsp_len)
5999 return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
6000 MGMT_STATUS_INVALID_PARAMS);
6001
5982 flags = __le32_to_cpu(cp->flags); 6002 flags = __le32_to_cpu(cp->flags);
5983 timeout = __le16_to_cpu(cp->timeout); 6003 timeout = __le16_to_cpu(cp->timeout);
5984 duration = __le16_to_cpu(cp->duration); 6004 duration = __le16_to_cpu(cp->duration);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index ffed8a1d4f27..50976a6481f3 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -21,9 +21,10 @@
21*/ 21*/
22 22
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/crypto.h>
25#include <linux/scatterlist.h> 24#include <linux/scatterlist.h>
26#include <crypto/b128ops.h> 25#include <crypto/b128ops.h>
26#include <crypto/hash.h>
27#include <crypto/skcipher.h>
27 28
28#include <net/bluetooth/bluetooth.h> 29#include <net/bluetooth/bluetooth.h>
29#include <net/bluetooth/hci_core.h> 30#include <net/bluetooth/hci_core.h>
@@ -87,8 +88,8 @@ struct smp_dev {
87 u8 min_key_size; 88 u8 min_key_size;
88 u8 max_key_size; 89 u8 max_key_size;
89 90
90 struct crypto_blkcipher *tfm_aes; 91 struct crypto_skcipher *tfm_aes;
91 struct crypto_hash *tfm_cmac; 92 struct crypto_shash *tfm_cmac;
92}; 93};
93 94
94struct smp_chan { 95struct smp_chan {
@@ -126,8 +127,8 @@ struct smp_chan {
126 u8 dhkey[32]; 127 u8 dhkey[32];
127 u8 mackey[16]; 128 u8 mackey[16];
128 129
129 struct crypto_blkcipher *tfm_aes; 130 struct crypto_skcipher *tfm_aes;
130 struct crypto_hash *tfm_cmac; 131 struct crypto_shash *tfm_cmac;
131}; 132};
132 133
133/* These debug key values are defined in the SMP section of the core 134/* These debug key values are defined in the SMP section of the core
@@ -165,12 +166,11 @@ static inline void swap_buf(const u8 *src, u8 *dst, size_t len)
165 * AES-CMAC, f4, f5, f6, g2 and h6. 166 * AES-CMAC, f4, f5, f6, g2 and h6.
166 */ 167 */
167 168
168static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m, 169static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
169 size_t len, u8 mac[16]) 170 size_t len, u8 mac[16])
170{ 171{
171 uint8_t tmp[16], mac_msb[16], msg_msb[CMAC_MSG_MAX]; 172 uint8_t tmp[16], mac_msb[16], msg_msb[CMAC_MSG_MAX];
172 struct hash_desc desc; 173 SHASH_DESC_ON_STACK(desc, tfm);
173 struct scatterlist sg;
174 int err; 174 int err;
175 175
176 if (len > CMAC_MSG_MAX) 176 if (len > CMAC_MSG_MAX)
@@ -181,10 +181,8 @@ static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m,
181 return -EINVAL; 181 return -EINVAL;
182 } 182 }
183 183
184 desc.tfm = tfm; 184 desc->tfm = tfm;
185 desc.flags = 0; 185 desc->flags = 0;
186
187 crypto_hash_init(&desc);
188 186
189 /* Swap key and message from LSB to MSB */ 187 /* Swap key and message from LSB to MSB */
190 swap_buf(k, tmp, 16); 188 swap_buf(k, tmp, 16);
@@ -193,23 +191,16 @@ static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m,
193 SMP_DBG("msg (len %zu) %*phN", len, (int) len, m); 191 SMP_DBG("msg (len %zu) %*phN", len, (int) len, m);
194 SMP_DBG("key %16phN", k); 192 SMP_DBG("key %16phN", k);
195 193
196 err = crypto_hash_setkey(tfm, tmp, 16); 194 err = crypto_shash_setkey(tfm, tmp, 16);
197 if (err) { 195 if (err) {
198 BT_ERR("cipher setkey failed: %d", err); 196 BT_ERR("cipher setkey failed: %d", err);
199 return err; 197 return err;
200 } 198 }
201 199
202 sg_init_one(&sg, msg_msb, len); 200 err = crypto_shash_digest(desc, msg_msb, len, mac_msb);
203 201 shash_desc_zero(desc);
204 err = crypto_hash_update(&desc, &sg, len);
205 if (err) {
206 BT_ERR("Hash update error %d", err);
207 return err;
208 }
209
210 err = crypto_hash_final(&desc, mac_msb);
211 if (err) { 202 if (err) {
212 BT_ERR("Hash final error %d", err); 203 BT_ERR("Hash computation error %d", err);
213 return err; 204 return err;
214 } 205 }
215 206
@@ -220,8 +211,8 @@ static int aes_cmac(struct crypto_hash *tfm, const u8 k[16], const u8 *m,
220 return 0; 211 return 0;
221} 212}
222 213
223static int smp_f4(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32], 214static int smp_f4(struct crypto_shash *tfm_cmac, const u8 u[32],
224 const u8 x[16], u8 z, u8 res[16]) 215 const u8 v[32], const u8 x[16], u8 z, u8 res[16])
225{ 216{
226 u8 m[65]; 217 u8 m[65];
227 int err; 218 int err;
@@ -243,7 +234,7 @@ static int smp_f4(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32],
243 return err; 234 return err;
244} 235}
245 236
246static int smp_f5(struct crypto_hash *tfm_cmac, const u8 w[32], 237static int smp_f5(struct crypto_shash *tfm_cmac, const u8 w[32],
247 const u8 n1[16], const u8 n2[16], const u8 a1[7], 238 const u8 n1[16], const u8 n2[16], const u8 a1[7],
248 const u8 a2[7], u8 mackey[16], u8 ltk[16]) 239 const u8 a2[7], u8 mackey[16], u8 ltk[16])
249{ 240{
@@ -296,7 +287,7 @@ static int smp_f5(struct crypto_hash *tfm_cmac, const u8 w[32],
296 return 0; 287 return 0;
297} 288}
298 289
299static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16], 290static int smp_f6(struct crypto_shash *tfm_cmac, const u8 w[16],
300 const u8 n1[16], const u8 n2[16], const u8 r[16], 291 const u8 n1[16], const u8 n2[16], const u8 r[16],
301 const u8 io_cap[3], const u8 a1[7], const u8 a2[7], 292 const u8 io_cap[3], const u8 a1[7], const u8 a2[7],
302 u8 res[16]) 293 u8 res[16])
@@ -324,7 +315,7 @@ static int smp_f6(struct crypto_hash *tfm_cmac, const u8 w[16],
324 return err; 315 return err;
325} 316}
326 317
327static int smp_g2(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32], 318static int smp_g2(struct crypto_shash *tfm_cmac, const u8 u[32], const u8 v[32],
328 const u8 x[16], const u8 y[16], u32 *val) 319 const u8 x[16], const u8 y[16], u32 *val)
329{ 320{
330 u8 m[80], tmp[16]; 321 u8 m[80], tmp[16];
@@ -350,7 +341,7 @@ static int smp_g2(struct crypto_hash *tfm_cmac, const u8 u[32], const u8 v[32],
350 return 0; 341 return 0;
351} 342}
352 343
353static int smp_h6(struct crypto_hash *tfm_cmac, const u8 w[16], 344static int smp_h6(struct crypto_shash *tfm_cmac, const u8 w[16],
354 const u8 key_id[4], u8 res[16]) 345 const u8 key_id[4], u8 res[16])
355{ 346{
356 int err; 347 int err;
@@ -370,9 +361,9 @@ static int smp_h6(struct crypto_hash *tfm_cmac, const u8 w[16],
370 * s1 and ah. 361 * s1 and ah.
371 */ 362 */
372 363
373static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r) 364static int smp_e(struct crypto_skcipher *tfm, const u8 *k, u8 *r)
374{ 365{
375 struct blkcipher_desc desc; 366 SKCIPHER_REQUEST_ON_STACK(req, tfm);
376 struct scatterlist sg; 367 struct scatterlist sg;
377 uint8_t tmp[16], data[16]; 368 uint8_t tmp[16], data[16];
378 int err; 369 int err;
@@ -384,13 +375,10 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
384 return -EINVAL; 375 return -EINVAL;
385 } 376 }
386 377
387 desc.tfm = tfm;
388 desc.flags = 0;
389
390 /* The most significant octet of key corresponds to k[0] */ 378 /* The most significant octet of key corresponds to k[0] */
391 swap_buf(k, tmp, 16); 379 swap_buf(k, tmp, 16);
392 380
393 err = crypto_blkcipher_setkey(tfm, tmp, 16); 381 err = crypto_skcipher_setkey(tfm, tmp, 16);
394 if (err) { 382 if (err) {
395 BT_ERR("cipher setkey failed: %d", err); 383 BT_ERR("cipher setkey failed: %d", err);
396 return err; 384 return err;
@@ -401,7 +389,12 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
401 389
402 sg_init_one(&sg, data, 16); 390 sg_init_one(&sg, data, 16);
403 391
404 err = crypto_blkcipher_encrypt(&desc, &sg, &sg, 16); 392 skcipher_request_set_tfm(req, tfm);
393 skcipher_request_set_callback(req, 0, NULL, NULL);
394 skcipher_request_set_crypt(req, &sg, &sg, 16, NULL);
395
396 err = crypto_skcipher_encrypt(req);
397 skcipher_request_zero(req);
405 if (err) 398 if (err)
406 BT_ERR("Encrypt data error %d", err); 399 BT_ERR("Encrypt data error %d", err);
407 400
@@ -413,7 +406,7 @@ static int smp_e(struct crypto_blkcipher *tfm, const u8 *k, u8 *r)
413 return err; 406 return err;
414} 407}
415 408
416static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16], 409static int smp_c1(struct crypto_skcipher *tfm_aes, const u8 k[16],
417 const u8 r[16], const u8 preq[7], const u8 pres[7], u8 _iat, 410 const u8 r[16], const u8 preq[7], const u8 pres[7], u8 _iat,
418 const bdaddr_t *ia, u8 _rat, const bdaddr_t *ra, u8 res[16]) 411 const bdaddr_t *ia, u8 _rat, const bdaddr_t *ra, u8 res[16])
419{ 412{
@@ -462,7 +455,7 @@ static int smp_c1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
462 return err; 455 return err;
463} 456}
464 457
465static int smp_s1(struct crypto_blkcipher *tfm_aes, const u8 k[16], 458static int smp_s1(struct crypto_skcipher *tfm_aes, const u8 k[16],
466 const u8 r1[16], const u8 r2[16], u8 _r[16]) 459 const u8 r1[16], const u8 r2[16], u8 _r[16])
467{ 460{
468 int err; 461 int err;
@@ -478,7 +471,7 @@ static int smp_s1(struct crypto_blkcipher *tfm_aes, const u8 k[16],
478 return err; 471 return err;
479} 472}
480 473
481static int smp_ah(struct crypto_blkcipher *tfm, const u8 irk[16], 474static int smp_ah(struct crypto_skcipher *tfm, const u8 irk[16],
482 const u8 r[3], u8 res[3]) 475 const u8 r[3], u8 res[3])
483{ 476{
484 u8 _res[16]; 477 u8 _res[16];
@@ -766,8 +759,8 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
766 kzfree(smp->slave_csrk); 759 kzfree(smp->slave_csrk);
767 kzfree(smp->link_key); 760 kzfree(smp->link_key);
768 761
769 crypto_free_blkcipher(smp->tfm_aes); 762 crypto_free_skcipher(smp->tfm_aes);
770 crypto_free_hash(smp->tfm_cmac); 763 crypto_free_shash(smp->tfm_cmac);
771 764
772 /* Ensure that we don't leave any debug key around if debug key 765 /* Ensure that we don't leave any debug key around if debug key
773 * support hasn't been explicitly enabled. 766 * support hasn't been explicitly enabled.
@@ -1072,22 +1065,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
1072 hcon->dst_type = smp->remote_irk->addr_type; 1065 hcon->dst_type = smp->remote_irk->addr_type;
1073 queue_work(hdev->workqueue, &conn->id_addr_update_work); 1066 queue_work(hdev->workqueue, &conn->id_addr_update_work);
1074 } 1067 }
1075
1076 * When receiving an identity resolving key for
1077 * a remote device that does not use a resolvable
1078 * private address, just remove the key so that
1079 * it is possible to use the controller white
1080 * list for scanning.
1081 *
1082 * Userspace will have been told to not store
1083 * this key at this point. So it is safe to
1084 * just remove it.
1085 */
1086 if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) {
1087 list_del_rcu(&smp->remote_irk->list);
1088 kfree_rcu(smp->remote_irk, rcu);
1089 smp->remote_irk = NULL;
1090 }
1091 } 1068 }
1092 1069
1093 if (smp->csrk) { 1070 if (smp->csrk) {
@@ -1382,17 +1359,17 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
1382 if (!smp) 1359 if (!smp)
1383 return NULL; 1360 return NULL;
1384 1361
1385 smp->tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); 1362 smp->tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
1386 if (IS_ERR(smp->tfm_aes)) { 1363 if (IS_ERR(smp->tfm_aes)) {
1387 BT_ERR("Unable to create ECB crypto context"); 1364 BT_ERR("Unable to create ECB crypto context");
1388 kzfree(smp); 1365 kzfree(smp);
1389 return NULL; 1366 return NULL;
1390 } 1367 }
1391 1368
1392 smp->tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); 1369 smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
1393 if (IS_ERR(smp->tfm_cmac)) { 1370 if (IS_ERR(smp->tfm_cmac)) {
1394 BT_ERR("Unable to create CMAC crypto context"); 1371 BT_ERR("Unable to create CMAC crypto context");
1395 crypto_free_blkcipher(smp->tfm_aes); 1372 crypto_free_skcipher(smp->tfm_aes);
1396 kzfree(smp); 1373 kzfree(smp);
1397 return NULL; 1374 return NULL;
1398 } 1375 }
@@ -3143,8 +3120,8 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
3143{ 3120{
3144 struct l2cap_chan *chan; 3121 struct l2cap_chan *chan;
3145 struct smp_dev *smp; 3122 struct smp_dev *smp;
3146 struct crypto_blkcipher *tfm_aes; 3123 struct crypto_skcipher *tfm_aes;
3147 struct crypto_hash *tfm_cmac; 3124 struct crypto_shash *tfm_cmac;
3148 3125
3149 if (cid == L2CAP_CID_SMP_BREDR) { 3126 if (cid == L2CAP_CID_SMP_BREDR) {
3150 smp = NULL; 3127 smp = NULL;
@@ -3155,17 +3132,17 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
3155 if (!smp) 3132 if (!smp)
3156 return ERR_PTR(-ENOMEM); 3133 return ERR_PTR(-ENOMEM);
3157 3134
3158 tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); 3135 tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
3159 if (IS_ERR(tfm_aes)) { 3136 if (IS_ERR(tfm_aes)) {
3160 BT_ERR("Unable to create ECB crypto context"); 3137 BT_ERR("Unable to create ECB crypto context");
3161 kzfree(smp); 3138 kzfree(smp);
3162 return ERR_CAST(tfm_aes); 3139 return ERR_CAST(tfm_aes);
3163 } 3140 }
3164 3141
3165 tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); 3142 tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
3166 if (IS_ERR(tfm_cmac)) { 3143 if (IS_ERR(tfm_cmac)) {
3167 BT_ERR("Unable to create CMAC crypto context"); 3144 BT_ERR("Unable to create CMAC crypto context");
3168 crypto_free_blkcipher(tfm_aes); 3145 crypto_free_skcipher(tfm_aes);
3169 kzfree(smp); 3146 kzfree(smp);
3170 return ERR_CAST(tfm_cmac); 3147 return ERR_CAST(tfm_cmac);
3171 } 3148 }
@@ -3179,8 +3156,8 @@ create_chan:
3179 chan = l2cap_chan_create(); 3156 chan = l2cap_chan_create();
3180 if (!chan) { 3157 if (!chan) {
3181 if (smp) { 3158 if (smp) {
3182 crypto_free_blkcipher(smp->tfm_aes); 3159 crypto_free_skcipher(smp->tfm_aes);
3183 crypto_free_hash(smp->tfm_cmac); 3160 crypto_free_shash(smp->tfm_cmac);
3184 kzfree(smp); 3161 kzfree(smp);
3185 } 3162 }
3186 return ERR_PTR(-ENOMEM); 3163 return ERR_PTR(-ENOMEM);
@@ -3226,10 +3203,8 @@ static void smp_del_chan(struct l2cap_chan *chan)
3226 smp = chan->data; 3203 smp = chan->data;
3227 if (smp) { 3204 if (smp) {
3228 chan->data = NULL; 3205 chan->data = NULL;
3229 if (smp->tfm_aes) 3206 crypto_free_skcipher(smp->tfm_aes);
3230 crypto_free_blkcipher(smp->tfm_aes); 3207 crypto_free_shash(smp->tfm_cmac);
3231 if (smp->tfm_cmac)
3232 crypto_free_hash(smp->tfm_cmac);
3233 kzfree(smp); 3208 kzfree(smp);
3234 } 3209 }
3235 3210
@@ -3465,7 +3440,7 @@ void smp_unregister(struct hci_dev *hdev)
3465 3440
3466#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) 3441#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
3467 3442
3468static int __init test_ah(struct crypto_blkcipher *tfm_aes) 3443static int __init test_ah(struct crypto_skcipher *tfm_aes)
3469{ 3444{
3470 const u8 irk[16] = { 3445 const u8 irk[16] = {
3471 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, 3446 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34,
@@ -3485,7 +3460,7 @@ static int __init test_ah(struct crypto_blkcipher *tfm_aes)
3485 return 0; 3460 return 0;
3486} 3461}
3487 3462
3488static int __init test_c1(struct crypto_blkcipher *tfm_aes) 3463static int __init test_c1(struct crypto_skcipher *tfm_aes)
3489{ 3464{
3490 const u8 k[16] = { 3465 const u8 k[16] = {
3491 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3466 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3515,7 +3490,7 @@ static int __init test_c1(struct crypto_blkcipher *tfm_aes)
3515 return 0; 3490 return 0;
3516} 3491}
3517 3492
3518static int __init test_s1(struct crypto_blkcipher *tfm_aes) 3493static int __init test_s1(struct crypto_skcipher *tfm_aes)
3519{ 3494{
3520 const u8 k[16] = { 3495 const u8 k[16] = {
3521 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 3496 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3540,7 +3515,7 @@ static int __init test_s1(struct crypto_blkcipher *tfm_aes)
3540 return 0; 3515 return 0;
3541} 3516}
3542 3517
3543static int __init test_f4(struct crypto_hash *tfm_cmac) 3518static int __init test_f4(struct crypto_shash *tfm_cmac)
3544{ 3519{
3545 const u8 u[32] = { 3520 const u8 u[32] = {
3546 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, 3521 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc,
@@ -3572,7 +3547,7 @@ static int __init test_f4(struct crypto_hash *tfm_cmac)
3572 return 0; 3547 return 0;
3573} 3548}
3574 3549
3575static int __init test_f5(struct crypto_hash *tfm_cmac) 3550static int __init test_f5(struct crypto_shash *tfm_cmac)
3576{ 3551{
3577 const u8 w[32] = { 3552 const u8 w[32] = {
3578 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86, 3553 0x98, 0xa6, 0xbf, 0x73, 0xf3, 0x34, 0x8d, 0x86,
@@ -3609,7 +3584,7 @@ static int __init test_f5(struct crypto_hash *tfm_cmac)
3609 return 0; 3584 return 0;
3610} 3585}
3611 3586
3612static int __init test_f6(struct crypto_hash *tfm_cmac) 3587static int __init test_f6(struct crypto_shash *tfm_cmac)
3613{ 3588{
3614 const u8 w[16] = { 3589 const u8 w[16] = {
3615 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd, 3590 0x20, 0x6e, 0x63, 0xce, 0x20, 0x6a, 0x3f, 0xfd,
@@ -3642,7 +3617,7 @@ static int __init test_f6(struct crypto_hash *tfm_cmac)
3642 return 0; 3617 return 0;
3643} 3618}
3644 3619
3645static int __init test_g2(struct crypto_hash *tfm_cmac) 3620static int __init test_g2(struct crypto_shash *tfm_cmac)
3646{ 3621{
3647 const u8 u[32] = { 3622 const u8 u[32] = {
3648 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc, 3623 0xe6, 0x9d, 0x35, 0x0e, 0x48, 0x01, 0x03, 0xcc,
@@ -3674,7 +3649,7 @@ static int __init test_g2(struct crypto_hash *tfm_cmac)
3674 return 0; 3649 return 0;
3675} 3650}
3676 3651
3677static int __init test_h6(struct crypto_hash *tfm_cmac) 3652static int __init test_h6(struct crypto_shash *tfm_cmac)
3678{ 3653{
3679 const u8 w[16] = { 3654 const u8 w[16] = {
3680 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34, 3655 0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34,
@@ -3711,8 +3686,8 @@ static const struct file_operations test_smp_fops = {
3711 .llseek = default_llseek, 3686 .llseek = default_llseek,
3712}; 3687};
3713 3688
3714static int __init run_selftests(struct crypto_blkcipher *tfm_aes, 3689static int __init run_selftests(struct crypto_skcipher *tfm_aes,
3715 struct crypto_hash *tfm_cmac) 3690 struct crypto_shash *tfm_cmac)
3716{ 3691{
3717 ktime_t calltime, delta, rettime; 3692 ktime_t calltime, delta, rettime;
3718 unsigned long long duration; 3693 unsigned long long duration;
@@ -3789,27 +3764,27 @@ done:
3789 3764
3790int __init bt_selftest_smp(void) 3765int __init bt_selftest_smp(void)
3791{ 3766{
3792 struct crypto_blkcipher *tfm_aes; 3767 struct crypto_skcipher *tfm_aes;
3793 struct crypto_hash *tfm_cmac; 3768 struct crypto_shash *tfm_cmac;
3794 int err; 3769 int err;
3795 3770
3796 tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC); 3771 tfm_aes = crypto_alloc_skcipher("ecb(aes)", 0, CRYPTO_ALG_ASYNC);
3797 if (IS_ERR(tfm_aes)) { 3772 if (IS_ERR(tfm_aes)) {
3798 BT_ERR("Unable to create ECB crypto context"); 3773 BT_ERR("Unable to create ECB crypto context");
3799 return PTR_ERR(tfm_aes); 3774 return PTR_ERR(tfm_aes);
3800 } 3775 }
3801 3776
3802 tfm_cmac = crypto_alloc_hash("cmac(aes)", 0, CRYPTO_ALG_ASYNC); 3777 tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, CRYPTO_ALG_ASYNC);
3803 if (IS_ERR(tfm_cmac)) { 3778 if (IS_ERR(tfm_cmac)) {
3804 BT_ERR("Unable to create CMAC crypto context"); 3779 BT_ERR("Unable to create CMAC crypto context");
3805 crypto_free_blkcipher(tfm_aes); 3780 crypto_free_skcipher(tfm_aes);
3806 return PTR_ERR(tfm_cmac); 3781 return PTR_ERR(tfm_cmac);
3807 } 3782 }
3808 3783
3809 err = run_selftests(tfm_aes, tfm_cmac); 3784 err = run_selftests(tfm_aes, tfm_cmac);
3810 3785
3811 crypto_free_hash(tfm_cmac); 3786 crypto_free_shash(tfm_cmac);
3812 crypto_free_blkcipher(tfm_aes); 3787 crypto_free_skcipher(tfm_aes);
3813 3788
3814 return err; 3789 return err;
3815} 3790}
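The smp.c conversion above replaces the legacy crypto_hash and crypto_blkcipher interfaces with crypto_shash and crypto_skcipher. As a minimal, self-contained sketch of the new one-shot AES-CMAC pattern the patch adopts (error paths trimmed; this helper is illustrative only and not part of the series):

#include <linux/err.h>
#include <linux/types.h>
#include <crypto/hash.h>

/* Compute a 16-byte AES-CMAC over msg with the shash API, mirroring the
 * desc-on-stack pattern used by aes_cmac() above.
 */
static int example_cmac_aes(const u8 key[16], const u8 *msg, size_t len,
			    u8 mac[16])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("cmac(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, 16);
	if (!err) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		err = crypto_shash_digest(desc, msg, len, mac);
		shash_desc_zero(desc);	/* wipe key material off the stack */
	}

	crypto_free_shash(tfm);
	return err;
}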
diff --git a/net/bridge/br.c b/net/bridge/br.c
index a1abe4936fe1..3addc05b9a16 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = {
121 .notifier_call = br_device_event 121 .notifier_call = br_device_event
122}; 122};
123 123
124/* called with RTNL */
124static int br_switchdev_event(struct notifier_block *unused, 125static int br_switchdev_event(struct notifier_block *unused,
125 unsigned long event, void *ptr) 126 unsigned long event, void *ptr)
126{ 127{
@@ -130,7 +131,6 @@ static int br_switchdev_event(struct notifier_block *unused,
130 struct switchdev_notifier_fdb_info *fdb_info; 131 struct switchdev_notifier_fdb_info *fdb_info;
131 int err = NOTIFY_DONE; 132 int err = NOTIFY_DONE;
132 133
133 rtnl_lock();
134 p = br_port_get_rtnl(dev); 134 p = br_port_get_rtnl(dev);
135 if (!p) 135 if (!p)
136 goto out; 136 goto out;
@@ -155,7 +155,6 @@ static int br_switchdev_event(struct notifier_block *unused,
155 } 155 }
156 156
157out: 157out:
158 rtnl_unlock();
159 return err; 158 return err;
160} 159}
161 160
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 82e3e9705017..dcea4f4c62b3 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -723,6 +723,8 @@ int br_fdb_dump(struct sk_buff *skb,
723 struct net_bridge_fdb_entry *f; 723 struct net_bridge_fdb_entry *f;
724 724
725 hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { 725 hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
726 int err;
727
726 if (idx < cb->args[0]) 728 if (idx < cb->args[0])
727 goto skip; 729 goto skip;
728 730
@@ -741,12 +743,15 @@ int br_fdb_dump(struct sk_buff *skb,
741 if (!filter_dev && f->dst) 743 if (!filter_dev && f->dst)
742 goto skip; 744 goto skip;
743 745
744 if (fdb_fill_info(skb, br, f, 746 err = fdb_fill_info(skb, br, f,
745 NETLINK_CB(cb->skb).portid, 747 NETLINK_CB(cb->skb).portid,
746 cb->nlh->nlmsg_seq, 748 cb->nlh->nlmsg_seq,
747 RTM_NEWNEIGH, 749 RTM_NEWNEIGH,
748 NLM_F_MULTI) < 0) 750 NLM_F_MULTI);
751 if (err < 0) {
752 cb->args[1] = err;
749 break; 753 break;
754 }
750skip: 755skip:
751 ++idx; 756 ++idx;
752 } 757 }
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index fcdb86dd5a23..f47759f05b6d 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -44,7 +44,6 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb
44 44
45 skb_push(skb, ETH_HLEN); 45 skb_push(skb, ETH_HLEN);
46 br_drop_fake_rtable(skb); 46 br_drop_fake_rtable(skb);
47 skb_sender_cpu_clear(skb);
48 47
49 if (skb->ip_summed == CHECKSUM_PARTIAL && 48 if (skb->ip_summed == CHECKSUM_PARTIAL &&
50 (skb->protocol == htons(ETH_P_8021Q) || 49 (skb->protocol == htons(ETH_P_8021Q) ||
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c367b3e1b5ac..8217aecf025b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -36,10 +36,10 @@
36 */ 36 */
37static int port_cost(struct net_device *dev) 37static int port_cost(struct net_device *dev)
38{ 38{
39 struct ethtool_cmd ecmd; 39 struct ethtool_link_ksettings ecmd;
40 40
41 if (!__ethtool_get_settings(dev, &ecmd)) { 41 if (!__ethtool_get_link_ksettings(dev, &ecmd)) {
42 switch (ethtool_cmd_speed(&ecmd)) { 42 switch (ecmd.base.speed) {
43 case SPEED_10000: 43 case SPEED_10000:
44 return 2; 44 return 2;
45 case SPEED_1000: 45 case SPEED_1000:
@@ -223,6 +223,31 @@ static void destroy_nbp_rcu(struct rcu_head *head)
223 destroy_nbp(p); 223 destroy_nbp(p);
224} 224}
225 225
226static unsigned get_max_headroom(struct net_bridge *br)
227{
228 unsigned max_headroom = 0;
229 struct net_bridge_port *p;
230
231 list_for_each_entry(p, &br->port_list, list) {
232 unsigned dev_headroom = netdev_get_fwd_headroom(p->dev);
233
234 if (dev_headroom > max_headroom)
235 max_headroom = dev_headroom;
236 }
237
238 return max_headroom;
239}
240
241static void update_headroom(struct net_bridge *br, int new_hr)
242{
243 struct net_bridge_port *p;
244
245 list_for_each_entry(p, &br->port_list, list)
246 netdev_set_rx_headroom(p->dev, new_hr);
247
248 br->dev->needed_headroom = new_hr;
249}
250
226/* Delete port(interface) from bridge is done in two steps. 251/* Delete port(interface) from bridge is done in two steps.
227 * via RCU. First step, marks device as down. That deletes 252 * via RCU. First step, marks device as down. That deletes
228 * all the timers and stops new packets from flowing through. 253 * all the timers and stops new packets from flowing through.
@@ -248,6 +273,9 @@ static void del_nbp(struct net_bridge_port *p)
248 br_ifinfo_notify(RTM_DELLINK, p); 273 br_ifinfo_notify(RTM_DELLINK, p);
249 274
250 list_del_rcu(&p->list); 275 list_del_rcu(&p->list);
276 if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
277 update_headroom(br, get_max_headroom(br));
278 netdev_reset_rx_headroom(dev);
251 279
252 nbp_vlan_flush(p); 280 nbp_vlan_flush(p);
253 br_fdb_delete_by_port(br, p, 0, 1); 281 br_fdb_delete_by_port(br, p, 0, 1);
@@ -409,6 +437,20 @@ int br_min_mtu(const struct net_bridge *br)
409 return mtu; 437 return mtu;
410} 438}
411 439
440static void br_set_gso_limits(struct net_bridge *br)
441{
442 unsigned int gso_max_size = GSO_MAX_SIZE;
443 u16 gso_max_segs = GSO_MAX_SEGS;
444 const struct net_bridge_port *p;
445
446 list_for_each_entry(p, &br->port_list, list) {
447 gso_max_size = min(gso_max_size, p->dev->gso_max_size);
448 gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs);
449 }
450 br->dev->gso_max_size = gso_max_size;
451 br->dev->gso_max_segs = gso_max_segs;
452}
453
412/* 454/*
413 * Recomputes features using slave's features 455 * Recomputes features using slave's features
414 */ 456 */
@@ -438,6 +480,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
438{ 480{
439 struct net_bridge_port *p; 481 struct net_bridge_port *p;
440 int err = 0; 482 int err = 0;
483 unsigned br_hr, dev_hr;
441 bool changed_addr; 484 bool changed_addr;
442 485
443 /* Don't allow bridging non-ethernet like devices, or DSA-enabled 486 /* Don't allow bridging non-ethernet like devices, or DSA-enabled
@@ -505,8 +548,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
505 548
506 netdev_update_features(br->dev); 549 netdev_update_features(br->dev);
507 550
508 if (br->dev->needed_headroom < dev->needed_headroom) 551 br_hr = br->dev->needed_headroom;
509 br->dev->needed_headroom = dev->needed_headroom; 552 dev_hr = netdev_get_fwd_headroom(dev);
553 if (br_hr < dev_hr)
554 update_headroom(br, dev_hr);
555 else
556 netdev_set_rx_headroom(dev, br_hr);
510 557
511 if (br_fdb_insert(br, p, dev->dev_addr, 0)) 558 if (br_fdb_insert(br, p, dev->dev_addr, 0))
512 netdev_err(dev, "failed insert local address bridge forwarding table\n"); 559 netdev_err(dev, "failed insert local address bridge forwarding table\n");
@@ -531,6 +578,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
531 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); 578 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
532 579
533 dev_set_mtu(br->dev, br_min_mtu(br)); 580 dev_set_mtu(br->dev, br_min_mtu(br));
581 br_set_gso_limits(br);
534 582
535 kobject_uevent(&p->kobj, KOBJ_ADD); 583 kobject_uevent(&p->kobj, KOBJ_ADD);
536 584
@@ -577,6 +625,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
577 del_nbp(p); 625 del_nbp(p);
578 626
579 dev_set_mtu(br->dev, br_min_mtu(br)); 627 dev_set_mtu(br->dev, br_min_mtu(br));
628 br_set_gso_limits(br);
580 629
581 spin_lock_bh(&br->lock); 630 spin_lock_bh(&br->lock);
582 changed_addr = br_stp_recalculate_bridge_id(br); 631 changed_addr = br_stp_recalculate_bridge_id(br);
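
With the br_if.c changes above, the bridge tracks the largest forwarding headroom among its ports and pushes its own needed_headroom back down through netdev_set_rx_headroom(). A lower device that cares about that hint would implement the corresponding ndo_set_rx_headroom hook; the sketch below assumes that hook's void (dev, int) shape and a hypothetical example_priv, so treat it as an illustration rather than a driver recipe.

static void example_set_rx_headroom(struct net_device *dev, int new_hr)
{
	struct example_priv *priv = netdev_priv(dev);	/* hypothetical private data */

	/* Treat a reset/negative value as "no hint". */
	priv->fwd_headroom = new_hr > 0 ? new_hr : 0;

	/* Leave room for our own encapsulation header plus the bridge's needs. */
	dev->needed_headroom = priv->encap_hlen + priv->fwd_headroom;
}
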
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index f7fba74108a9..160797722228 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -222,7 +222,10 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu
222 /* check if vlan is allowed, to avoid spoofing */ 222 /* check if vlan is allowed, to avoid spoofing */
223 if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) 223 if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
224 br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); 224 br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
225 return 0; /* process further */ 225
226 BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
227 br_pass_frame_up(skb);
228 return 0;
226} 229}
227 230
228/* 231/*
@@ -284,14 +287,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
284 } 287 }
285 288
286 /* Deliver packet to local host only */ 289 /* Deliver packet to local host only */
287 if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, 290 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev),
288 dev_net(skb->dev), NULL, skb, skb->dev, NULL, 291 NULL, skb, skb->dev, NULL, br_handle_local_finish);
289 br_handle_local_finish)) { 292 return RX_HANDLER_CONSUMED;
290 return RX_HANDLER_CONSUMED; /* consumed by filter */
291 } else {
292 *pskb = skb;
293 return RX_HANDLER_PASS; /* continue processing */
294 }
295 } 293 }
296 294
297forward: 295forward:
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 30e105f57f0d..253bc77eda3b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -20,7 +20,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
20{ 20{
21 struct net_bridge *br = netdev_priv(dev); 21 struct net_bridge *br = netdev_priv(dev);
22 struct net_bridge_port *p; 22 struct net_bridge_port *p;
23 struct nlattr *nest; 23 struct nlattr *nest, *port_nest;
24 24
25 if (!br->multicast_router || hlist_empty(&br->router_list)) 25 if (!br->multicast_router || hlist_empty(&br->router_list))
26 return 0; 26 return 0;
@@ -30,8 +30,20 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
30 return -EMSGSIZE; 30 return -EMSGSIZE;
31 31
32 hlist_for_each_entry_rcu(p, &br->router_list, rlist) { 32 hlist_for_each_entry_rcu(p, &br->router_list, rlist) {
33 if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex)) 33 if (!p)
34 continue;
35 port_nest = nla_nest_start(skb, MDBA_ROUTER_PORT);
36 if (!port_nest)
37 goto fail;
38 if (nla_put_nohdr(skb, sizeof(u32), &p->dev->ifindex) ||
39 nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER,
40 br_timer_value(&p->multicast_router_timer)) ||
41 nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE,
42 p->multicast_router)) {
43 nla_nest_cancel(skb, port_nest);
34 goto fail; 44 goto fail;
45 }
46 nla_nest_end(skb, port_nest);
35 } 47 }
36 48
37 nla_nest_end(skb, nest); 49 nla_nest_end(skb, nest);
@@ -41,6 +53,14 @@ fail:
41 return -EMSGSIZE; 53 return -EMSGSIZE;
42} 54}
43 55
56static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
57{
58 e->state = flags & MDB_PG_FLAGS_PERMANENT;
59 e->flags = 0;
60 if (flags & MDB_PG_FLAGS_OFFLOAD)
61 e->flags |= MDB_FLAGS_OFFLOAD;
62}
63
44static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, 64static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
45 struct net_device *dev) 65 struct net_device *dev)
46{ 66{
@@ -80,26 +100,41 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
80 for (pp = &mp->ports; 100 for (pp = &mp->ports;
81 (p = rcu_dereference(*pp)) != NULL; 101 (p = rcu_dereference(*pp)) != NULL;
82 pp = &p->next) { 102 pp = &p->next) {
103 struct nlattr *nest_ent;
104 struct br_mdb_entry e;
105
83 port = p->port; 106 port = p->port;
84 if (port) { 107 if (!port)
85 struct br_mdb_entry e; 108 continue;
86 memset(&e, 0, sizeof(e)); 109
87 e.ifindex = port->dev->ifindex; 110 memset(&e, 0, sizeof(e));
88 e.state = p->state; 111 e.ifindex = port->dev->ifindex;
89 e.vid = p->addr.vid; 112 e.vid = p->addr.vid;
90 if (p->addr.proto == htons(ETH_P_IP)) 113 __mdb_entry_fill_flags(&e, p->flags);
91 e.addr.u.ip4 = p->addr.u.ip4; 114 if (p->addr.proto == htons(ETH_P_IP))
115 e.addr.u.ip4 = p->addr.u.ip4;
92#if IS_ENABLED(CONFIG_IPV6) 116#if IS_ENABLED(CONFIG_IPV6)
93 if (p->addr.proto == htons(ETH_P_IPV6)) 117 if (p->addr.proto == htons(ETH_P_IPV6))
94 e.addr.u.ip6 = p->addr.u.ip6; 118 e.addr.u.ip6 = p->addr.u.ip6;
95#endif 119#endif
96 e.addr.proto = p->addr.proto; 120 e.addr.proto = p->addr.proto;
97 if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) { 121 nest_ent = nla_nest_start(skb,
98 nla_nest_cancel(skb, nest2); 122 MDBA_MDB_ENTRY_INFO);
99 err = -EMSGSIZE; 123 if (!nest_ent) {
100 goto out; 124 nla_nest_cancel(skb, nest2);
101 } 125 err = -EMSGSIZE;
126 goto out;
127 }
128 if (nla_put_nohdr(skb, sizeof(e), &e) ||
129 nla_put_u32(skb,
130 MDBA_MDB_EATTR_TIMER,
131 br_timer_value(&p->timer))) {
132 nla_nest_cancel(skb, nest_ent);
133 nla_nest_cancel(skb, nest2);
134 err = -EMSGSIZE;
135 goto out;
102 } 136 }
137 nla_nest_end(skb, nest_ent);
103 } 138 }
104 nla_nest_end(skb, nest2); 139 nla_nest_end(skb, nest2);
105 skip: 140 skip:
@@ -209,7 +244,7 @@ static inline size_t rtnl_mdb_nlmsg_size(void)
209} 244}
210 245
211static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, 246static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
212 int type) 247 int type, struct net_bridge_port_group *pg)
213{ 248{
214 struct switchdev_obj_port_mdb mdb = { 249 struct switchdev_obj_port_mdb mdb = {
215 .obj = { 250 .obj = {
@@ -232,10 +267,13 @@ static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry,
232#endif 267#endif
233 268
234 mdb.obj.orig_dev = port_dev; 269 mdb.obj.orig_dev = port_dev;
235 if (port_dev && type == RTM_NEWMDB) 270 if (port_dev && type == RTM_NEWMDB) {
236 switchdev_port_obj_add(port_dev, &mdb.obj); 271 err = switchdev_port_obj_add(port_dev, &mdb.obj);
237 else if (port_dev && type == RTM_DELMDB) 272 if (!err && pg)
273 pg->flags |= MDB_PG_FLAGS_OFFLOAD;
274 } else if (port_dev && type == RTM_DELMDB) {
238 switchdev_port_obj_del(port_dev, &mdb.obj); 275 switchdev_port_obj_del(port_dev, &mdb.obj);
276 }
239 277
240 skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); 278 skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
241 if (!skb) 279 if (!skb)
@@ -253,21 +291,21 @@ errout:
253 rtnl_set_sk_err(net, RTNLGRP_MDB, err); 291 rtnl_set_sk_err(net, RTNLGRP_MDB, err);
254} 292}
255 293
256void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, 294void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg,
257 struct br_ip *group, int type, u8 state) 295 int type)
258{ 296{
259 struct br_mdb_entry entry; 297 struct br_mdb_entry entry;
260 298
261 memset(&entry, 0, sizeof(entry)); 299 memset(&entry, 0, sizeof(entry));
262 entry.ifindex = port->dev->ifindex; 300 entry.ifindex = pg->port->dev->ifindex;
263 entry.addr.proto = group->proto; 301 entry.addr.proto = pg->addr.proto;
264 entry.addr.u.ip4 = group->u.ip4; 302 entry.addr.u.ip4 = pg->addr.u.ip4;
265#if IS_ENABLED(CONFIG_IPV6) 303#if IS_ENABLED(CONFIG_IPV6)
266 entry.addr.u.ip6 = group->u.ip6; 304 entry.addr.u.ip6 = pg->addr.u.ip6;
267#endif 305#endif
268 entry.state = state; 306 entry.vid = pg->addr.vid;
269 entry.vid = group->vid; 307 __mdb_entry_fill_flags(&entry, pg->flags);
270 __br_mdb_notify(dev, &entry, type); 308 __br_mdb_notify(dev, &entry, type, pg);
271} 309}
272 310
273static int nlmsg_populate_rtr_fill(struct sk_buff *skb, 311static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
@@ -412,7 +450,8 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
412} 450}
413 451
414static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, 452static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
415 struct br_ip *group, unsigned char state) 453 struct br_ip *group, unsigned char state,
454 struct net_bridge_port_group **pg)
416{ 455{
417 struct net_bridge_mdb_entry *mp; 456 struct net_bridge_mdb_entry *mp;
418 struct net_bridge_port_group *p; 457 struct net_bridge_port_group *p;
@@ -425,8 +464,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
425 mp = br_mdb_ip_get(mdb, group); 464 mp = br_mdb_ip_get(mdb, group);
426 if (!mp) { 465 if (!mp) {
427 mp = br_multicast_new_group(br, port, group); 466 mp = br_multicast_new_group(br, port, group);
428 err = PTR_ERR(mp); 467 err = PTR_ERR_OR_ZERO(mp);
429 if (IS_ERR(mp)) 468 if (err)
430 return err; 469 return err;
431 } 470 }
432 471
@@ -443,6 +482,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
443 if (unlikely(!p)) 482 if (unlikely(!p))
444 return -ENOMEM; 483 return -ENOMEM;
445 rcu_assign_pointer(*pp, p); 484 rcu_assign_pointer(*pp, p);
485 *pg = p;
446 if (state == MDB_TEMPORARY) 486 if (state == MDB_TEMPORARY)
447 mod_timer(&p->timer, now + br->multicast_membership_interval); 487 mod_timer(&p->timer, now + br->multicast_membership_interval);
448 488
@@ -450,7 +490,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
450} 490}
451 491
452static int __br_mdb_add(struct net *net, struct net_bridge *br, 492static int __br_mdb_add(struct net *net, struct net_bridge *br,
453 struct br_mdb_entry *entry) 493 struct br_mdb_entry *entry,
494 struct net_bridge_port_group **pg)
454{ 495{
455 struct br_ip ip; 496 struct br_ip ip;
456 struct net_device *dev; 497 struct net_device *dev;
@@ -479,7 +520,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
479#endif 520#endif
480 521
481 spin_lock_bh(&br->multicast_lock); 522 spin_lock_bh(&br->multicast_lock);
482 ret = br_mdb_add_group(br, p, &ip, entry->state); 523 ret = br_mdb_add_group(br, p, &ip, entry->state, pg);
483 spin_unlock_bh(&br->multicast_lock); 524 spin_unlock_bh(&br->multicast_lock);
484 return ret; 525 return ret;
485} 526}
@@ -487,6 +528,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
487static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) 528static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
488{ 529{
489 struct net *net = sock_net(skb->sk); 530 struct net *net = sock_net(skb->sk);
531 struct net_bridge_port_group *pg;
490 struct net_bridge_vlan_group *vg; 532 struct net_bridge_vlan_group *vg;
491 struct net_device *dev, *pdev; 533 struct net_device *dev, *pdev;
492 struct br_mdb_entry *entry; 534 struct br_mdb_entry *entry;
@@ -516,15 +558,15 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
516 if (br_vlan_enabled(br) && vg && entry->vid == 0) { 558 if (br_vlan_enabled(br) && vg && entry->vid == 0) {
517 list_for_each_entry(v, &vg->vlan_list, vlist) { 559 list_for_each_entry(v, &vg->vlan_list, vlist) {
518 entry->vid = v->vid; 560 entry->vid = v->vid;
519 err = __br_mdb_add(net, br, entry); 561 err = __br_mdb_add(net, br, entry, &pg);
520 if (err) 562 if (err)
521 break; 563 break;
522 __br_mdb_notify(dev, entry, RTM_NEWMDB); 564 __br_mdb_notify(dev, entry, RTM_NEWMDB, pg);
523 } 565 }
524 } else { 566 } else {
525 err = __br_mdb_add(net, br, entry); 567 err = __br_mdb_add(net, br, entry, &pg);
526 if (!err) 568 if (!err)
527 __br_mdb_notify(dev, entry, RTM_NEWMDB); 569 __br_mdb_notify(dev, entry, RTM_NEWMDB, pg);
528 } 570 }
529 571
530 return err; 572 return err;
@@ -568,7 +610,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
568 if (p->port->state == BR_STATE_DISABLED) 610 if (p->port->state == BR_STATE_DISABLED)
569 goto unlock; 611 goto unlock;
570 612
571 entry->state = p->state; 613 __mdb_entry_fill_flags(entry, p->flags);
572 rcu_assign_pointer(*pp, p->next); 614 rcu_assign_pointer(*pp, p->next);
573 hlist_del_init(&p->mglist); 615 hlist_del_init(&p->mglist);
574 del_timer(&p->timer); 616 del_timer(&p->timer);
@@ -620,12 +662,12 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
620 entry->vid = v->vid; 662 entry->vid = v->vid;
621 err = __br_mdb_del(br, entry); 663 err = __br_mdb_del(br, entry);
622 if (!err) 664 if (!err)
623 __br_mdb_notify(dev, entry, RTM_DELMDB); 665 __br_mdb_notify(dev, entry, RTM_DELMDB, NULL);
624 } 666 }
625 } else { 667 } else {
626 err = __br_mdb_del(br, entry); 668 err = __br_mdb_del(br, entry);
627 if (!err) 669 if (!err)
628 __br_mdb_notify(dev, entry, RTM_DELMDB); 670 __br_mdb_notify(dev, entry, RTM_DELMDB, NULL);
629 } 671 }
630 672
631 return err; 673 return err;
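
The br_mdb.c hunks turn MDBA_ROUTER_PORT and MDBA_MDB_ENTRY_INFO into nested attributes so extra per-entry data (timers, router type, offload flag) can ride along, while nla_put_nohdr() keeps the old payload at the front for existing listeners. Condensed into one hypothetical helper, the open/cancel/end lifecycle used above looks like this (only the function name and parameters are invented; the attribute constants come from the hunk):

static int example_fill_router_port(struct sk_buff *skb, u32 ifindex,
				    u32 timer_val, u8 rtr_type)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, MDBA_ROUTER_PORT);	/* open the nest */
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_nohdr(skb, sizeof(ifindex), &ifindex) ||	/* legacy payload first */
	    nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER, timer_val) ||
	    nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE, rtr_type)) {
		nla_nest_cancel(skb, nest);		/* roll back the partial nest */
		return -EMSGSIZE;
	}

	nla_nest_end(skb, nest);			/* patch in the final length */
	return 0;
}
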
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 03661d97463c..a4c15df2b792 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -283,8 +283,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
283 rcu_assign_pointer(*pp, p->next); 283 rcu_assign_pointer(*pp, p->next);
284 hlist_del_init(&p->mglist); 284 hlist_del_init(&p->mglist);
285 del_timer(&p->timer); 285 del_timer(&p->timer);
286 br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB, 286 br_mdb_notify(br->dev, p, RTM_DELMDB);
287 p->state);
288 call_rcu_bh(&p->rcu, br_multicast_free_pg); 287 call_rcu_bh(&p->rcu, br_multicast_free_pg);
289 288
290 if (!mp->ports && !mp->mglist && 289 if (!mp->ports && !mp->mglist &&
@@ -304,7 +303,7 @@ static void br_multicast_port_group_expired(unsigned long data)
304 303
305 spin_lock(&br->multicast_lock); 304 spin_lock(&br->multicast_lock);
306 if (!netif_running(br->dev) || timer_pending(&pg->timer) || 305 if (!netif_running(br->dev) || timer_pending(&pg->timer) ||
307 hlist_unhashed(&pg->mglist) || pg->state & MDB_PERMANENT) 306 hlist_unhashed(&pg->mglist) || pg->flags & MDB_PG_FLAGS_PERMANENT)
308 goto out; 307 goto out;
309 308
310 br_multicast_del_pg(br, pg); 309 br_multicast_del_pg(br, pg);
@@ -649,7 +648,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
649 struct net_bridge_port *port, 648 struct net_bridge_port *port,
650 struct br_ip *group, 649 struct br_ip *group,
651 struct net_bridge_port_group __rcu *next, 650 struct net_bridge_port_group __rcu *next,
652 unsigned char state) 651 unsigned char flags)
653{ 652{
654 struct net_bridge_port_group *p; 653 struct net_bridge_port_group *p;
655 654
@@ -659,7 +658,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
659 658
660 p->addr = *group; 659 p->addr = *group;
661 p->port = port; 660 p->port = port;
662 p->state = state; 661 p->flags = flags;
663 rcu_assign_pointer(p->next, next); 662 rcu_assign_pointer(p->next, next);
664 hlist_add_head(&p->mglist, &port->mglist); 663 hlist_add_head(&p->mglist, &port->mglist);
665 setup_timer(&p->timer, br_multicast_port_group_expired, 664 setup_timer(&p->timer, br_multicast_port_group_expired,
@@ -702,11 +701,11 @@ static int br_multicast_add_group(struct net_bridge *br,
702 break; 701 break;
703 } 702 }
704 703
705 p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY); 704 p = br_multicast_new_port_group(port, group, *pp, 0);
706 if (unlikely(!p)) 705 if (unlikely(!p))
707 goto err; 706 goto err;
708 rcu_assign_pointer(*pp, p); 707 rcu_assign_pointer(*pp, p);
709 br_mdb_notify(br->dev, port, group, RTM_NEWMDB, MDB_TEMPORARY); 708 br_mdb_notify(br->dev, p, RTM_NEWMDB);
710 709
711found: 710found:
712 mod_timer(&p->timer, now + br->multicast_membership_interval); 711 mod_timer(&p->timer, now + br->multicast_membership_interval);
@@ -760,13 +759,17 @@ static void br_multicast_router_expired(unsigned long data)
760 struct net_bridge *br = port->br; 759 struct net_bridge *br = port->br;
761 760
762 spin_lock(&br->multicast_lock); 761 spin_lock(&br->multicast_lock);
763 if (port->multicast_router != 1 || 762 if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
763 port->multicast_router == MDB_RTR_TYPE_PERM ||
764 timer_pending(&port->multicast_router_timer) || 764 timer_pending(&port->multicast_router_timer) ||
765 hlist_unhashed(&port->rlist)) 765 hlist_unhashed(&port->rlist))
766 goto out; 766 goto out;
767 767
768 hlist_del_init_rcu(&port->rlist); 768 hlist_del_init_rcu(&port->rlist);
769 br_rtr_notify(br->dev, port, RTM_DELMDB); 769 br_rtr_notify(br->dev, port, RTM_DELMDB);
770 /* Don't allow timer refresh if the router expired */
771 if (port->multicast_router == MDB_RTR_TYPE_TEMP)
772 port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
770 773
771out: 774out:
772 spin_unlock(&br->multicast_lock); 775 spin_unlock(&br->multicast_lock);
@@ -913,7 +916,7 @@ static void br_ip6_multicast_port_query_expired(unsigned long data)
913 916
914void br_multicast_add_port(struct net_bridge_port *port) 917void br_multicast_add_port(struct net_bridge_port *port)
915{ 918{
916 port->multicast_router = 1; 919 port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
917 920
918 setup_timer(&port->multicast_router_timer, br_multicast_router_expired, 921 setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
919 (unsigned long)port); 922 (unsigned long)port);
@@ -960,7 +963,8 @@ void br_multicast_enable_port(struct net_bridge_port *port)
960#if IS_ENABLED(CONFIG_IPV6) 963#if IS_ENABLED(CONFIG_IPV6)
961 br_multicast_enable(&port->ip6_own_query); 964 br_multicast_enable(&port->ip6_own_query);
962#endif 965#endif
963 if (port->multicast_router == 2 && hlist_unhashed(&port->rlist)) 966 if (port->multicast_router == MDB_RTR_TYPE_PERM &&
967 hlist_unhashed(&port->rlist))
964 br_multicast_add_router(br, port); 968 br_multicast_add_router(br, port);
965 969
966out: 970out:
@@ -975,12 +979,15 @@ void br_multicast_disable_port(struct net_bridge_port *port)
975 979
976 spin_lock(&br->multicast_lock); 980 spin_lock(&br->multicast_lock);
977 hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) 981 hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
978 if (pg->state == MDB_TEMPORARY) 982 if (!(pg->flags & MDB_PG_FLAGS_PERMANENT))
979 br_multicast_del_pg(br, pg); 983 br_multicast_del_pg(br, pg);
980 984
981 if (!hlist_unhashed(&port->rlist)) { 985 if (!hlist_unhashed(&port->rlist)) {
982 hlist_del_init_rcu(&port->rlist); 986 hlist_del_init_rcu(&port->rlist);
983 br_rtr_notify(br->dev, port, RTM_DELMDB); 987 br_rtr_notify(br->dev, port, RTM_DELMDB);
988 /* Don't allow timer refresh if disabling */
989 if (port->multicast_router == MDB_RTR_TYPE_TEMP)
990 port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
984 } 991 }
985 del_timer(&port->multicast_router_timer); 992 del_timer(&port->multicast_router_timer);
986 del_timer(&port->ip4_own_query.timer); 993 del_timer(&port->ip4_own_query.timer);
@@ -1228,13 +1235,14 @@ static void br_multicast_mark_router(struct net_bridge *br,
1228 unsigned long now = jiffies; 1235 unsigned long now = jiffies;
1229 1236
1230 if (!port) { 1237 if (!port) {
1231 if (br->multicast_router == 1) 1238 if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY)
1232 mod_timer(&br->multicast_router_timer, 1239 mod_timer(&br->multicast_router_timer,
1233 now + br->multicast_querier_interval); 1240 now + br->multicast_querier_interval);
1234 return; 1241 return;
1235 } 1242 }
1236 1243
1237 if (port->multicast_router != 1) 1244 if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
1245 port->multicast_router == MDB_RTR_TYPE_PERM)
1238 return; 1246 return;
1239 1247
1240 br_multicast_add_router(br, port); 1248 br_multicast_add_router(br, port);
@@ -1453,8 +1461,7 @@ br_multicast_leave_group(struct net_bridge *br,
1453 hlist_del_init(&p->mglist); 1461 hlist_del_init(&p->mglist);
1454 del_timer(&p->timer); 1462 del_timer(&p->timer);
1455 call_rcu_bh(&p->rcu, br_multicast_free_pg); 1463 call_rcu_bh(&p->rcu, br_multicast_free_pg);
1456 br_mdb_notify(br->dev, port, group, RTM_DELMDB, 1464 br_mdb_notify(br->dev, p, RTM_DELMDB);
1457 p->state);
1458 1465
1459 if (!mp->ports && !mp->mglist && 1466 if (!mp->ports && !mp->mglist &&
1460 netif_running(br->dev)) 1467 netif_running(br->dev))
@@ -1715,7 +1722,7 @@ void br_multicast_init(struct net_bridge *br)
1715 br->hash_elasticity = 4; 1722 br->hash_elasticity = 4;
1716 br->hash_max = 512; 1723 br->hash_max = 512;
1717 1724
1718 br->multicast_router = 1; 1725 br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
1719 br->multicast_querier = 0; 1726 br->multicast_querier = 0;
1720 br->multicast_query_use_ifaddr = 0; 1727 br->multicast_query_use_ifaddr = 0;
1721 br->multicast_last_member_count = 2; 1728 br->multicast_last_member_count = 2;
@@ -1825,11 +1832,11 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
1825 spin_lock_bh(&br->multicast_lock); 1832 spin_lock_bh(&br->multicast_lock);
1826 1833
1827 switch (val) { 1834 switch (val) {
1828 case 0: 1835 case MDB_RTR_TYPE_DISABLED:
1829 case 2: 1836 case MDB_RTR_TYPE_PERM:
1830 del_timer(&br->multicast_router_timer); 1837 del_timer(&br->multicast_router_timer);
1831 /* fall through */ 1838 /* fall through */
1832 case 1: 1839 case MDB_RTR_TYPE_TEMP_QUERY:
1833 br->multicast_router = val; 1840 br->multicast_router = val;
1834 err = 0; 1841 err = 0;
1835 break; 1842 break;
@@ -1840,37 +1847,53 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
1840 return err; 1847 return err;
1841} 1848}
1842 1849
1850static void __del_port_router(struct net_bridge_port *p)
1851{
1852 if (hlist_unhashed(&p->rlist))
1853 return;
1854 hlist_del_init_rcu(&p->rlist);
1855 br_rtr_notify(p->br->dev, p, RTM_DELMDB);
1856}
1857
1843int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) 1858int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
1844{ 1859{
1845 struct net_bridge *br = p->br; 1860 struct net_bridge *br = p->br;
1861 unsigned long now = jiffies;
1846 int err = -EINVAL; 1862 int err = -EINVAL;
1847 1863
1848 spin_lock(&br->multicast_lock); 1864 spin_lock(&br->multicast_lock);
1849 1865 if (p->multicast_router == val) {
1850 switch (val) { 1866 /* Refresh the temp router port timer */
1851 case 0: 1867 if (p->multicast_router == MDB_RTR_TYPE_TEMP)
1852 case 1: 1868 mod_timer(&p->multicast_router_timer,
1853 case 2: 1869 now + br->multicast_querier_interval);
1854 p->multicast_router = val;
1855 err = 0; 1870 err = 0;
1856 1871 goto unlock;
1857 if (val < 2 && !hlist_unhashed(&p->rlist)) { 1872 }
1858 hlist_del_init_rcu(&p->rlist); 1873 switch (val) {
1859 br_rtr_notify(br->dev, p, RTM_DELMDB); 1874 case MDB_RTR_TYPE_DISABLED:
1860 } 1875 p->multicast_router = MDB_RTR_TYPE_DISABLED;
1861 1876 __del_port_router(p);
1862 if (val == 1) 1877 del_timer(&p->multicast_router_timer);
1863 break; 1878 break;
1864 1879 case MDB_RTR_TYPE_TEMP_QUERY:
1880 p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
1881 __del_port_router(p);
1882 break;
1883 case MDB_RTR_TYPE_PERM:
1884 p->multicast_router = MDB_RTR_TYPE_PERM;
1865 del_timer(&p->multicast_router_timer); 1885 del_timer(&p->multicast_router_timer);
1866
1867 if (val == 0)
1868 break;
1869
1870 br_multicast_add_router(br, p); 1886 br_multicast_add_router(br, p);
1871 break; 1887 break;
1888 case MDB_RTR_TYPE_TEMP:
1889 p->multicast_router = MDB_RTR_TYPE_TEMP;
1890 br_multicast_mark_router(br, p);
1891 break;
1892 default:
1893 goto unlock;
1872 } 1894 }
1873 1895 err = 0;
1896unlock:
1874 spin_unlock(&br->multicast_lock); 1897 spin_unlock(&br->multicast_lock);
1875 1898
1876 return err; 1899 return err;
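
The multicast router handling above replaces the bare 0/1/2 values with named constants and adds a fourth, temporary mode. The mapping implied by the hunks is roughly the following; the authoritative definition lives in include/uapi/linux/if_bridge.h, so take the exact spelling there as canonical:

enum {
	MDB_RTR_TYPE_DISABLED,		/* 0: never treat the port as a router port */
	MDB_RTR_TYPE_TEMP_QUERY,	/* 1: learn router ports from queries (old default) */
	MDB_RTR_TYPE_PERM,		/* 2: permanent router port, no expiry timer */
	MDB_RTR_TYPE_TEMP,		/* 3: temporary router port; falls back to
					 *    TEMP_QUERY once its timer expires */
};
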
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 7ddbe7ec81d6..44114a94c576 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -37,6 +37,7 @@
37#include <net/addrconf.h> 37#include <net/addrconf.h>
38#include <net/route.h> 38#include <net/route.h>
39#include <net/netfilter/br_netfilter.h> 39#include <net/netfilter/br_netfilter.h>
40#include <net/netns/generic.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include "br_private.h" 43#include "br_private.h"
@@ -44,6 +45,12 @@
44#include <linux/sysctl.h> 45#include <linux/sysctl.h>
45#endif 46#endif
46 47
48static int brnf_net_id __read_mostly;
49
50struct brnf_net {
51 bool enabled;
52};
53
47#ifdef CONFIG_SYSCTL 54#ifdef CONFIG_SYSCTL
48static struct ctl_table_header *brnf_sysctl_header; 55static struct ctl_table_header *brnf_sysctl_header;
49static int brnf_call_iptables __read_mostly = 1; 56static int brnf_call_iptables __read_mostly = 1;
@@ -938,6 +945,53 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
938 }, 945 },
939}; 946};
940 947
948static int brnf_device_event(struct notifier_block *unused, unsigned long event,
949 void *ptr)
950{
951 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
952 struct brnf_net *brnet;
953 struct net *net;
954 int ret;
955
956 if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE))
957 return NOTIFY_DONE;
958
959 ASSERT_RTNL();
960
961 net = dev_net(dev);
962 brnet = net_generic(net, brnf_net_id);
963 if (brnet->enabled)
964 return NOTIFY_OK;
965
966 ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
967 if (ret)
968 return NOTIFY_BAD;
969
970 brnet->enabled = true;
971 return NOTIFY_OK;
972}
973
974static void __net_exit brnf_exit_net(struct net *net)
975{
976 struct brnf_net *brnet = net_generic(net, brnf_net_id);
977
978 if (!brnet->enabled)
979 return;
980
981 nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
982 brnet->enabled = false;
983}
984
985static struct pernet_operations brnf_net_ops __read_mostly = {
986 .exit = brnf_exit_net,
987 .id = &brnf_net_id,
988 .size = sizeof(struct brnf_net),
989};
990
991static struct notifier_block brnf_notifier __read_mostly = {
992 .notifier_call = brnf_device_event,
993};
994
941#ifdef CONFIG_SYSCTL 995#ifdef CONFIG_SYSCTL
942static 996static
943int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, 997int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
@@ -1003,16 +1057,23 @@ static int __init br_netfilter_init(void)
1003{ 1057{
1004 int ret; 1058 int ret;
1005 1059
1006 ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1060 ret = register_pernet_subsys(&brnf_net_ops);
1007 if (ret < 0) 1061 if (ret < 0)
1008 return ret; 1062 return ret;
1009 1063
1064 ret = register_netdevice_notifier(&brnf_notifier);
1065 if (ret < 0) {
1066 unregister_pernet_subsys(&brnf_net_ops);
1067 return ret;
1068 }
1069
1010#ifdef CONFIG_SYSCTL 1070#ifdef CONFIG_SYSCTL
1011 brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table); 1071 brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
1012 if (brnf_sysctl_header == NULL) { 1072 if (brnf_sysctl_header == NULL) {
1013 printk(KERN_WARNING 1073 printk(KERN_WARNING
1014 "br_netfilter: can't register to sysctl.\n"); 1074 "br_netfilter: can't register to sysctl.\n");
1015 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1075 unregister_netdevice_notifier(&brnf_notifier);
1076 unregister_pernet_subsys(&brnf_net_ops);
1016 return -ENOMEM; 1077 return -ENOMEM;
1017 } 1078 }
1018#endif 1079#endif
@@ -1024,7 +1085,8 @@ static int __init br_netfilter_init(void)
1024static void __exit br_netfilter_fini(void) 1085static void __exit br_netfilter_fini(void)
1025{ 1086{
1026 RCU_INIT_POINTER(nf_br_ops, NULL); 1087 RCU_INIT_POINTER(nf_br_ops, NULL);
1027 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1088 unregister_netdevice_notifier(&brnf_notifier);
1089 unregister_pernet_subsys(&brnf_net_ops);
1028#ifdef CONFIG_SYSCTL 1090#ifdef CONFIG_SYSCTL
1029 unregister_net_sysctl_table(brnf_sysctl_header); 1091 unregister_net_sysctl_table(brnf_sysctl_header);
1030#endif 1092#endif
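
br_netfilter now keeps its hook registration per network namespace and only pays for it once a bridge actually appears in that namespace (the NETDEV_REGISTER notifier above), with the pernet .exit handler undoing it on namespace teardown. A stripped-down template of that pernet plumbing, with example_* names standing in for anything not shown in the hunk:

static int example_net_id __read_mostly;

struct example_net {
	bool enabled;			/* hooks registered for this netns? */
};

static void __net_exit example_exit_net(struct net *net)
{
	/* .size storage is zero-initialised at netns creation, so 'enabled'
	 * starts out false until the notifier flips it. */
	struct example_net *en = net_generic(net, example_net_id);

	if (!en->enabled)
		return;
	/* undo whatever the on-demand path set up for this netns */
	en->enabled = false;
}

static struct pernet_operations example_net_ops = {
	.exit = example_exit_net,
	.id   = &example_net_id,	/* key for net_generic() lookups */
	.size = sizeof(struct example_net),
};

/* module init would call register_pernet_subsys(&example_net_ops); */
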
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 40197ff8918a..e9c635eae24d 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -598,7 +598,6 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state)
598 return -ENETDOWN; 598 return -ENETDOWN;
599 599
600 br_set_state(p, state); 600 br_set_state(p, state);
601 br_log_state(p);
602 br_port_state_selection(p->br); 601 br_port_state_selection(p->br);
603 return 0; 602 return 0;
604} 603}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 216018c76018..1b5d145dfcbf 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -150,6 +150,9 @@ struct net_bridge_fdb_entry
150 struct rcu_head rcu; 150 struct rcu_head rcu;
151}; 151};
152 152
153#define MDB_PG_FLAGS_PERMANENT BIT(0)
154#define MDB_PG_FLAGS_OFFLOAD BIT(1)
155
153struct net_bridge_port_group { 156struct net_bridge_port_group {
154 struct net_bridge_port *port; 157 struct net_bridge_port *port;
155 struct net_bridge_port_group __rcu *next; 158 struct net_bridge_port_group __rcu *next;
@@ -157,7 +160,7 @@ struct net_bridge_port_group {
157 struct rcu_head rcu; 160 struct rcu_head rcu;
158 struct timer_list timer; 161 struct timer_list timer;
159 struct br_ip addr; 162 struct br_ip addr;
160 unsigned char state; 163 unsigned char flags;
161}; 164};
162 165
163struct net_bridge_mdb_entry 166struct net_bridge_mdb_entry
@@ -554,11 +557,11 @@ void br_multicast_free_pg(struct rcu_head *head);
554struct net_bridge_port_group * 557struct net_bridge_port_group *
555br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, 558br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
556 struct net_bridge_port_group __rcu *next, 559 struct net_bridge_port_group __rcu *next,
557 unsigned char state); 560 unsigned char flags);
558void br_mdb_init(void); 561void br_mdb_init(void);
559void br_mdb_uninit(void); 562void br_mdb_uninit(void);
560void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, 563void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg,
561 struct br_ip *group, int type, u8 state); 564 int type);
562void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, 565void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
563 int type); 566 int type);
564 567
@@ -897,7 +900,6 @@ static inline void br_nf_core_fini(void) {}
897#endif 900#endif
898 901
899/* br_stp.c */ 902/* br_stp.c */
900void br_log_state(const struct net_bridge_port *p);
901void br_set_state(struct net_bridge_port *p, unsigned int state); 903void br_set_state(struct net_bridge_port *p, unsigned int state);
902struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no); 904struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no);
903void br_init_port(struct net_bridge_port *p); 905void br_init_port(struct net_bridge_port *p);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index b3cca126b103..e23449094188 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -30,13 +30,6 @@ static const char *const br_port_state_names[] = {
30 [BR_STATE_BLOCKING] = "blocking", 30 [BR_STATE_BLOCKING] = "blocking",
31}; 31};
32 32
33void br_log_state(const struct net_bridge_port *p)
34{
35 br_info(p->br, "port %u(%s) entered %s state\n",
36 (unsigned int) p->port_no, p->dev->name,
37 br_port_state_names[p->state]);
38}
39
40void br_set_state(struct net_bridge_port *p, unsigned int state) 33void br_set_state(struct net_bridge_port *p, unsigned int state)
41{ 34{
42 struct switchdev_attr attr = { 35 struct switchdev_attr attr = {
@@ -52,6 +45,10 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
52 if (err && err != -EOPNOTSUPP) 45 if (err && err != -EOPNOTSUPP)
53 br_warn(p->br, "error setting offload STP state on port %u(%s)\n", 46 br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
54 (unsigned int) p->port_no, p->dev->name); 47 (unsigned int) p->port_no, p->dev->name);
48 else
49 br_info(p->br, "port %u(%s) entered %s state\n",
50 (unsigned int) p->port_no, p->dev->name,
51 br_port_state_names[p->state]);
55} 52}
56 53
57/* called under bridge lock */ 54/* called under bridge lock */
@@ -126,7 +123,6 @@ static void br_root_port_block(const struct net_bridge *br,
126 (unsigned int) p->port_no, p->dev->name); 123 (unsigned int) p->port_no, p->dev->name);
127 124
128 br_set_state(p, BR_STATE_LISTENING); 125 br_set_state(p, BR_STATE_LISTENING);
129 br_log_state(p);
130 br_ifinfo_notify(RTM_NEWLINK, p); 126 br_ifinfo_notify(RTM_NEWLINK, p);
131 127
132 if (br->forward_delay > 0) 128 if (br->forward_delay > 0)
@@ -407,7 +403,6 @@ static void br_make_blocking(struct net_bridge_port *p)
407 br_topology_change_detection(p->br); 403 br_topology_change_detection(p->br);
408 404
409 br_set_state(p, BR_STATE_BLOCKING); 405 br_set_state(p, BR_STATE_BLOCKING);
410 br_log_state(p);
411 br_ifinfo_notify(RTM_NEWLINK, p); 406 br_ifinfo_notify(RTM_NEWLINK, p);
412 407
413 del_timer(&p->forward_delay_timer); 408 del_timer(&p->forward_delay_timer);
@@ -431,7 +426,6 @@ static void br_make_forwarding(struct net_bridge_port *p)
431 else 426 else
432 br_set_state(p, BR_STATE_LEARNING); 427 br_set_state(p, BR_STATE_LEARNING);
433 428
434 br_log_state(p);
435 br_ifinfo_notify(RTM_NEWLINK, p); 429 br_ifinfo_notify(RTM_NEWLINK, p);
436 430
437 if (br->forward_delay != 0) 431 if (br->forward_delay != 0)
@@ -568,6 +562,14 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
568 562
569} 563}
570 564
565/* Set time interval that dynamic forwarding entries live
566 * For pure software bridge, allow values outside the 802.1
567 * standard specification for special cases:
568 * 0 - entry never ages (all permanent)
569 * 1 - entry disappears (no persistence)
570 *
571 * Offloaded switch entries may be more restrictive
572 */
571int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) 573int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
572{ 574{
573 struct switchdev_attr attr = { 575 struct switchdev_attr attr = {
@@ -579,9 +581,6 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
579 unsigned long t = clock_t_to_jiffies(ageing_time); 581 unsigned long t = clock_t_to_jiffies(ageing_time);
580 int err; 582 int err;
581 583
582 if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME)
583 return -ERANGE;
584
585 err = switchdev_port_attr_set(br->dev, &attr); 584 err = switchdev_port_attr_set(br->dev, &attr);
586 if (err) 585 if (err)
587 return err; 586 return err;
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index a31ac6ad76a2..984d46263007 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -102,7 +102,6 @@ void br_stp_enable_port(struct net_bridge_port *p)
102{ 102{
103 br_init_port(p); 103 br_init_port(p);
104 br_port_state_selection(p->br); 104 br_port_state_selection(p->br);
105 br_log_state(p);
106 br_ifinfo_notify(RTM_NEWLINK, p); 105 br_ifinfo_notify(RTM_NEWLINK, p);
107} 106}
108 107
@@ -118,7 +117,6 @@ void br_stp_disable_port(struct net_bridge_port *p)
118 p->topology_change_ack = 0; 117 p->topology_change_ack = 0;
119 p->config_pending = 0; 118 p->config_pending = 0;
120 119
121 br_log_state(p);
122 br_ifinfo_notify(RTM_NEWLINK, p); 120 br_ifinfo_notify(RTM_NEWLINK, p);
123 121
124 del_timer(&p->message_age_timer); 122 del_timer(&p->message_age_timer);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 5f0f5af0ec35..da058b85aa22 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -98,7 +98,6 @@ static void br_forward_delay_timer_expired(unsigned long arg)
98 br_topology_change_detection(br); 98 br_topology_change_detection(br);
99 netif_carrier_on(br->dev); 99 netif_carrier_on(br->dev);
100 } 100 }
101 br_log_state(p);
102 rcu_read_lock(); 101 rcu_read_lock();
103 br_ifinfo_notify(RTM_NEWLINK, p); 102 br_ifinfo_notify(RTM_NEWLINK, p);
104 rcu_read_unlock(); 103 rcu_read_unlock();
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 85e43af4af7a..9309bb4f2a5b 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -955,6 +955,13 @@ err_rhtbl:
955 */ 955 */
956int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) 956int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
957{ 957{
958 struct switchdev_obj_port_vlan v = {
959 .obj.orig_dev = port->dev,
960 .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
961 .flags = flags,
962 .vid_begin = vid,
963 .vid_end = vid,
964 };
958 struct net_bridge_vlan *vlan; 965 struct net_bridge_vlan *vlan;
959 int ret; 966 int ret;
960 967
@@ -962,6 +969,10 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
962 969
963 vlan = br_vlan_find(nbp_vlan_group(port), vid); 970 vlan = br_vlan_find(nbp_vlan_group(port), vid);
964 if (vlan) { 971 if (vlan) {
972 /* Pass the flags to the hardware bridge */
973 ret = switchdev_port_obj_add(port->dev, &v.obj);
974 if (ret && ret != -EOPNOTSUPP)
975 return ret;
965 __vlan_add_flags(vlan, flags); 976 __vlan_add_flags(vlan, flags);
966 return 0; 977 return 0;
967 } 978 }
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index fdba3d9fbff3..adc8d7221dbb 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -48,6 +48,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
48 struct iphdr *niph; 48 struct iphdr *niph;
49 const struct tcphdr *oth; 49 const struct tcphdr *oth;
50 struct tcphdr _oth; 50 struct tcphdr _oth;
51 struct net *net = sock_net(oldskb->sk);
51 52
52 if (!nft_bridge_iphdr_validate(oldskb)) 53 if (!nft_bridge_iphdr_validate(oldskb))
53 return; 54 return;
@@ -63,9 +64,9 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb,
63 64
64 skb_reserve(nskb, LL_MAX_HEADER); 65 skb_reserve(nskb, LL_MAX_HEADER);
65 niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, 66 niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
66 sysctl_ip_default_ttl); 67 net->ipv4.sysctl_ip_default_ttl);
67 nf_reject_ip_tcphdr_put(nskb, oldskb, oth); 68 nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
68 niph->ttl = sysctl_ip_default_ttl; 69 niph->ttl = net->ipv4.sysctl_ip_default_ttl;
69 niph->tot_len = htons(nskb->len); 70 niph->tot_len = htons(nskb->len);
70 ip_send_check(niph); 71 ip_send_check(niph);
71 72
@@ -85,6 +86,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
85 void *payload; 86 void *payload;
86 __wsum csum; 87 __wsum csum;
87 u8 proto; 88 u8 proto;
89 struct net *net = sock_net(oldskb->sk);
88 90
89 if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) 91 if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
90 return; 92 return;
@@ -119,7 +121,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb,
119 121
120 skb_reserve(nskb, LL_MAX_HEADER); 122 skb_reserve(nskb, LL_MAX_HEADER);
121 niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, 123 niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
122 sysctl_ip_default_ttl); 124 net->ipv4.sysctl_ip_default_ttl);
123 125
124 skb_reset_transport_header(nskb); 126 skb_reset_transport_header(nskb);
125 icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr)); 127 icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
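
The nft_reject_bridge hunks switch from the global sysctl_ip_default_ttl to the namespace-local copy, looked up through the socket attached to the original skb. As a short illustration of that lookup (the dev_net() fallback is this sketch's own addition, not something the hunk does):

	struct net *net = oldskb->sk ? sock_net(oldskb->sk)
				     : dev_net(oldskb->dev);
	int ttl = net->ipv4.sysctl_ip_default_ttl;	/* per-netns value */
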
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index f6c3b2137eea..59ce1fcc220c 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -286,7 +286,7 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
286 else 286 else
287 skb_trim(skb, len); 287 skb_trim(skb, len);
288 288
289 return cfpkt_getlen(pkt); 289 return cfpkt_getlen(pkt);
290 } 290 }
291 291
292 /* Need to expand SKB */ 292 /* Need to expand SKB */
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 61d7617d9249..b82440e1fcb4 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -159,7 +159,7 @@ static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
159 tmppkt = NULL; 159 tmppkt = NULL;
160 160
161 /* Verify that length is correct */ 161 /* Verify that length is correct */
162 err = EPROTO; 162 err = -EPROTO;
163 if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1) 163 if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
164 goto out; 164 goto out;
165 } 165 }
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index bcbec33c6a14..dcc18c6f7cf9 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -361,7 +361,6 @@ ceph_parse_options(char *options, const char *dev_name,
361 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 361 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
362 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; 362 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
363 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; 363 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
364 opt->monc_ping_timeout = CEPH_MONC_PING_TIMEOUT_DEFAULT;
365 364
366 /* get mon ip(s) */ 365 /* get mon ip(s) */
367 /* ip1[:port1][,ip2[:port2]...] */ 366 /* ip1[:port1][,ip2[:port2]...] */
@@ -686,6 +685,9 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
686 return client->auth_err; 685 return client->auth_err;
687 } 686 }
688 687
688 pr_info("client%llu fsid %pU\n", ceph_client_id(client), &client->fsid);
689 ceph_debugfs_client_init(client);
690
689 return 0; 691 return 0;
690} 692}
691EXPORT_SYMBOL(__ceph_open_session); 693EXPORT_SYMBOL(__ceph_open_session);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 393bfb22d5bb..5fcfb98f309e 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -403,6 +403,7 @@ static int is_out(const struct crush_map *map,
403 * @local_retries: localized retries 403 * @local_retries: localized retries
404 * @local_fallback_retries: localized fallback retries 404 * @local_fallback_retries: localized fallback retries
405 * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose) 405 * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
406 * @stable: stable mode starts rep=0 in the recursive call for all replicas
406 * @vary_r: pass r to recursive calls 407 * @vary_r: pass r to recursive calls
407 * @out2: second output vector for leaf items (if @recurse_to_leaf) 408 * @out2: second output vector for leaf items (if @recurse_to_leaf)
408 * @parent_r: r value passed from the parent 409 * @parent_r: r value passed from the parent
@@ -419,6 +420,7 @@ static int crush_choose_firstn(const struct crush_map *map,
419 unsigned int local_fallback_retries, 420 unsigned int local_fallback_retries,
420 int recurse_to_leaf, 421 int recurse_to_leaf,
421 unsigned int vary_r, 422 unsigned int vary_r,
423 unsigned int stable,
422 int *out2, 424 int *out2,
423 int parent_r) 425 int parent_r)
424{ 426{
@@ -433,13 +435,13 @@ static int crush_choose_firstn(const struct crush_map *map,
433 int collide, reject; 435 int collide, reject;
434 int count = out_size; 436 int count = out_size;
435 437
436 dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n", 438 dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n",
437 recurse_to_leaf ? "_LEAF" : "", 439 recurse_to_leaf ? "_LEAF" : "",
438 bucket->id, x, outpos, numrep, 440 bucket->id, x, outpos, numrep,
439 tries, recurse_tries, local_retries, local_fallback_retries, 441 tries, recurse_tries, local_retries, local_fallback_retries,
440 parent_r); 442 parent_r, stable);
441 443
442 for (rep = outpos; rep < numrep && count > 0 ; rep++) { 444 for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {
443 /* keep trying until we get a non-out, non-colliding item */ 445 /* keep trying until we get a non-out, non-colliding item */
444 ftotal = 0; 446 ftotal = 0;
445 skip_rep = 0; 447 skip_rep = 0;
@@ -512,13 +514,14 @@ static int crush_choose_firstn(const struct crush_map *map,
512 if (crush_choose_firstn(map, 514 if (crush_choose_firstn(map,
513 map->buckets[-1-item], 515 map->buckets[-1-item],
514 weight, weight_max, 516 weight, weight_max,
515 x, outpos+1, 0, 517 x, stable ? 1 : outpos+1, 0,
516 out2, outpos, count, 518 out2, outpos, count,
517 recurse_tries, 0, 519 recurse_tries, 0,
518 local_retries, 520 local_retries,
519 local_fallback_retries, 521 local_fallback_retries,
520 0, 522 0,
521 vary_r, 523 vary_r,
524 stable,
522 NULL, 525 NULL,
523 sub_r) <= outpos) 526 sub_r) <= outpos)
524 /* didn't get leaf */ 527 /* didn't get leaf */
@@ -816,6 +819,7 @@ int crush_do_rule(const struct crush_map *map,
816 int choose_local_fallback_retries = map->choose_local_fallback_tries; 819 int choose_local_fallback_retries = map->choose_local_fallback_tries;
817 820
818 int vary_r = map->chooseleaf_vary_r; 821 int vary_r = map->chooseleaf_vary_r;
822 int stable = map->chooseleaf_stable;
819 823
820 if ((__u32)ruleno >= map->max_rules) { 824 if ((__u32)ruleno >= map->max_rules) {
821 dprintk(" bad ruleno %d\n", ruleno); 825 dprintk(" bad ruleno %d\n", ruleno);
@@ -835,7 +839,8 @@ int crush_do_rule(const struct crush_map *map,
835 case CRUSH_RULE_TAKE: 839 case CRUSH_RULE_TAKE:
836 if ((curstep->arg1 >= 0 && 840 if ((curstep->arg1 >= 0 &&
837 curstep->arg1 < map->max_devices) || 841 curstep->arg1 < map->max_devices) ||
838 (-1-curstep->arg1 < map->max_buckets && 842 (-1-curstep->arg1 >= 0 &&
843 -1-curstep->arg1 < map->max_buckets &&
839 map->buckets[-1-curstep->arg1])) { 844 map->buckets[-1-curstep->arg1])) {
840 w[0] = curstep->arg1; 845 w[0] = curstep->arg1;
841 wsize = 1; 846 wsize = 1;
@@ -869,6 +874,11 @@ int crush_do_rule(const struct crush_map *map,
869 vary_r = curstep->arg1; 874 vary_r = curstep->arg1;
870 break; 875 break;
871 876
877 case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
878 if (curstep->arg1 >= 0)
879 stable = curstep->arg1;
880 break;
881
872 case CRUSH_RULE_CHOOSELEAF_FIRSTN: 882 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
873 case CRUSH_RULE_CHOOSE_FIRSTN: 883 case CRUSH_RULE_CHOOSE_FIRSTN:
874 firstn = 1; 884 firstn = 1;
@@ -888,6 +898,7 @@ int crush_do_rule(const struct crush_map *map,
888 osize = 0; 898 osize = 0;
889 899
890 for (i = 0; i < wsize; i++) { 900 for (i = 0; i < wsize; i++) {
901 int bno;
891 /* 902 /*
892 * see CRUSH_N, CRUSH_N_MINUS macros. 903 * see CRUSH_N, CRUSH_N_MINUS macros.
893 * basically, numrep <= 0 means relative to 904 * basically, numrep <= 0 means relative to
@@ -900,6 +911,13 @@ int crush_do_rule(const struct crush_map *map,
900 continue; 911 continue;
901 } 912 }
902 j = 0; 913 j = 0;
914 /* make sure bucket id is valid */
915 bno = -1 - w[i];
916 if (bno < 0 || bno >= map->max_buckets) {
917 /* w[i] is probably CRUSH_ITEM_NONE */
918 dprintk(" bad w[i] %d\n", w[i]);
919 continue;
920 }
903 if (firstn) { 921 if (firstn) {
904 int recurse_tries; 922 int recurse_tries;
905 if (choose_leaf_tries) 923 if (choose_leaf_tries)
@@ -911,7 +929,7 @@ int crush_do_rule(const struct crush_map *map,
911 recurse_tries = choose_tries; 929 recurse_tries = choose_tries;
912 osize += crush_choose_firstn( 930 osize += crush_choose_firstn(
913 map, 931 map,
914 map->buckets[-1-w[i]], 932 map->buckets[bno],
915 weight, weight_max, 933 weight, weight_max,
916 x, numrep, 934 x, numrep,
917 curstep->arg2, 935 curstep->arg2,
@@ -923,6 +941,7 @@ int crush_do_rule(const struct crush_map *map,
923 choose_local_fallback_retries, 941 choose_local_fallback_retries,
924 recurse_to_leaf, 942 recurse_to_leaf,
925 vary_r, 943 vary_r,
944 stable,
926 c+osize, 945 c+osize,
927 0); 946 0);
928 } else { 947 } else {
@@ -930,7 +949,7 @@ int crush_do_rule(const struct crush_map *map,
930 numrep : (result_max-osize)); 949 numrep : (result_max-osize));
931 crush_choose_indep( 950 crush_choose_indep(
932 map, 951 map,
933 map->buckets[-1-w[i]], 952 map->buckets[bno],
934 weight, weight_max, 953 weight, weight_max,
935 x, out_size, numrep, 954 x, out_size, numrep,
936 curstep->arg2, 955 curstep->arg2,
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 42e8649c6e79..db2847ac5f12 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -4,7 +4,8 @@
4#include <linux/err.h> 4#include <linux/err.h>
5#include <linux/scatterlist.h> 5#include <linux/scatterlist.h>
6#include <linux/slab.h> 6#include <linux/slab.h>
7#include <crypto/hash.h> 7#include <crypto/aes.h>
8#include <crypto/skcipher.h>
8#include <linux/key-type.h> 9#include <linux/key-type.h>
9 10
10#include <keys/ceph-type.h> 11#include <keys/ceph-type.h>
@@ -79,9 +80,9 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey)
79 return 0; 80 return 0;
80} 81}
81 82
82static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) 83static struct crypto_skcipher *ceph_crypto_alloc_cipher(void)
83{ 84{
84 return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); 85 return crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
85} 86}
86 87
87static const u8 *aes_iv = (u8 *)CEPH_AES_IV; 88static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
@@ -162,11 +163,10 @@ static int ceph_aes_encrypt(const void *key, int key_len,
162{ 163{
163 struct scatterlist sg_in[2], prealloc_sg; 164 struct scatterlist sg_in[2], prealloc_sg;
164 struct sg_table sg_out; 165 struct sg_table sg_out;
165 struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); 166 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
166 struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; 167 SKCIPHER_REQUEST_ON_STACK(req, tfm);
167 int ret; 168 int ret;
168 void *iv; 169 char iv[AES_BLOCK_SIZE];
169 int ivsize;
170 size_t zero_padding = (0x10 - (src_len & 0x0f)); 170 size_t zero_padding = (0x10 - (src_len & 0x0f));
171 char pad[16]; 171 char pad[16];
172 172
@@ -184,10 +184,13 @@ static int ceph_aes_encrypt(const void *key, int key_len,
184 if (ret) 184 if (ret)
185 goto out_tfm; 185 goto out_tfm;
186 186
187 crypto_blkcipher_setkey((void *)tfm, key, key_len); 187 crypto_skcipher_setkey((void *)tfm, key, key_len);
188 iv = crypto_blkcipher_crt(tfm)->iv; 188 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
189 ivsize = crypto_blkcipher_ivsize(tfm); 189
190 memcpy(iv, aes_iv, ivsize); 190 skcipher_request_set_tfm(req, tfm);
191 skcipher_request_set_callback(req, 0, NULL, NULL);
192 skcipher_request_set_crypt(req, sg_in, sg_out.sgl,
193 src_len + zero_padding, iv);
191 194
192 /* 195 /*
193 print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, 196 print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -197,8 +200,8 @@ static int ceph_aes_encrypt(const void *key, int key_len,
197 print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, 200 print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
198 pad, zero_padding, 1); 201 pad, zero_padding, 1);
199 */ 202 */
200 ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, 203 ret = crypto_skcipher_encrypt(req);
201 src_len + zero_padding); 204 skcipher_request_zero(req);
202 if (ret < 0) { 205 if (ret < 0) {
203 pr_err("ceph_aes_crypt failed %d\n", ret); 206 pr_err("ceph_aes_crypt failed %d\n", ret);
204 goto out_sg; 207 goto out_sg;
@@ -211,7 +214,7 @@ static int ceph_aes_encrypt(const void *key, int key_len,
211out_sg: 214out_sg:
212 teardown_sgtable(&sg_out); 215 teardown_sgtable(&sg_out);
213out_tfm: 216out_tfm:
214 crypto_free_blkcipher(tfm); 217 crypto_free_skcipher(tfm);
215 return ret; 218 return ret;
216} 219}
217 220
@@ -222,11 +225,10 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
222{ 225{
223 struct scatterlist sg_in[3], prealloc_sg; 226 struct scatterlist sg_in[3], prealloc_sg;
224 struct sg_table sg_out; 227 struct sg_table sg_out;
225 struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); 228 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
226 struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; 229 SKCIPHER_REQUEST_ON_STACK(req, tfm);
227 int ret; 230 int ret;
228 void *iv; 231 char iv[AES_BLOCK_SIZE];
229 int ivsize;
230 size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f)); 232 size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f));
231 char pad[16]; 233 char pad[16];
232 234
@@ -245,10 +247,13 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
245 if (ret) 247 if (ret)
246 goto out_tfm; 248 goto out_tfm;
247 249
248 crypto_blkcipher_setkey((void *)tfm, key, key_len); 250 crypto_skcipher_setkey((void *)tfm, key, key_len);
249 iv = crypto_blkcipher_crt(tfm)->iv; 251 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
250 ivsize = crypto_blkcipher_ivsize(tfm); 252
251 memcpy(iv, aes_iv, ivsize); 253 skcipher_request_set_tfm(req, tfm);
254 skcipher_request_set_callback(req, 0, NULL, NULL);
255 skcipher_request_set_crypt(req, sg_in, sg_out.sgl,
256 src1_len + src2_len + zero_padding, iv);
252 257
253 /* 258 /*
254 print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, 259 print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -260,8 +265,8 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
260 print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, 265 print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1,
261 pad, zero_padding, 1); 266 pad, zero_padding, 1);
262 */ 267 */
263 ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, 268 ret = crypto_skcipher_encrypt(req);
264 src1_len + src2_len + zero_padding); 269 skcipher_request_zero(req);
265 if (ret < 0) { 270 if (ret < 0) {
266 pr_err("ceph_aes_crypt2 failed %d\n", ret); 271 pr_err("ceph_aes_crypt2 failed %d\n", ret);
267 goto out_sg; 272 goto out_sg;
@@ -274,7 +279,7 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
274out_sg: 279out_sg:
275 teardown_sgtable(&sg_out); 280 teardown_sgtable(&sg_out);
276out_tfm: 281out_tfm:
277 crypto_free_blkcipher(tfm); 282 crypto_free_skcipher(tfm);
278 return ret; 283 return ret;
279} 284}
280 285
@@ -284,11 +289,10 @@ static int ceph_aes_decrypt(const void *key, int key_len,
284{ 289{
285 struct sg_table sg_in; 290 struct sg_table sg_in;
286 struct scatterlist sg_out[2], prealloc_sg; 291 struct scatterlist sg_out[2], prealloc_sg;
287 struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); 292 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
288 struct blkcipher_desc desc = { .tfm = tfm }; 293 SKCIPHER_REQUEST_ON_STACK(req, tfm);
289 char pad[16]; 294 char pad[16];
290 void *iv; 295 char iv[AES_BLOCK_SIZE];
291 int ivsize;
292 int ret; 296 int ret;
293 int last_byte; 297 int last_byte;
294 298
@@ -302,10 +306,13 @@ static int ceph_aes_decrypt(const void *key, int key_len,
302 if (ret) 306 if (ret)
303 goto out_tfm; 307 goto out_tfm;
304 308
305 crypto_blkcipher_setkey((void *)tfm, key, key_len); 309 crypto_skcipher_setkey((void *)tfm, key, key_len);
306 iv = crypto_blkcipher_crt(tfm)->iv; 310 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
307 ivsize = crypto_blkcipher_ivsize(tfm); 311
308 memcpy(iv, aes_iv, ivsize); 312 skcipher_request_set_tfm(req, tfm);
313 skcipher_request_set_callback(req, 0, NULL, NULL);
314 skcipher_request_set_crypt(req, sg_in.sgl, sg_out,
315 src_len, iv);
309 316
310 /* 317 /*
311 print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, 318 print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -313,7 +320,8 @@ static int ceph_aes_decrypt(const void *key, int key_len,
313 print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, 320 print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
314 src, src_len, 1); 321 src, src_len, 1);
315 */ 322 */
316 ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); 323 ret = crypto_skcipher_decrypt(req);
324 skcipher_request_zero(req);
317 if (ret < 0) { 325 if (ret < 0) {
318 pr_err("ceph_aes_decrypt failed %d\n", ret); 326 pr_err("ceph_aes_decrypt failed %d\n", ret);
319 goto out_sg; 327 goto out_sg;
@@ -338,7 +346,7 @@ static int ceph_aes_decrypt(const void *key, int key_len,
338out_sg: 346out_sg:
339 teardown_sgtable(&sg_in); 347 teardown_sgtable(&sg_in);
340out_tfm: 348out_tfm:
341 crypto_free_blkcipher(tfm); 349 crypto_free_skcipher(tfm);
342 return ret; 350 return ret;
343} 351}
344 352
@@ -349,11 +357,10 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
349{ 357{
350 struct sg_table sg_in; 358 struct sg_table sg_in;
351 struct scatterlist sg_out[3], prealloc_sg; 359 struct scatterlist sg_out[3], prealloc_sg;
352 struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); 360 struct crypto_skcipher *tfm = ceph_crypto_alloc_cipher();
353 struct blkcipher_desc desc = { .tfm = tfm }; 361 SKCIPHER_REQUEST_ON_STACK(req, tfm);
354 char pad[16]; 362 char pad[16];
355 void *iv; 363 char iv[AES_BLOCK_SIZE];
356 int ivsize;
357 int ret; 364 int ret;
358 int last_byte; 365 int last_byte;
359 366
@@ -368,10 +375,13 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
368 if (ret) 375 if (ret)
369 goto out_tfm; 376 goto out_tfm;
370 377
371 crypto_blkcipher_setkey((void *)tfm, key, key_len); 378 crypto_skcipher_setkey((void *)tfm, key, key_len);
372 iv = crypto_blkcipher_crt(tfm)->iv; 379 memcpy(iv, aes_iv, AES_BLOCK_SIZE);
373 ivsize = crypto_blkcipher_ivsize(tfm); 380
374 memcpy(iv, aes_iv, ivsize); 381 skcipher_request_set_tfm(req, tfm);
382 skcipher_request_set_callback(req, 0, NULL, NULL);
383 skcipher_request_set_crypt(req, sg_in.sgl, sg_out,
384 src_len, iv);
375 385
376 /* 386 /*
377 print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, 387 print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1,
@@ -379,7 +389,8 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
379 print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, 389 print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1,
380 src, src_len, 1); 390 src, src_len, 1);
381 */ 391 */
382 ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); 392 ret = crypto_skcipher_decrypt(req);
393 skcipher_request_zero(req);
383 if (ret < 0) { 394 if (ret < 0) {
384 pr_err("ceph_aes_decrypt failed %d\n", ret); 395 pr_err("ceph_aes_decrypt failed %d\n", ret);
385 goto out_sg; 396 goto out_sg;
@@ -415,7 +426,7 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
415out_sg: 426out_sg:
416 teardown_sgtable(&sg_in); 427 teardown_sgtable(&sg_in);
417out_tfm: 428out_tfm:
418 crypto_free_blkcipher(tfm); 429 crypto_free_skcipher(tfm);
419 return ret; 430 return ret;
420} 431}
421 432
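The crypto.c hunks above mechanically convert each helper from the removed blkcipher interface to the skcipher API: an on-stack request replaces struct blkcipher_desc, the IV moves into a local buffer, and the request is wiped with skcipher_request_zero() after use. A minimal, self-contained sketch of that calling pattern follows; the "cbc(aes)" transform name and the error handling are assumptions for illustration, not part of the patch.

/* Hedged sketch of the skcipher calling pattern used in the conversion above. */
#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

static int example_cbc_aes_encrypt(const u8 *key, unsigned int key_len,
				   struct scatterlist *src, struct scatterlist *dst,
				   unsigned int len, u8 *iv)
{
	/* assumption: a synchronous cbc(aes) transform, as ceph's allocator requests */
	struct crypto_skcipher *tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
	int ret;

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_skcipher_setkey(tfm, key, key_len);
	if (ret)
		goto out_free;

	{
		SKCIPHER_REQUEST_ON_STACK(req, tfm);	/* request lives on the stack */

		skcipher_request_set_tfm(req, tfm);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, src, dst, len, iv);
		ret = crypto_skcipher_encrypt(req);
		skcipher_request_zero(req);		/* wipe request state from the stack */
	}

out_free:
	crypto_free_skcipher(tfm);
	return ret;
}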
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 593dc2eabcc8..b902fbc7863e 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -112,15 +112,20 @@ static int monc_show(struct seq_file *s, void *p)
112 struct ceph_mon_generic_request *req; 112 struct ceph_mon_generic_request *req;
113 struct ceph_mon_client *monc = &client->monc; 113 struct ceph_mon_client *monc = &client->monc;
114 struct rb_node *rp; 114 struct rb_node *rp;
115 int i;
115 116
116 mutex_lock(&monc->mutex); 117 mutex_lock(&monc->mutex);
117 118
118 if (monc->have_mdsmap) 119 for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
119 seq_printf(s, "have mdsmap %u\n", (unsigned int)monc->have_mdsmap); 120 seq_printf(s, "have %s %u", ceph_sub_str[i],
120 if (monc->have_osdmap) 121 monc->subs[i].have);
121 seq_printf(s, "have osdmap %u\n", (unsigned int)monc->have_osdmap); 122 if (monc->subs[i].want)
122 if (monc->want_next_osdmap) 123 seq_printf(s, " want %llu%s",
123 seq_printf(s, "want next osdmap\n"); 124 le64_to_cpu(monc->subs[i].item.start),
125 (monc->subs[i].item.flags &
126 CEPH_SUBSCRIBE_ONETIME ? "" : "+"));
127 seq_putc(s, '\n');
128 }
124 129
125 for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { 130 for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
126 __u16 op; 131 __u16 op;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9cfedf565f5b..1831f6353622 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -235,18 +235,12 @@ static struct workqueue_struct *ceph_msgr_wq;
235static int ceph_msgr_slab_init(void) 235static int ceph_msgr_slab_init(void)
236{ 236{
237 BUG_ON(ceph_msg_cache); 237 BUG_ON(ceph_msg_cache);
238 ceph_msg_cache = kmem_cache_create("ceph_msg", 238 ceph_msg_cache = KMEM_CACHE(ceph_msg, 0);
239 sizeof (struct ceph_msg),
240 __alignof__(struct ceph_msg), 0, NULL);
241
242 if (!ceph_msg_cache) 239 if (!ceph_msg_cache)
243 return -ENOMEM; 240 return -ENOMEM;
244 241
245 BUG_ON(ceph_msg_data_cache); 242 BUG_ON(ceph_msg_data_cache);
246 ceph_msg_data_cache = kmem_cache_create("ceph_msg_data", 243 ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0);
247 sizeof (struct ceph_msg_data),
248 __alignof__(struct ceph_msg_data),
249 0, NULL);
250 if (ceph_msg_data_cache) 244 if (ceph_msg_data_cache)
251 return 0; 245 return 0;
252 246
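The slab-init hunk above is a straight substitution: KMEM_CACHE() derives the cache name, object size and alignment from the struct type. Roughly (illustrative expansion, not taken from the patch), KMEM_CACHE(ceph_msg, 0) stands for:

	ceph_msg_cache = kmem_cache_create("ceph_msg",
					   sizeof(struct ceph_msg),
					   __alignof__(struct ceph_msg),
					   0, NULL);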
@@ -1197,6 +1191,13 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
1197 return new_piece; 1191 return new_piece;
1198} 1192}
1199 1193
1194static size_t sizeof_footer(struct ceph_connection *con)
1195{
1196 return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
1197 sizeof(struct ceph_msg_footer) :
1198 sizeof(struct ceph_msg_footer_old);
1199}
1200
1200static void prepare_message_data(struct ceph_msg *msg, u32 data_len) 1201static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
1201{ 1202{
1202 BUG_ON(!msg); 1203 BUG_ON(!msg);
@@ -1214,25 +1215,19 @@ static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
1214static void prepare_write_message_footer(struct ceph_connection *con) 1215static void prepare_write_message_footer(struct ceph_connection *con)
1215{ 1216{
1216 struct ceph_msg *m = con->out_msg; 1217 struct ceph_msg *m = con->out_msg;
1217 int v = con->out_kvec_left;
1218 1218
1219 m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; 1219 m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
1220 1220
1221 dout("prepare_write_message_footer %p\n", con); 1221 dout("prepare_write_message_footer %p\n", con);
1222 con->out_kvec[v].iov_base = &m->footer; 1222 con_out_kvec_add(con, sizeof_footer(con), &m->footer);
1223 if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { 1223 if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
1224 if (con->ops->sign_message) 1224 if (con->ops->sign_message)
1225 con->ops->sign_message(m); 1225 con->ops->sign_message(m);
1226 else 1226 else
1227 m->footer.sig = 0; 1227 m->footer.sig = 0;
1228 con->out_kvec[v].iov_len = sizeof(m->footer);
1229 con->out_kvec_bytes += sizeof(m->footer);
1230 } else { 1228 } else {
1231 m->old_footer.flags = m->footer.flags; 1229 m->old_footer.flags = m->footer.flags;
1232 con->out_kvec[v].iov_len = sizeof(m->old_footer);
1233 con->out_kvec_bytes += sizeof(m->old_footer);
1234 } 1230 }
1235 con->out_kvec_left++;
1236 con->out_more = m->more_to_follow; 1231 con->out_more = m->more_to_follow;
1237 con->out_msg_done = true; 1232 con->out_msg_done = true;
1238} 1233}
@@ -2335,9 +2330,9 @@ static int read_partial_message(struct ceph_connection *con)
2335 ceph_pr_addr(&con->peer_addr.in_addr), 2330 ceph_pr_addr(&con->peer_addr.in_addr),
2336 seq, con->in_seq + 1); 2331 seq, con->in_seq + 1);
2337 con->in_base_pos = -front_len - middle_len - data_len - 2332 con->in_base_pos = -front_len - middle_len - data_len -
2338 sizeof(m->footer); 2333 sizeof_footer(con);
2339 con->in_tag = CEPH_MSGR_TAG_READY; 2334 con->in_tag = CEPH_MSGR_TAG_READY;
2340 return 0; 2335 return 1;
2341 } else if ((s64)seq - (s64)con->in_seq > 1) { 2336 } else if ((s64)seq - (s64)con->in_seq > 1) {
2342 pr_err("read_partial_message bad seq %lld expected %lld\n", 2337 pr_err("read_partial_message bad seq %lld expected %lld\n",
2343 seq, con->in_seq + 1); 2338 seq, con->in_seq + 1);
@@ -2360,10 +2355,10 @@ static int read_partial_message(struct ceph_connection *con)
2360 /* skip this message */ 2355 /* skip this message */
2361 dout("alloc_msg said skip message\n"); 2356 dout("alloc_msg said skip message\n");
2362 con->in_base_pos = -front_len - middle_len - data_len - 2357 con->in_base_pos = -front_len - middle_len - data_len -
2363 sizeof(m->footer); 2358 sizeof_footer(con);
2364 con->in_tag = CEPH_MSGR_TAG_READY; 2359 con->in_tag = CEPH_MSGR_TAG_READY;
2365 con->in_seq++; 2360 con->in_seq++;
2366 return 0; 2361 return 1;
2367 } 2362 }
2368 2363
2369 BUG_ON(!con->in_msg); 2364 BUG_ON(!con->in_msg);
@@ -2402,11 +2397,7 @@ static int read_partial_message(struct ceph_connection *con)
2402 } 2397 }
2403 2398
2404 /* footer */ 2399 /* footer */
2405 if (need_sign) 2400 size = sizeof_footer(con);
2406 size = sizeof(m->footer);
2407 else
2408 size = sizeof(m->old_footer);
2409
2410 end += size; 2401 end += size;
2411 ret = read_partial(con, end, size, &m->footer); 2402 ret = read_partial(con, end, size, &m->footer);
2412 if (ret <= 0) 2403 if (ret <= 0)
@@ -3082,10 +3073,7 @@ void ceph_msg_revoke(struct ceph_msg *msg)
3082 con->out_skip += con_out_kvec_skip(con); 3073 con->out_skip += con_out_kvec_skip(con);
3083 } else { 3074 } else {
3084 BUG_ON(!msg->data_length); 3075 BUG_ON(!msg->data_length);
3085 if (con->peer_features & CEPH_FEATURE_MSG_AUTH) 3076 con->out_skip += sizeof_footer(con);
3086 con->out_skip += sizeof(msg->footer);
3087 else
3088 con->out_skip += sizeof(msg->old_footer);
3089 } 3077 }
3090 /* data, middle, front */ 3078 /* data, middle, front */
3091 if (msg->data_length) 3079 if (msg->data_length)
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index de85dddc3dc0..cf638c009cfa 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -122,51 +122,91 @@ static void __close_session(struct ceph_mon_client *monc)
122 ceph_msg_revoke(monc->m_subscribe); 122 ceph_msg_revoke(monc->m_subscribe);
123 ceph_msg_revoke_incoming(monc->m_subscribe_ack); 123 ceph_msg_revoke_incoming(monc->m_subscribe_ack);
124 ceph_con_close(&monc->con); 124 ceph_con_close(&monc->con);
125 monc->cur_mon = -1; 125
126 monc->pending_auth = 0; 126 monc->pending_auth = 0;
127 ceph_auth_reset(monc->auth); 127 ceph_auth_reset(monc->auth);
128} 128}
129 129
130/* 130/*
131 * Open a session with a (new) monitor. 131 * Pick a new monitor at random and set cur_mon. If we are repicking
132 * (i.e. cur_mon is already set), be sure to pick a different one.
132 */ 133 */
133static int __open_session(struct ceph_mon_client *monc) 134static void pick_new_mon(struct ceph_mon_client *monc)
134{ 135{
135 char r; 136 int old_mon = monc->cur_mon;
136 int ret;
137 137
138 if (monc->cur_mon < 0) { 138 BUG_ON(monc->monmap->num_mon < 1);
139 get_random_bytes(&r, 1); 139
140 monc->cur_mon = r % monc->monmap->num_mon; 140 if (monc->monmap->num_mon == 1) {
141 dout("open_session num=%d r=%d -> mon%d\n", 141 monc->cur_mon = 0;
142 monc->monmap->num_mon, r, monc->cur_mon);
143 monc->sub_sent = 0;
144 monc->sub_renew_after = jiffies; /* i.e., expired */
145 monc->want_next_osdmap = !!monc->want_next_osdmap;
146
147 dout("open_session mon%d opening\n", monc->cur_mon);
148 ceph_con_open(&monc->con,
149 CEPH_ENTITY_TYPE_MON, monc->cur_mon,
150 &monc->monmap->mon_inst[monc->cur_mon].addr);
151
152 /* send an initial keepalive to ensure our timestamp is
153 * valid by the time we are in an OPENED state */
154 ceph_con_keepalive(&monc->con);
155
156 /* initiatiate authentication handshake */
157 ret = ceph_auth_build_hello(monc->auth,
158 monc->m_auth->front.iov_base,
159 monc->m_auth->front_alloc_len);
160 __send_prepared_auth_request(monc, ret);
161 } else { 142 } else {
162 dout("open_session mon%d already open\n", monc->cur_mon); 143 int max = monc->monmap->num_mon;
144 int o = -1;
145 int n;
146
147 if (monc->cur_mon >= 0) {
148 if (monc->cur_mon < monc->monmap->num_mon)
149 o = monc->cur_mon;
150 if (o >= 0)
151 max--;
152 }
153
154 n = prandom_u32() % max;
155 if (o >= 0 && n >= o)
156 n++;
157
158 monc->cur_mon = n;
163 } 159 }
164 return 0; 160
161 dout("%s mon%d -> mon%d out of %d mons\n", __func__, old_mon,
162 monc->cur_mon, monc->monmap->num_mon);
163}
164
165/*
166 * Open a session with a new monitor.
167 */
168static void __open_session(struct ceph_mon_client *monc)
169{
170 int ret;
171
172 pick_new_mon(monc);
173
174 monc->hunting = true;
175 if (monc->had_a_connection) {
176 monc->hunt_mult *= CEPH_MONC_HUNT_BACKOFF;
177 if (monc->hunt_mult > CEPH_MONC_HUNT_MAX_MULT)
178 monc->hunt_mult = CEPH_MONC_HUNT_MAX_MULT;
179 }
180
181 monc->sub_renew_after = jiffies; /* i.e., expired */
182 monc->sub_renew_sent = 0;
183
184 dout("%s opening mon%d\n", __func__, monc->cur_mon);
185 ceph_con_open(&monc->con, CEPH_ENTITY_TYPE_MON, monc->cur_mon,
186 &monc->monmap->mon_inst[monc->cur_mon].addr);
187
188 /*
189 * send an initial keepalive to ensure our timestamp is valid
190 * by the time we are in an OPENED state
191 */
192 ceph_con_keepalive(&monc->con);
193
194 /* initiate authentication handshake */
195 ret = ceph_auth_build_hello(monc->auth,
196 monc->m_auth->front.iov_base,
197 monc->m_auth->front_alloc_len);
198 BUG_ON(ret <= 0);
199 __send_prepared_auth_request(monc, ret);
165} 200}
166 201
167static bool __sub_expired(struct ceph_mon_client *monc) 202static void reopen_session(struct ceph_mon_client *monc)
168{ 203{
169 return time_after_eq(jiffies, monc->sub_renew_after); 204 if (!monc->hunting)
205 pr_info("mon%d %s session lost, hunting for new mon\n",
206 monc->cur_mon, ceph_pr_addr(&monc->con.peer_addr.in_addr));
207
208 __close_session(monc);
209 __open_session(monc);
170} 210}
171 211
172/* 212/*
@@ -174,74 +214,70 @@ static bool __sub_expired(struct ceph_mon_client *monc)
174 */ 214 */
175static void __schedule_delayed(struct ceph_mon_client *monc) 215static void __schedule_delayed(struct ceph_mon_client *monc)
176{ 216{
177 struct ceph_options *opt = monc->client->options;
178 unsigned long delay; 217 unsigned long delay;
179 218
180 if (monc->cur_mon < 0 || __sub_expired(monc)) { 219 if (monc->hunting)
181 delay = 10 * HZ; 220 delay = CEPH_MONC_HUNT_INTERVAL * monc->hunt_mult;
182 } else { 221 else
183 delay = 20 * HZ; 222 delay = CEPH_MONC_PING_INTERVAL;
184 if (opt->monc_ping_timeout > 0) 223
185 delay = min(delay, opt->monc_ping_timeout / 3);
186 }
187 dout("__schedule_delayed after %lu\n", delay); 224 dout("__schedule_delayed after %lu\n", delay);
188 schedule_delayed_work(&monc->delayed_work, 225 mod_delayed_work(system_wq, &monc->delayed_work,
189 round_jiffies_relative(delay)); 226 round_jiffies_relative(delay));
190} 227}
191 228
229const char *ceph_sub_str[] = {
230 [CEPH_SUB_MDSMAP] = "mdsmap",
231 [CEPH_SUB_MONMAP] = "monmap",
232 [CEPH_SUB_OSDMAP] = "osdmap",
233};
234
192/* 235/*
193 * Send subscribe request for mdsmap and/or osdmap. 236 * Send subscribe request for one or more maps, according to
237 * monc->subs.
194 */ 238 */
195static void __send_subscribe(struct ceph_mon_client *monc) 239static void __send_subscribe(struct ceph_mon_client *monc)
196{ 240{
197 dout("__send_subscribe sub_sent=%u exp=%u want_osd=%d\n", 241 struct ceph_msg *msg = monc->m_subscribe;
198 (unsigned int)monc->sub_sent, __sub_expired(monc), 242 void *p = msg->front.iov_base;
199 monc->want_next_osdmap); 243 void *const end = p + msg->front_alloc_len;
200 if ((__sub_expired(monc) && !monc->sub_sent) || 244 int num = 0;
201 monc->want_next_osdmap == 1) { 245 int i;
202 struct ceph_msg *msg = monc->m_subscribe; 246
203 struct ceph_mon_subscribe_item *i; 247 dout("%s sent %lu\n", __func__, monc->sub_renew_sent);
204 void *p, *end; 248
205 int num; 249 BUG_ON(monc->cur_mon < 0);
206 250
207 p = msg->front.iov_base; 251 if (!monc->sub_renew_sent)
208 end = p + msg->front_alloc_len; 252 monc->sub_renew_sent = jiffies | 1; /* never 0 */
209 253
210 num = 1 + !!monc->want_next_osdmap + !!monc->want_mdsmap; 254 msg->hdr.version = cpu_to_le16(2);
211 ceph_encode_32(&p, num); 255
212 256 for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
213 if (monc->want_next_osdmap) { 257 if (monc->subs[i].want)
214 dout("__send_subscribe to 'osdmap' %u\n", 258 num++;
215 (unsigned int)monc->have_osdmap);
216 ceph_encode_string(&p, end, "osdmap", 6);
217 i = p;
218 i->have = cpu_to_le64(monc->have_osdmap);
219 i->onetime = 1;
220 p += sizeof(*i);
221 monc->want_next_osdmap = 2; /* requested */
222 }
223 if (monc->want_mdsmap) {
224 dout("__send_subscribe to 'mdsmap' %u+\n",
225 (unsigned int)monc->have_mdsmap);
226 ceph_encode_string(&p, end, "mdsmap", 6);
227 i = p;
228 i->have = cpu_to_le64(monc->have_mdsmap);
229 i->onetime = 0;
230 p += sizeof(*i);
231 }
232 ceph_encode_string(&p, end, "monmap", 6);
233 i = p;
234 i->have = 0;
235 i->onetime = 0;
236 p += sizeof(*i);
237
238 msg->front.iov_len = p - msg->front.iov_base;
239 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
240 ceph_msg_revoke(msg);
241 ceph_con_send(&monc->con, ceph_msg_get(msg));
242
243 monc->sub_sent = jiffies | 1; /* never 0 */
244 } 259 }
260 BUG_ON(num < 1); /* monmap sub is always there */
261 ceph_encode_32(&p, num);
262 for (i = 0; i < ARRAY_SIZE(monc->subs); i++) {
263 const char *s = ceph_sub_str[i];
264
265 if (!monc->subs[i].want)
266 continue;
267
268 dout("%s %s start %llu flags 0x%x\n", __func__, s,
269 le64_to_cpu(monc->subs[i].item.start),
270 monc->subs[i].item.flags);
271 ceph_encode_string(&p, end, s, strlen(s));
272 memcpy(p, &monc->subs[i].item, sizeof(monc->subs[i].item));
273 p += sizeof(monc->subs[i].item);
274 }
275
276 BUG_ON(p != (end - 35 - (ARRAY_SIZE(monc->subs) - num) * 19));
277 msg->front.iov_len = p - msg->front.iov_base;
278 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
279 ceph_msg_revoke(msg);
280 ceph_con_send(&monc->con, ceph_msg_get(msg));
245} 281}
246 282
247static void handle_subscribe_ack(struct ceph_mon_client *monc, 283static void handle_subscribe_ack(struct ceph_mon_client *monc,
@@ -255,15 +291,16 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
255 seconds = le32_to_cpu(h->duration); 291 seconds = le32_to_cpu(h->duration);
256 292
257 mutex_lock(&monc->mutex); 293 mutex_lock(&monc->mutex);
258 if (monc->hunting) { 294 if (monc->sub_renew_sent) {
259 pr_info("mon%d %s session established\n", 295 monc->sub_renew_after = monc->sub_renew_sent +
260 monc->cur_mon, 296 (seconds >> 1) * HZ - 1;
261 ceph_pr_addr(&monc->con.peer_addr.in_addr)); 297 dout("%s sent %lu duration %d renew after %lu\n", __func__,
262 monc->hunting = false; 298 monc->sub_renew_sent, seconds, monc->sub_renew_after);
299 monc->sub_renew_sent = 0;
300 } else {
301 dout("%s sent %lu renew after %lu, ignoring\n", __func__,
302 monc->sub_renew_sent, monc->sub_renew_after);
263 } 303 }
264 dout("handle_subscribe_ack after %d seconds\n", seconds);
265 monc->sub_renew_after = monc->sub_sent + (seconds >> 1)*HZ - 1;
266 monc->sub_sent = 0;
267 mutex_unlock(&monc->mutex); 304 mutex_unlock(&monc->mutex);
268 return; 305 return;
269bad: 306bad:
@@ -272,36 +309,82 @@ bad:
272} 309}
273 310
274/* 311/*
275 * Keep track of which maps we have 312 * Register interest in a map
313 *
314 * @sub: one of CEPH_SUB_*
315 * @epoch: X for "every map since X", or 0 for "just the latest"
276 */ 316 */
277int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got) 317static bool __ceph_monc_want_map(struct ceph_mon_client *monc, int sub,
318 u32 epoch, bool continuous)
319{
320 __le64 start = cpu_to_le64(epoch);
321 u8 flags = !continuous ? CEPH_SUBSCRIBE_ONETIME : 0;
322
323 dout("%s %s epoch %u continuous %d\n", __func__, ceph_sub_str[sub],
324 epoch, continuous);
325
326 if (monc->subs[sub].want &&
327 monc->subs[sub].item.start == start &&
328 monc->subs[sub].item.flags == flags)
329 return false;
330
331 monc->subs[sub].item.start = start;
332 monc->subs[sub].item.flags = flags;
333 monc->subs[sub].want = true;
334
335 return true;
336}
337
338bool ceph_monc_want_map(struct ceph_mon_client *monc, int sub, u32 epoch,
339 bool continuous)
278{ 340{
341 bool need_request;
342
279 mutex_lock(&monc->mutex); 343 mutex_lock(&monc->mutex);
280 monc->have_mdsmap = got; 344 need_request = __ceph_monc_want_map(monc, sub, epoch, continuous);
281 mutex_unlock(&monc->mutex); 345 mutex_unlock(&monc->mutex);
282 return 0; 346
347 return need_request;
283} 348}
284EXPORT_SYMBOL(ceph_monc_got_mdsmap); 349EXPORT_SYMBOL(ceph_monc_want_map);
285 350
286int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got) 351/*
352 * Keep track of which maps we have
353 *
354 * @sub: one of CEPH_SUB_*
355 */
356static void __ceph_monc_got_map(struct ceph_mon_client *monc, int sub,
357 u32 epoch)
358{
359 dout("%s %s epoch %u\n", __func__, ceph_sub_str[sub], epoch);
360
361 if (monc->subs[sub].want) {
362 if (monc->subs[sub].item.flags & CEPH_SUBSCRIBE_ONETIME)
363 monc->subs[sub].want = false;
364 else
365 monc->subs[sub].item.start = cpu_to_le64(epoch + 1);
366 }
367
368 monc->subs[sub].have = epoch;
369}
370
371void ceph_monc_got_map(struct ceph_mon_client *monc, int sub, u32 epoch)
287{ 372{
288 mutex_lock(&monc->mutex); 373 mutex_lock(&monc->mutex);
289 monc->have_osdmap = got; 374 __ceph_monc_got_map(monc, sub, epoch);
290 monc->want_next_osdmap = 0;
291 mutex_unlock(&monc->mutex); 375 mutex_unlock(&monc->mutex);
292 return 0;
293} 376}
377EXPORT_SYMBOL(ceph_monc_got_map);
294 378
295/* 379/*
296 * Register interest in the next osdmap 380 * Register interest in the next osdmap
297 */ 381 */
298void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) 382void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
299{ 383{
300 dout("request_next_osdmap have %u\n", monc->have_osdmap); 384 dout("%s have %u\n", __func__, monc->subs[CEPH_SUB_OSDMAP].have);
301 mutex_lock(&monc->mutex); 385 mutex_lock(&monc->mutex);
302 if (!monc->want_next_osdmap) 386 if (__ceph_monc_want_map(monc, CEPH_SUB_OSDMAP,
303 monc->want_next_osdmap = 1; 387 monc->subs[CEPH_SUB_OSDMAP].have + 1, false))
304 if (monc->want_next_osdmap < 2)
305 __send_subscribe(monc); 388 __send_subscribe(monc);
306 mutex_unlock(&monc->mutex); 389 mutex_unlock(&monc->mutex);
307} 390}
@@ -320,15 +403,15 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
320 long ret; 403 long ret;
321 404
322 mutex_lock(&monc->mutex); 405 mutex_lock(&monc->mutex);
323 while (monc->have_osdmap < epoch) { 406 while (monc->subs[CEPH_SUB_OSDMAP].have < epoch) {
324 mutex_unlock(&monc->mutex); 407 mutex_unlock(&monc->mutex);
325 408
326 if (timeout && time_after_eq(jiffies, started + timeout)) 409 if (timeout && time_after_eq(jiffies, started + timeout))
327 return -ETIMEDOUT; 410 return -ETIMEDOUT;
328 411
329 ret = wait_event_interruptible_timeout(monc->client->auth_wq, 412 ret = wait_event_interruptible_timeout(monc->client->auth_wq,
330 monc->have_osdmap >= epoch, 413 monc->subs[CEPH_SUB_OSDMAP].have >= epoch,
331 ceph_timeout_jiffies(timeout)); 414 ceph_timeout_jiffies(timeout));
332 if (ret < 0) 415 if (ret < 0)
333 return ret; 416 return ret;
334 417
@@ -341,11 +424,14 @@ int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
341EXPORT_SYMBOL(ceph_monc_wait_osdmap); 424EXPORT_SYMBOL(ceph_monc_wait_osdmap);
342 425
343/* 426/*
344 * 427 * Open a session with a random monitor. Request monmap and osdmap,
428 * which are waited upon in __ceph_open_session().
345 */ 429 */
346int ceph_monc_open_session(struct ceph_mon_client *monc) 430int ceph_monc_open_session(struct ceph_mon_client *monc)
347{ 431{
348 mutex_lock(&monc->mutex); 432 mutex_lock(&monc->mutex);
433 __ceph_monc_want_map(monc, CEPH_SUB_MONMAP, 0, true);
434 __ceph_monc_want_map(monc, CEPH_SUB_OSDMAP, 0, false);
349 __open_session(monc); 435 __open_session(monc);
350 __schedule_delayed(monc); 436 __schedule_delayed(monc);
351 mutex_unlock(&monc->mutex); 437 mutex_unlock(&monc->mutex);
@@ -353,29 +439,15 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
353} 439}
354EXPORT_SYMBOL(ceph_monc_open_session); 440EXPORT_SYMBOL(ceph_monc_open_session);
355 441
356/*
357 * We require the fsid and global_id in order to initialize our
358 * debugfs dir.
359 */
360static bool have_debugfs_info(struct ceph_mon_client *monc)
361{
362 dout("have_debugfs_info fsid %d globalid %lld\n",
363 (int)monc->client->have_fsid, monc->auth->global_id);
364 return monc->client->have_fsid && monc->auth->global_id > 0;
365}
366
367static void ceph_monc_handle_map(struct ceph_mon_client *monc, 442static void ceph_monc_handle_map(struct ceph_mon_client *monc,
368 struct ceph_msg *msg) 443 struct ceph_msg *msg)
369{ 444{
370 struct ceph_client *client = monc->client; 445 struct ceph_client *client = monc->client;
371 struct ceph_monmap *monmap = NULL, *old = monc->monmap; 446 struct ceph_monmap *monmap = NULL, *old = monc->monmap;
372 void *p, *end; 447 void *p, *end;
373 int had_debugfs_info, init_debugfs = 0;
374 448
375 mutex_lock(&monc->mutex); 449 mutex_lock(&monc->mutex);
376 450
377 had_debugfs_info = have_debugfs_info(monc);
378
379 dout("handle_monmap\n"); 451 dout("handle_monmap\n");
380 p = msg->front.iov_base; 452 p = msg->front.iov_base;
381 end = p + msg->front.iov_len; 453 end = p + msg->front.iov_len;
@@ -395,29 +467,11 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
395 client->monc.monmap = monmap; 467 client->monc.monmap = monmap;
396 kfree(old); 468 kfree(old);
397 469
398 if (!client->have_fsid) { 470 __ceph_monc_got_map(monc, CEPH_SUB_MONMAP, monc->monmap->epoch);
399 client->have_fsid = true; 471 client->have_fsid = true;
400 if (!had_debugfs_info && have_debugfs_info(monc)) {
401 pr_info("client%lld fsid %pU\n",
402 ceph_client_id(monc->client),
403 &monc->client->fsid);
404 init_debugfs = 1;
405 }
406 mutex_unlock(&monc->mutex);
407
408 if (init_debugfs) {
409 /*
410 * do debugfs initialization without mutex to avoid
411 * creating a locking dependency
412 */
413 ceph_debugfs_client_init(monc->client);
414 }
415 472
416 goto out_unlocked;
417 }
418out: 473out:
419 mutex_unlock(&monc->mutex); 474 mutex_unlock(&monc->mutex);
420out_unlocked:
421 wake_up_all(&client->auth_wq); 475 wake_up_all(&client->auth_wq);
422} 476}
423 477
@@ -745,18 +799,15 @@ static void delayed_work(struct work_struct *work)
745 dout("monc delayed_work\n"); 799 dout("monc delayed_work\n");
746 mutex_lock(&monc->mutex); 800 mutex_lock(&monc->mutex);
747 if (monc->hunting) { 801 if (monc->hunting) {
748 __close_session(monc); 802 dout("%s continuing hunt\n", __func__);
749 __open_session(monc); /* continue hunting */ 803 reopen_session(monc);
750 } else { 804 } else {
751 struct ceph_options *opt = monc->client->options;
752 int is_auth = ceph_auth_is_authenticated(monc->auth); 805 int is_auth = ceph_auth_is_authenticated(monc->auth);
753 if (ceph_con_keepalive_expired(&monc->con, 806 if (ceph_con_keepalive_expired(&monc->con,
754 opt->monc_ping_timeout)) { 807 CEPH_MONC_PING_TIMEOUT)) {
755 dout("monc keepalive timeout\n"); 808 dout("monc keepalive timeout\n");
756 is_auth = 0; 809 is_auth = 0;
757 __close_session(monc); 810 reopen_session(monc);
758 monc->hunting = true;
759 __open_session(monc);
760 } 811 }
761 812
762 if (!monc->hunting) { 813 if (!monc->hunting) {
@@ -764,8 +815,14 @@ static void delayed_work(struct work_struct *work)
764 __validate_auth(monc); 815 __validate_auth(monc);
765 } 816 }
766 817
767 if (is_auth) 818 if (is_auth) {
768 __send_subscribe(monc); 819 unsigned long now = jiffies;
820
821 dout("%s renew subs? now %lu renew after %lu\n",
822 __func__, now, monc->sub_renew_after);
823 if (time_after_eq(now, monc->sub_renew_after))
824 __send_subscribe(monc);
825 }
769 } 826 }
770 __schedule_delayed(monc); 827 __schedule_delayed(monc);
771 mutex_unlock(&monc->mutex); 828 mutex_unlock(&monc->mutex);
@@ -852,18 +909,14 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
852 &monc->client->msgr); 909 &monc->client->msgr);
853 910
854 monc->cur_mon = -1; 911 monc->cur_mon = -1;
855 monc->hunting = true; 912 monc->had_a_connection = false;
856 monc->sub_renew_after = jiffies; 913 monc->hunt_mult = 1;
857 monc->sub_sent = 0;
858 914
859 INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); 915 INIT_DELAYED_WORK(&monc->delayed_work, delayed_work);
860 monc->generic_request_tree = RB_ROOT; 916 monc->generic_request_tree = RB_ROOT;
861 monc->num_generic_requests = 0; 917 monc->num_generic_requests = 0;
862 monc->last_tid = 0; 918 monc->last_tid = 0;
863 919
864 monc->have_mdsmap = 0;
865 monc->have_osdmap = 0;
866 monc->want_next_osdmap = 1;
867 return 0; 920 return 0;
868 921
869out_auth_reply: 922out_auth_reply:
@@ -888,7 +941,7 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
888 941
889 mutex_lock(&monc->mutex); 942 mutex_lock(&monc->mutex);
890 __close_session(monc); 943 __close_session(monc);
891 944 monc->cur_mon = -1;
892 mutex_unlock(&monc->mutex); 945 mutex_unlock(&monc->mutex);
893 946
894 /* 947 /*
@@ -910,26 +963,40 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
910} 963}
911EXPORT_SYMBOL(ceph_monc_stop); 964EXPORT_SYMBOL(ceph_monc_stop);
912 965
966static void finish_hunting(struct ceph_mon_client *monc)
967{
968 if (monc->hunting) {
969 dout("%s found mon%d\n", __func__, monc->cur_mon);
970 monc->hunting = false;
971 monc->had_a_connection = true;
972 monc->hunt_mult /= 2; /* reduce by 50% */
973 if (monc->hunt_mult < 1)
974 monc->hunt_mult = 1;
975 }
976}
977
913static void handle_auth_reply(struct ceph_mon_client *monc, 978static void handle_auth_reply(struct ceph_mon_client *monc,
914 struct ceph_msg *msg) 979 struct ceph_msg *msg)
915{ 980{
916 int ret; 981 int ret;
917 int was_auth = 0; 982 int was_auth = 0;
918 int had_debugfs_info, init_debugfs = 0;
919 983
920 mutex_lock(&monc->mutex); 984 mutex_lock(&monc->mutex);
921 had_debugfs_info = have_debugfs_info(monc);
922 was_auth = ceph_auth_is_authenticated(monc->auth); 985 was_auth = ceph_auth_is_authenticated(monc->auth);
923 monc->pending_auth = 0; 986 monc->pending_auth = 0;
924 ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, 987 ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
925 msg->front.iov_len, 988 msg->front.iov_len,
926 monc->m_auth->front.iov_base, 989 monc->m_auth->front.iov_base,
927 monc->m_auth->front_alloc_len); 990 monc->m_auth->front_alloc_len);
991 if (ret > 0) {
992 __send_prepared_auth_request(monc, ret);
993 goto out;
994 }
995
996 finish_hunting(monc);
997
928 if (ret < 0) { 998 if (ret < 0) {
929 monc->client->auth_err = ret; 999 monc->client->auth_err = ret;
930 wake_up_all(&monc->client->auth_wq);
931 } else if (ret > 0) {
932 __send_prepared_auth_request(monc, ret);
933 } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) { 1000 } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
934 dout("authenticated, starting session\n"); 1001 dout("authenticated, starting session\n");
935 1002
@@ -939,23 +1006,15 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
939 1006
940 __send_subscribe(monc); 1007 __send_subscribe(monc);
941 __resend_generic_request(monc); 1008 __resend_generic_request(monc);
942 }
943 1009
944 if (!had_debugfs_info && have_debugfs_info(monc)) { 1010 pr_info("mon%d %s session established\n", monc->cur_mon,
945 pr_info("client%lld fsid %pU\n", 1011 ceph_pr_addr(&monc->con.peer_addr.in_addr));
946 ceph_client_id(monc->client),
947 &monc->client->fsid);
948 init_debugfs = 1;
949 } 1012 }
950 mutex_unlock(&monc->mutex);
951 1013
952 if (init_debugfs) { 1014out:
953 /* 1015 mutex_unlock(&monc->mutex);
954 * do debugfs initialization without mutex to avoid 1016 if (monc->client->auth_err < 0)
955 * creating a locking dependency 1017 wake_up_all(&monc->client->auth_wq);
956 */
957 ceph_debugfs_client_init(monc->client);
958 }
959} 1018}
960 1019
961static int __validate_auth(struct ceph_mon_client *monc) 1020static int __validate_auth(struct ceph_mon_client *monc)
@@ -1096,29 +1155,17 @@ static void mon_fault(struct ceph_connection *con)
1096{ 1155{
1097 struct ceph_mon_client *monc = con->private; 1156 struct ceph_mon_client *monc = con->private;
1098 1157
1099 if (!monc)
1100 return;
1101
1102 dout("mon_fault\n");
1103 mutex_lock(&monc->mutex); 1158 mutex_lock(&monc->mutex);
1104 if (!con->private) 1159 dout("%s mon%d\n", __func__, monc->cur_mon);
1105 goto out; 1160 if (monc->cur_mon >= 0) {
1106 1161 if (!monc->hunting) {
1107 if (!monc->hunting) 1162 dout("%s hunting for new mon\n", __func__);
1108 pr_info("mon%d %s session lost, " 1163 reopen_session(monc);
1109 "hunting for new mon\n", monc->cur_mon, 1164 __schedule_delayed(monc);
1110 ceph_pr_addr(&monc->con.peer_addr.in_addr)); 1165 } else {
1111 1166 dout("%s already hunting\n", __func__);
1112 __close_session(monc); 1167 }
1113 if (!monc->hunting) {
1114 /* start hunting */
1115 monc->hunting = true;
1116 __open_session(monc);
1117 } else {
1118 /* already hunting, let's wait a bit */
1119 __schedule_delayed(monc);
1120 } 1168 }
1121out:
1122 mutex_unlock(&monc->mutex); 1169 mutex_unlock(&monc->mutex);
1123} 1170}
1124 1171
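The mon_client.c changes above replace the ad-hoc have_mdsmap/have_osdmap/want_next_osdmap fields with a generic per-map subscription array indexed by CEPH_SUB_*, add randomized monitor re-picking with exponential hunt back-off, and move the "session established/lost" reporting into the auth and fault paths. A hedged sketch of how a consumer drives the resulting interface is shown below; only calls that appear in this diff are used, and the wrapper function itself is invented for illustration.

/* Hedged sketch of the new subscription interface; example_* names are invented. */
static void example_track_osdmap(struct ceph_mon_client *monc, u32 new_epoch)
{
	/* record the epoch we now have; one-shot subscriptions are cleared */
	ceph_monc_got_map(monc, CEPH_SUB_OSDMAP, new_epoch);

	/* express interest in the next map; a subscribe is sent only if needed */
	ceph_monc_request_next_osdmap(monc);
}

For continuous interest (e.g. the monmap), ceph_monc_want_map(monc, CEPH_SUB_MONMAP, 0, true) is used instead, as ceph_monc_open_session() does above.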
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index f8f235930d88..32355d9d0103 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -338,9 +338,10 @@ static void ceph_osdc_release_request(struct kref *kref)
338 ceph_put_snap_context(req->r_snapc); 338 ceph_put_snap_context(req->r_snapc);
339 if (req->r_mempool) 339 if (req->r_mempool)
340 mempool_free(req, req->r_osdc->req_mempool); 340 mempool_free(req, req->r_osdc->req_mempool);
341 else 341 else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS)
342 kmem_cache_free(ceph_osd_request_cache, req); 342 kmem_cache_free(ceph_osd_request_cache, req);
343 343 else
344 kfree(req);
344} 345}
345 346
346void ceph_osdc_get_request(struct ceph_osd_request *req) 347void ceph_osdc_get_request(struct ceph_osd_request *req)
@@ -369,28 +370,22 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
369 struct ceph_msg *msg; 370 struct ceph_msg *msg;
370 size_t msg_size; 371 size_t msg_size;
371 372
372 BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
373 BUG_ON(num_ops > CEPH_OSD_MAX_OP);
374
375 msg_size = 4 + 4 + 8 + 8 + 4+8;
376 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
377 msg_size += 1 + 8 + 4 + 4; /* pg_t */
378 msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */
379 msg_size += 2 + num_ops*sizeof(struct ceph_osd_op);
380 msg_size += 8; /* snapid */
381 msg_size += 8; /* snap_seq */
382 msg_size += 8 * (snapc ? snapc->num_snaps : 0); /* snaps */
383 msg_size += 4;
384
385 if (use_mempool) { 373 if (use_mempool) {
374 BUG_ON(num_ops > CEPH_OSD_SLAB_OPS);
386 req = mempool_alloc(osdc->req_mempool, gfp_flags); 375 req = mempool_alloc(osdc->req_mempool, gfp_flags);
387 memset(req, 0, sizeof(*req)); 376 } else if (num_ops <= CEPH_OSD_SLAB_OPS) {
377 req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags);
388 } else { 378 } else {
389 req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags); 379 BUG_ON(num_ops > CEPH_OSD_MAX_OPS);
380 req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]),
381 gfp_flags);
390 } 382 }
391 if (req == NULL) 383 if (unlikely(!req))
392 return NULL; 384 return NULL;
393 385
386 /* req only, each op is zeroed in _osd_req_op_init() */
387 memset(req, 0, sizeof(*req));
388
394 req->r_osdc = osdc; 389 req->r_osdc = osdc;
395 req->r_mempool = use_mempool; 390 req->r_mempool = use_mempool;
396 req->r_num_ops = num_ops; 391 req->r_num_ops = num_ops;
@@ -408,18 +403,36 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
408 req->r_base_oloc.pool = -1; 403 req->r_base_oloc.pool = -1;
409 req->r_target_oloc.pool = -1; 404 req->r_target_oloc.pool = -1;
410 405
406 msg_size = OSD_OPREPLY_FRONT_LEN;
407 if (num_ops > CEPH_OSD_SLAB_OPS) {
408 /* ceph_osd_op and rval */
409 msg_size += (num_ops - CEPH_OSD_SLAB_OPS) *
410 (sizeof(struct ceph_osd_op) + 4);
411 }
412
411 /* create reply message */ 413 /* create reply message */
412 if (use_mempool) 414 if (use_mempool)
413 msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); 415 msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
414 else 416 else
415 msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, 417 msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size,
416 OSD_OPREPLY_FRONT_LEN, gfp_flags, true); 418 gfp_flags, true);
417 if (!msg) { 419 if (!msg) {
418 ceph_osdc_put_request(req); 420 ceph_osdc_put_request(req);
419 return NULL; 421 return NULL;
420 } 422 }
421 req->r_reply = msg; 423 req->r_reply = msg;
422 424
425 msg_size = 4 + 4 + 4; /* client_inc, osdmap_epoch, flags */
426 msg_size += 4 + 4 + 4 + 8; /* mtime, reassert_version */
427 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
428 msg_size += 1 + 8 + 4 + 4; /* pgid */
429 msg_size += 4 + CEPH_MAX_OID_NAME_LEN; /* oid */
430 msg_size += 2 + num_ops * sizeof(struct ceph_osd_op);
431 msg_size += 8; /* snapid */
432 msg_size += 8; /* snap_seq */
433 msg_size += 4 + 8 * (snapc ? snapc->num_snaps : 0); /* snaps */
434 msg_size += 4; /* retry_attempt */
435
423 /* create request message; allow space for oid */ 436 /* create request message; allow space for oid */
424 if (use_mempool) 437 if (use_mempool)
425 msg = ceph_msgpool_get(&osdc->msgpool_op, 0); 438 msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -498,7 +511,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
498 if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL) 511 if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL)
499 payload_len += length; 512 payload_len += length;
500 513
501 op->payload_len = payload_len; 514 op->indata_len = payload_len;
502} 515}
503EXPORT_SYMBOL(osd_req_op_extent_init); 516EXPORT_SYMBOL(osd_req_op_extent_init);
504 517
@@ -517,10 +530,32 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
517 BUG_ON(length > previous); 530 BUG_ON(length > previous);
518 531
519 op->extent.length = length; 532 op->extent.length = length;
520 op->payload_len -= previous - length; 533 op->indata_len -= previous - length;
521} 534}
522EXPORT_SYMBOL(osd_req_op_extent_update); 535EXPORT_SYMBOL(osd_req_op_extent_update);
523 536
537void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
538 unsigned int which, u64 offset_inc)
539{
540 struct ceph_osd_req_op *op, *prev_op;
541
542 BUG_ON(which + 1 >= osd_req->r_num_ops);
543
544 prev_op = &osd_req->r_ops[which];
545 op = _osd_req_op_init(osd_req, which + 1, prev_op->op, prev_op->flags);
546 /* dup previous one */
547 op->indata_len = prev_op->indata_len;
548 op->outdata_len = prev_op->outdata_len;
549 op->extent = prev_op->extent;
550 /* adjust offset */
551 op->extent.offset += offset_inc;
552 op->extent.length -= offset_inc;
553
554 if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL)
555 op->indata_len -= offset_inc;
556}
557EXPORT_SYMBOL(osd_req_op_extent_dup_last);
558
524void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, 559void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
525 u16 opcode, const char *class, const char *method) 560 u16 opcode, const char *class, const char *method)
526{ 561{
@@ -554,7 +589,7 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
554 589
555 op->cls.argc = 0; /* currently unused */ 590 op->cls.argc = 0; /* currently unused */
556 591
557 op->payload_len = payload_len; 592 op->indata_len = payload_len;
558} 593}
559EXPORT_SYMBOL(osd_req_op_cls_init); 594EXPORT_SYMBOL(osd_req_op_cls_init);
560 595
@@ -587,7 +622,7 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
587 op->xattr.cmp_mode = cmp_mode; 622 op->xattr.cmp_mode = cmp_mode;
588 623
589 ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist); 624 ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
590 op->payload_len = payload_len; 625 op->indata_len = payload_len;
591 return 0; 626 return 0;
592} 627}
593EXPORT_SYMBOL(osd_req_op_xattr_init); 628EXPORT_SYMBOL(osd_req_op_xattr_init);
@@ -707,7 +742,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
707 BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE); 742 BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
708 dst->cls.indata_len = cpu_to_le32(data_length); 743 dst->cls.indata_len = cpu_to_le32(data_length);
709 ceph_osdc_msg_data_add(req->r_request, osd_data); 744 ceph_osdc_msg_data_add(req->r_request, osd_data);
710 src->payload_len += data_length; 745 src->indata_len += data_length;
711 request_data_len += data_length; 746 request_data_len += data_length;
712 } 747 }
713 osd_data = &src->cls.response_data; 748 osd_data = &src->cls.response_data;
@@ -750,7 +785,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
750 785
751 dst->op = cpu_to_le16(src->op); 786 dst->op = cpu_to_le16(src->op);
752 dst->flags = cpu_to_le32(src->flags); 787 dst->flags = cpu_to_le32(src->flags);
753 dst->payload_len = cpu_to_le32(src->payload_len); 788 dst->payload_len = cpu_to_le32(src->indata_len);
754 789
755 return request_data_len; 790 return request_data_len;
756} 791}
@@ -1770,6 +1805,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1770 u32 osdmap_epoch; 1805 u32 osdmap_epoch;
1771 int already_completed; 1806 int already_completed;
1772 u32 bytes; 1807 u32 bytes;
1808 u8 decode_redir;
1773 unsigned int i; 1809 unsigned int i;
1774 1810
1775 tid = le64_to_cpu(msg->hdr.tid); 1811 tid = le64_to_cpu(msg->hdr.tid);
@@ -1809,7 +1845,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1809 1845
1810 ceph_decode_need(&p, end, 4, bad_put); 1846 ceph_decode_need(&p, end, 4, bad_put);
1811 numops = ceph_decode_32(&p); 1847 numops = ceph_decode_32(&p);
1812 if (numops > CEPH_OSD_MAX_OP) 1848 if (numops > CEPH_OSD_MAX_OPS)
1813 goto bad_put; 1849 goto bad_put;
1814 if (numops != req->r_num_ops) 1850 if (numops != req->r_num_ops)
1815 goto bad_put; 1851 goto bad_put;
@@ -1820,7 +1856,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1820 int len; 1856 int len;
1821 1857
1822 len = le32_to_cpu(op->payload_len); 1858 len = le32_to_cpu(op->payload_len);
1823 req->r_reply_op_len[i] = len; 1859 req->r_ops[i].outdata_len = len;
1824 dout(" op %d has %d bytes\n", i, len); 1860 dout(" op %d has %d bytes\n", i, len);
1825 payload_len += len; 1861 payload_len += len;
1826 p += sizeof(*op); 1862 p += sizeof(*op);
@@ -1835,12 +1871,21 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
1835 ceph_decode_need(&p, end, 4 + numops * 4, bad_put); 1871 ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
1836 retry_attempt = ceph_decode_32(&p); 1872 retry_attempt = ceph_decode_32(&p);
1837 for (i = 0; i < numops; i++) 1873 for (i = 0; i < numops; i++)
1838 req->r_reply_op_result[i] = ceph_decode_32(&p); 1874 req->r_ops[i].rval = ceph_decode_32(&p);
1839 1875
1840 if (le16_to_cpu(msg->hdr.version) >= 6) { 1876 if (le16_to_cpu(msg->hdr.version) >= 6) {
1841 p += 8 + 4; /* skip replay_version */ 1877 p += 8 + 4; /* skip replay_version */
1842 p += 8; /* skip user_version */ 1878 p += 8; /* skip user_version */
1843 1879
1880 if (le16_to_cpu(msg->hdr.version) >= 7)
1881 ceph_decode_8_safe(&p, end, decode_redir, bad_put);
1882 else
1883 decode_redir = 1;
1884 } else {
1885 decode_redir = 0;
1886 }
1887
1888 if (decode_redir) {
1844 err = ceph_redirect_decode(&p, end, &redir); 1889 err = ceph_redirect_decode(&p, end, &redir);
1845 if (err) 1890 if (err)
1846 goto bad_put; 1891 goto bad_put;
@@ -2177,7 +2222,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
2177 goto bad; 2222 goto bad;
2178done: 2223done:
2179 downgrade_write(&osdc->map_sem); 2224 downgrade_write(&osdc->map_sem);
2180 ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); 2225 ceph_monc_got_map(&osdc->client->monc, CEPH_SUB_OSDMAP,
2226 osdc->osdmap->epoch);
2181 2227
2182 /* 2228 /*
2183 * subscribe to subsequent osdmap updates if full to ensure 2229 * subscribe to subsequent osdmap updates if full to ensure
@@ -2636,8 +2682,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
2636 round_jiffies_relative(osdc->client->options->osd_idle_ttl)); 2682 round_jiffies_relative(osdc->client->options->osd_idle_ttl));
2637 2683
2638 err = -ENOMEM; 2684 err = -ENOMEM;
2639 osdc->req_mempool = mempool_create_kmalloc_pool(10, 2685 osdc->req_mempool = mempool_create_slab_pool(10,
2640 sizeof(struct ceph_osd_request)); 2686 ceph_osd_request_cache);
2641 if (!osdc->req_mempool) 2687 if (!osdc->req_mempool)
2642 goto out; 2688 goto out;
2643 2689
@@ -2772,11 +2818,12 @@ EXPORT_SYMBOL(ceph_osdc_writepages);
2772 2818
2773int ceph_osdc_setup(void) 2819int ceph_osdc_setup(void)
2774{ 2820{
2821 size_t size = sizeof(struct ceph_osd_request) +
2822 CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
2823
2775 BUG_ON(ceph_osd_request_cache); 2824 BUG_ON(ceph_osd_request_cache);
2776 ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", 2825 ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size,
2777 sizeof (struct ceph_osd_request), 2826 0, 0, NULL);
2778 __alignof__(struct ceph_osd_request),
2779 0, NULL);
2780 2827
2781 return ceph_osd_request_cache ? 0 : -ENOMEM; 2828 return ceph_osd_request_cache ? 0 : -ENOMEM;
2782} 2829}
@@ -2843,8 +2890,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2843 mutex_lock(&osdc->request_mutex); 2890 mutex_lock(&osdc->request_mutex);
2844 req = __lookup_request(osdc, tid); 2891 req = __lookup_request(osdc, tid);
2845 if (!req) { 2892 if (!req) {
2846 pr_warn("%s osd%d tid %llu unknown, skipping\n", 2893 dout("%s osd%d tid %llu unknown, skipping\n", __func__,
2847 __func__, osd->o_osd, tid); 2894 osd->o_osd, tid);
2848 m = NULL; 2895 m = NULL;
2849 *skip = 1; 2896 *skip = 1;
2850 goto out; 2897 goto out;
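The osd_client.c changes above size struct ceph_osd_request by its op count: requests with at most CEPH_OSD_SLAB_OPS ops come from the slab cache (now created with room for the embedded r_ops[] array), larger ones are kmalloc'ed with a trailing flexible array, and ceph_osdc_release_request() frees them accordingly. A compact sketch of that two-tier allocation pattern, using invented example_* names rather than the ceph types, might look like this:

/* Hedged sketch of the "small from slab, large from kmalloc" pattern above. */
#include <linux/slab.h>
#include <linux/string.h>

#define EXAMPLE_SLAB_OPS 16		/* illustrative stand-in for CEPH_OSD_SLAB_OPS */

struct example_op { u16 op; u32 len; };

struct example_req {
	unsigned int num_ops;
	struct example_op ops[];	/* flexible array, like r_ops[] above */
};

static struct kmem_cache *example_cache;	/* assumed created elsewhere, sized for EXAMPLE_SLAB_OPS ops */

static struct example_req *example_alloc(unsigned int num_ops, gfp_t gfp)
{
	struct example_req *req;

	if (num_ops <= EXAMPLE_SLAB_OPS)
		req = kmem_cache_alloc(example_cache, gfp);
	else
		req = kmalloc(sizeof(*req) + num_ops * sizeof(req->ops[0]), gfp);
	if (!req)
		return NULL;

	memset(req, 0, sizeof(*req));	/* header only; ops are zeroed as they are initialized */
	req->num_ops = num_ops;
	return req;
}

static void example_free(struct example_req *req)
{
	if (req->num_ops <= EXAMPLE_SLAB_OPS)
		kmem_cache_free(example_cache, req);
	else
		kfree(req);
}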
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 7d8f581d9f1f..243574c8cf33 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -342,23 +342,32 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
342 c->choose_local_tries = ceph_decode_32(p); 342 c->choose_local_tries = ceph_decode_32(p);
343 c->choose_local_fallback_tries = ceph_decode_32(p); 343 c->choose_local_fallback_tries = ceph_decode_32(p);
344 c->choose_total_tries = ceph_decode_32(p); 344 c->choose_total_tries = ceph_decode_32(p);
345 dout("crush decode tunable choose_local_tries = %d", 345 dout("crush decode tunable choose_local_tries = %d\n",
346 c->choose_local_tries); 346 c->choose_local_tries);
347 dout("crush decode tunable choose_local_fallback_tries = %d", 347 dout("crush decode tunable choose_local_fallback_tries = %d\n",
348 c->choose_local_fallback_tries); 348 c->choose_local_fallback_tries);
349 dout("crush decode tunable choose_total_tries = %d", 349 dout("crush decode tunable choose_total_tries = %d\n",
350 c->choose_total_tries); 350 c->choose_total_tries);
351 351
352 ceph_decode_need(p, end, sizeof(u32), done); 352 ceph_decode_need(p, end, sizeof(u32), done);
353 c->chooseleaf_descend_once = ceph_decode_32(p); 353 c->chooseleaf_descend_once = ceph_decode_32(p);
354 dout("crush decode tunable chooseleaf_descend_once = %d", 354 dout("crush decode tunable chooseleaf_descend_once = %d\n",
355 c->chooseleaf_descend_once); 355 c->chooseleaf_descend_once);
356 356
357 ceph_decode_need(p, end, sizeof(u8), done); 357 ceph_decode_need(p, end, sizeof(u8), done);
358 c->chooseleaf_vary_r = ceph_decode_8(p); 358 c->chooseleaf_vary_r = ceph_decode_8(p);
359 dout("crush decode tunable chooseleaf_vary_r = %d", 359 dout("crush decode tunable chooseleaf_vary_r = %d\n",
360 c->chooseleaf_vary_r); 360 c->chooseleaf_vary_r);
361 361
362 /* skip straw_calc_version, allowed_bucket_algs */
363 ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
364 *p += sizeof(u8) + sizeof(u32);
365
366 ceph_decode_need(p, end, sizeof(u8), done);
367 c->chooseleaf_stable = ceph_decode_8(p);
368 dout("crush decode tunable chooseleaf_stable = %d\n",
369 c->chooseleaf_stable);
370
362done: 371done:
363 dout("crush_decode success\n"); 372 dout("crush_decode success\n");
364 return c; 373 return c;
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index d4f5f220a8e5..10297f7a89ba 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -24,7 +24,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
24 return ERR_PTR(-ENOMEM); 24 return ERR_PTR(-ENOMEM);
25 25
26 while (got < num_pages) { 26 while (got < num_pages) {
27 rc = get_user_pages_unlocked(current, current->mm, 27 rc = get_user_pages_unlocked(
28 (unsigned long)data + ((unsigned long)got * PAGE_SIZE), 28 (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
29 num_pages - got, write_page, 0, pages + got); 29 num_pages - got, write_page, 0, pages + got);
30 if (rc < 0) 30 if (rc < 0)
diff --git a/net/core/Makefile b/net/core/Makefile
index 0b835de04de3..d6508c2ddca5 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -24,3 +24,6 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
24obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o 24obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
25obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o 25obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
26obj-$(CONFIG_LWTUNNEL) += lwtunnel.o 26obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
27obj-$(CONFIG_DST_CACHE) += dst_cache.o
28obj-$(CONFIG_HWBM) += hwbm.o
29obj-$(CONFIG_NET_DEVLINK) += devlink.o
diff --git a/net/core/dev.c b/net/core/dev.c
index cc9e3652cf93..b9bcbe77d913 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3829,8 +3829,14 @@ static void net_tx_action(struct softirq_action *h)
3829 trace_consume_skb(skb); 3829 trace_consume_skb(skb);
3830 else 3830 else
3831 trace_kfree_skb(skb, net_tx_action); 3831 trace_kfree_skb(skb, net_tx_action);
3832 __kfree_skb(skb); 3832
3833 if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
3834 __kfree_skb(skb);
3835 else
3836 __kfree_skb_defer(skb);
3833 } 3837 }
3838
3839 __kfree_skb_flush();
3834 } 3840 }
3835 3841
3836 if (sd->output_queue) { 3842 if (sd->output_queue) {
@@ -4154,7 +4160,10 @@ ncls:
4154 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 4160 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
4155 } else { 4161 } else {
4156drop: 4162drop:
4157 atomic_long_inc(&skb->dev->rx_dropped); 4163 if (!deliver_exact)
4164 atomic_long_inc(&skb->dev->rx_dropped);
4165 else
4166 atomic_long_inc(&skb->dev->rx_nohandler);
4158 kfree_skb(skb); 4167 kfree_skb(skb);
4159 /* Jamal, now you will not able to escape explaining 4168 /* Jamal, now you will not able to escape explaining
4160 * me how you were going to use this. :-) 4169 * me how you were going to use this. :-)
@@ -4351,6 +4360,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
4351 4360
4352 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; 4361 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
4353 diffs |= p->vlan_tci ^ skb->vlan_tci; 4362 diffs |= p->vlan_tci ^ skb->vlan_tci;
4363 diffs |= skb_metadata_dst_cmp(p, skb);
4354 if (maclen == ETH_HLEN) 4364 if (maclen == ETH_HLEN)
4355 diffs |= compare_ether_header(skb_mac_header(p), 4365 diffs |= compare_ether_header(skb_mac_header(p),
4356 skb_mac_header(skb)); 4366 skb_mac_header(skb));
@@ -4428,7 +4438,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
4428 NAPI_GRO_CB(skb)->same_flow = 0; 4438 NAPI_GRO_CB(skb)->same_flow = 0;
4429 NAPI_GRO_CB(skb)->flush = 0; 4439 NAPI_GRO_CB(skb)->flush = 0;
4430 NAPI_GRO_CB(skb)->free = 0; 4440 NAPI_GRO_CB(skb)->free = 0;
4431 NAPI_GRO_CB(skb)->udp_mark = 0; 4441 NAPI_GRO_CB(skb)->encap_mark = 0;
4432 NAPI_GRO_CB(skb)->gro_remcsum_start = 0; 4442 NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
4433 4443
4434 /* Setup for GRO checksum validation */ 4444 /* Setup for GRO checksum validation */
@@ -4548,10 +4558,12 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
4548 break; 4558 break;
4549 4559
4550 case GRO_MERGED_FREE: 4560 case GRO_MERGED_FREE:
4551 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) 4561 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
4562 skb_dst_drop(skb);
4552 kmem_cache_free(skbuff_head_cache, skb); 4563 kmem_cache_free(skbuff_head_cache, skb);
4553 else 4564 } else {
4554 __kfree_skb(skb); 4565 __kfree_skb(skb);
4566 }
4555 break; 4567 break;
4556 4568
4557 case GRO_HELD: 4569 case GRO_HELD:
@@ -5149,6 +5161,7 @@ static void net_rx_action(struct softirq_action *h)
5149 } 5161 }
5150 } 5162 }
5151 5163
5164 __kfree_skb_flush();
5152 local_irq_disable(); 5165 local_irq_disable();
5153 5166
5154 list_splice_tail_init(&sd->poll_list, &list); 5167 list_splice_tail_init(&sd->poll_list, &list);
@@ -5376,12 +5389,12 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
5376{ 5389{
5377 struct netdev_adjacent *lower; 5390 struct netdev_adjacent *lower;
5378 5391
5379 lower = list_entry((*iter)->next, struct netdev_adjacent, list); 5392 lower = list_entry(*iter, struct netdev_adjacent, list);
5380 5393
5381 if (&lower->list == &dev->adj_list.lower) 5394 if (&lower->list == &dev->adj_list.lower)
5382 return NULL; 5395 return NULL;
5383 5396
5384 *iter = &lower->list; 5397 *iter = lower->list.next;
5385 5398
5386 return lower->dev; 5399 return lower->dev;
5387} 5400}
@@ -6432,6 +6445,7 @@ EXPORT_SYMBOL(dev_get_phys_port_id);
6432 * dev_get_phys_port_name - Get device physical port name 6445 * dev_get_phys_port_name - Get device physical port name
6433 * @dev: device 6446 * @dev: device
6434 * @name: port name 6447 * @name: port name
6448 * @len: limit of bytes to copy to name
6435 * 6449 *
6436 * Get device physical port name 6450 * Get device physical port name
6437 */ 6451 */
@@ -7250,24 +7264,31 @@ void netdev_run_todo(void)
7250 } 7264 }
7251} 7265}
7252 7266
7253/* Convert net_device_stats to rtnl_link_stats64. They have the same 7267/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
7254 * fields in the same order, with only the type differing. 7268 * all the same fields in the same order as net_device_stats, with only
7269 * the type differing, but rtnl_link_stats64 may have additional fields
7270 * at the end for newer counters.
7255 */ 7271 */
7256void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, 7272void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
7257 const struct net_device_stats *netdev_stats) 7273 const struct net_device_stats *netdev_stats)
7258{ 7274{
7259#if BITS_PER_LONG == 64 7275#if BITS_PER_LONG == 64
7260 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); 7276 BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
7261 memcpy(stats64, netdev_stats, sizeof(*stats64)); 7277 memcpy(stats64, netdev_stats, sizeof(*stats64));
7278 /* zero out counters that only exist in rtnl_link_stats64 */
7279 memset((char *)stats64 + sizeof(*netdev_stats), 0,
7280 sizeof(*stats64) - sizeof(*netdev_stats));
7262#else 7281#else
7263 size_t i, n = sizeof(*stats64) / sizeof(u64); 7282 size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
7264 const unsigned long *src = (const unsigned long *)netdev_stats; 7283 const unsigned long *src = (const unsigned long *)netdev_stats;
7265 u64 *dst = (u64 *)stats64; 7284 u64 *dst = (u64 *)stats64;
7266 7285
7267 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != 7286 BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
7268 sizeof(*stats64) / sizeof(u64));
7269 for (i = 0; i < n; i++) 7287 for (i = 0; i < n; i++)
7270 dst[i] = src[i]; 7288 dst[i] = src[i];
7289 /* zero out counters that only exist in rtnl_link_stats64 */
7290 memset((char *)stats64 + n * sizeof(u64), 0,
7291 sizeof(*stats64) - n * sizeof(u64));
7271#endif 7292#endif
7272} 7293}
7273EXPORT_SYMBOL(netdev_stats_to_stats64); 7294EXPORT_SYMBOL(netdev_stats_to_stats64);
@@ -7297,6 +7318,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
7297 } 7318 }
7298 storage->rx_dropped += atomic_long_read(&dev->rx_dropped); 7319 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
7299 storage->tx_dropped += atomic_long_read(&dev->tx_dropped); 7320 storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
7321 storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
7300 return storage; 7322 return storage;
7301} 7323}
7302EXPORT_SYMBOL(dev_get_stats); 7324EXPORT_SYMBOL(dev_get_stats);
@@ -7419,8 +7441,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
7419 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; 7441 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
7420 setup(dev); 7442 setup(dev);
7421 7443
7422 if (!dev->tx_queue_len) 7444 if (!dev->tx_queue_len) {
7423 dev->priv_flags |= IFF_NO_QUEUE; 7445 dev->priv_flags |= IFF_NO_QUEUE;
7446 dev->tx_queue_len = 1;
7447 }
7424 7448
7425 dev->num_tx_queues = txqs; 7449 dev->num_tx_queues = txqs;
7426 dev->real_num_tx_queues = txqs; 7450 dev->real_num_tx_queues = txqs;
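For reference, the 32-bit branch of netdev_stats_to_stats64() above follows a simple widen-then-pad pattern: copy each unsigned long counter into a u64 slot, then zero the destination fields that have no 32-bit counterpart. The standalone sketch below shows the same idea in plain C with invented struct names; it is not kernel code.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct old_stats { unsigned long rx_packets, tx_packets; };
struct new_stats { uint64_t rx_packets, tx_packets, rx_nohandler; };

static void old_to_new(struct new_stats *dst, const struct old_stats *src)
{
	size_t i, n = sizeof(*src) / sizeof(unsigned long);
	const unsigned long *s = (const unsigned long *)src;
	uint64_t *d = (uint64_t *)dst;

	for (i = 0; i < n; i++)		/* widen the counters both layouts share */
		d[i] = s[i];
	/* zero the tail that exists only in the new layout (rx_nohandler here) */
	memset((char *)dst + n * sizeof(uint64_t), 0,
	       sizeof(*dst) - n * sizeof(uint64_t));
}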
diff --git a/net/core/devlink.c b/net/core/devlink.c
new file mode 100644
index 000000000000..590fa561cb7f
--- /dev/null
+++ b/net/core/devlink.c
@@ -0,0 +1,738 @@
1/*
2 * net/core/devlink.c - Network physical/parent device Netlink interface
3 *
4 * Heavily inspired by net/wireless/
5 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
6 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/slab.h>
18#include <linux/gfp.h>
19#include <linux/device.h>
20#include <linux/list.h>
21#include <linux/netdevice.h>
22#include <rdma/ib_verbs.h>
23#include <net/netlink.h>
24#include <net/genetlink.h>
25#include <net/rtnetlink.h>
26#include <net/net_namespace.h>
27#include <net/sock.h>
28#include <net/devlink.h>
29
30static LIST_HEAD(devlink_list);
31
32/* devlink_mutex
33 *
34 * An overall lock guarding every operation coming from userspace.
35 * It also guards devlink devices list and it is taken when
36 * driver registers/unregisters it.
37 */
38static DEFINE_MUTEX(devlink_mutex);
39
40/* devlink_port_mutex
41 *
42 * Shared lock to guard lists of ports in all devlink devices.
43 */
44static DEFINE_MUTEX(devlink_port_mutex);
45
46static struct net *devlink_net(const struct devlink *devlink)
47{
48 return read_pnet(&devlink->_net);
49}
50
51static void devlink_net_set(struct devlink *devlink, struct net *net)
52{
53 write_pnet(&devlink->_net, net);
54}
55
56static struct devlink *devlink_get_from_attrs(struct net *net,
57 struct nlattr **attrs)
58{
59 struct devlink *devlink;
60 char *busname;
61 char *devname;
62
63 if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME])
64 return ERR_PTR(-EINVAL);
65
66 busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
67 devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
68
69 list_for_each_entry(devlink, &devlink_list, list) {
70 if (strcmp(devlink->dev->bus->name, busname) == 0 &&
71 strcmp(dev_name(devlink->dev), devname) == 0 &&
72 net_eq(devlink_net(devlink), net))
73 return devlink;
74 }
75
76 return ERR_PTR(-ENODEV);
77}
78
79static struct devlink *devlink_get_from_info(struct genl_info *info)
80{
81 return devlink_get_from_attrs(genl_info_net(info), info->attrs);
82}
83
84static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
85 int port_index)
86{
87 struct devlink_port *devlink_port;
88
89 list_for_each_entry(devlink_port, &devlink->port_list, list) {
90 if (devlink_port->index == port_index)
91 return devlink_port;
92 }
93 return NULL;
94}
95
96static bool devlink_port_index_exists(struct devlink *devlink, int port_index)
97{
98 return devlink_port_get_by_index(devlink, port_index);
99}
100
101static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
102 struct nlattr **attrs)
103{
104 if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
105 u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
106 struct devlink_port *devlink_port;
107
108 devlink_port = devlink_port_get_by_index(devlink, port_index);
109 if (!devlink_port)
110 return ERR_PTR(-ENODEV);
111 return devlink_port;
112 }
113 return ERR_PTR(-EINVAL);
114}
115
116static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
117 struct genl_info *info)
118{
119 return devlink_port_get_from_attrs(devlink, info->attrs);
120}
121
122#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
123
124static int devlink_nl_pre_doit(const struct genl_ops *ops,
125 struct sk_buff *skb, struct genl_info *info)
126{
127 struct devlink *devlink;
128
129 mutex_lock(&devlink_mutex);
130 devlink = devlink_get_from_info(info);
131 if (IS_ERR(devlink)) {
132 mutex_unlock(&devlink_mutex);
133 return PTR_ERR(devlink);
134 }
135 info->user_ptr[0] = devlink;
136 if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
137 struct devlink_port *devlink_port;
138
139 mutex_lock(&devlink_port_mutex);
140 devlink_port = devlink_port_get_from_info(devlink, info);
141 if (IS_ERR(devlink_port)) {
142 mutex_unlock(&devlink_port_mutex);
143 mutex_unlock(&devlink_mutex);
144 return PTR_ERR(devlink_port);
145 }
146 info->user_ptr[1] = devlink_port;
147 }
148 return 0;
149}
150
151static void devlink_nl_post_doit(const struct genl_ops *ops,
152 struct sk_buff *skb, struct genl_info *info)
153{
154 if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT)
155 mutex_unlock(&devlink_port_mutex);
156 mutex_unlock(&devlink_mutex);
157}
158
159static struct genl_family devlink_nl_family = {
160 .id = GENL_ID_GENERATE,
161 .name = DEVLINK_GENL_NAME,
162 .version = DEVLINK_GENL_VERSION,
163 .maxattr = DEVLINK_ATTR_MAX,
164 .netnsok = true,
165 .pre_doit = devlink_nl_pre_doit,
166 .post_doit = devlink_nl_post_doit,
167};
168
169enum devlink_multicast_groups {
170 DEVLINK_MCGRP_CONFIG,
171};
172
173static const struct genl_multicast_group devlink_nl_mcgrps[] = {
174 [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
175};
176
177static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
178{
179 if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
180 return -EMSGSIZE;
181 if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
182 return -EMSGSIZE;
183 return 0;
184}
185
186static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
187 enum devlink_command cmd, u32 portid,
188 u32 seq, int flags)
189{
190 void *hdr;
191
192 hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
193 if (!hdr)
194 return -EMSGSIZE;
195
196 if (devlink_nl_put_handle(msg, devlink))
197 goto nla_put_failure;
198
199 genlmsg_end(msg, hdr);
200 return 0;
201
202nla_put_failure:
203 genlmsg_cancel(msg, hdr);
204 return -EMSGSIZE;
205}
206
207static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
208{
209 struct sk_buff *msg;
210 int err;
211
212 WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
213
214 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
215 if (!msg)
216 return;
217
218 err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
219 if (err) {
220 nlmsg_free(msg);
221 return;
222 }
223
224 genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
225 msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
226}
227
228static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
229 struct devlink_port *devlink_port,
230 enum devlink_command cmd, u32 portid,
231 u32 seq, int flags)
232{
233 void *hdr;
234
235 hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
236 if (!hdr)
237 return -EMSGSIZE;
238
239 if (devlink_nl_put_handle(msg, devlink))
240 goto nla_put_failure;
241 if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
242 goto nla_put_failure;
243 if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
244 goto nla_put_failure;
245 if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
246 nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE,
247 devlink_port->desired_type))
248 goto nla_put_failure;
249 if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) {
250 struct net_device *netdev = devlink_port->type_dev;
251
252 if (netdev &&
253 (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX,
254 netdev->ifindex) ||
255 nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME,
256 netdev->name)))
257 goto nla_put_failure;
258 }
259 if (devlink_port->type == DEVLINK_PORT_TYPE_IB) {
260 struct ib_device *ibdev = devlink_port->type_dev;
261
262 if (ibdev &&
263 nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME,
264 ibdev->name))
265 goto nla_put_failure;
266 }
267 if (devlink_port->split &&
268 nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP,
269 devlink_port->split_group))
270 goto nla_put_failure;
271
272 genlmsg_end(msg, hdr);
273 return 0;
274
275nla_put_failure:
276 genlmsg_cancel(msg, hdr);
277 return -EMSGSIZE;
278}
279
280static void devlink_port_notify(struct devlink_port *devlink_port,
281 enum devlink_command cmd)
282{
283 struct devlink *devlink = devlink_port->devlink;
284 struct sk_buff *msg;
285 int err;
286
287 if (!devlink_port->registered)
288 return;
289
290 WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
291
292 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
293 if (!msg)
294 return;
295
296 err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0);
297 if (err) {
298 nlmsg_free(msg);
299 return;
300 }
301
302 genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
303 msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
304}
305
306static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
307{
308 struct devlink *devlink = info->user_ptr[0];
309 struct sk_buff *msg;
310 int err;
311
312 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
313 if (!msg)
314 return -ENOMEM;
315
316 err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
317 info->snd_portid, info->snd_seq, 0);
318 if (err) {
319 nlmsg_free(msg);
320 return err;
321 }
322
323 return genlmsg_reply(msg, info);
324}
325
326static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
327 struct netlink_callback *cb)
328{
329 struct devlink *devlink;
330 int start = cb->args[0];
331 int idx = 0;
332 int err;
333
334 mutex_lock(&devlink_mutex);
335 list_for_each_entry(devlink, &devlink_list, list) {
336 if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
337 continue;
338 if (idx < start) {
339 idx++;
340 continue;
341 }
342 err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
343 NETLINK_CB(cb->skb).portid,
344 cb->nlh->nlmsg_seq, NLM_F_MULTI);
345 if (err)
346 goto out;
347 idx++;
348 }
349out:
350 mutex_unlock(&devlink_mutex);
351
352 cb->args[0] = idx;
353 return msg->len;
354}
355
356static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
357 struct genl_info *info)
358{
359 struct devlink *devlink = info->user_ptr[0];
360 struct devlink_port *devlink_port = info->user_ptr[1];
361 struct sk_buff *msg;
362 int err;
363
364 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
365 if (!msg)
366 return -ENOMEM;
367
368 err = devlink_nl_port_fill(msg, devlink, devlink_port,
369 DEVLINK_CMD_PORT_NEW,
370 info->snd_portid, info->snd_seq, 0);
371 if (err) {
372 nlmsg_free(msg);
373 return err;
374 }
375
376 return genlmsg_reply(msg, info);
377}
378
379static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
380 struct netlink_callback *cb)
381{
382 struct devlink *devlink;
383 struct devlink_port *devlink_port;
384 int start = cb->args[0];
385 int idx = 0;
386 int err;
387
388 mutex_lock(&devlink_mutex);
389 mutex_lock(&devlink_port_mutex);
390 list_for_each_entry(devlink, &devlink_list, list) {
391 if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
392 continue;
393 list_for_each_entry(devlink_port, &devlink->port_list, list) {
394 if (idx < start) {
395 idx++;
396 continue;
397 }
398 err = devlink_nl_port_fill(msg, devlink, devlink_port,
399 DEVLINK_CMD_NEW,
400 NETLINK_CB(cb->skb).portid,
401 cb->nlh->nlmsg_seq,
402 NLM_F_MULTI);
403 if (err)
404 goto out;
405 idx++;
406 }
407 }
408out:
409 mutex_unlock(&devlink_port_mutex);
410 mutex_unlock(&devlink_mutex);
411
412 cb->args[0] = idx;
413 return msg->len;
414}
415
416static int devlink_port_type_set(struct devlink *devlink,
417 struct devlink_port *devlink_port,
418 enum devlink_port_type port_type)
419
420{
421 int err;
422
423 if (devlink->ops && devlink->ops->port_type_set) {
424 if (port_type == DEVLINK_PORT_TYPE_NOTSET)
425 return -EINVAL;
426 err = devlink->ops->port_type_set(devlink_port, port_type);
427 if (err)
428 return err;
429 devlink_port->desired_type = port_type;
430 devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
431 return 0;
432 }
433 return -EOPNOTSUPP;
434}
435
436static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
437 struct genl_info *info)
438{
439 struct devlink *devlink = info->user_ptr[0];
440 struct devlink_port *devlink_port = info->user_ptr[1];
441 int err;
442
443 if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
444 enum devlink_port_type port_type;
445
446 port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
447 err = devlink_port_type_set(devlink, devlink_port, port_type);
448 if (err)
449 return err;
450 }
451 return 0;
452}
453
454static int devlink_port_split(struct devlink *devlink,
455 u32 port_index, u32 count)
456
457{
458 if (devlink->ops && devlink->ops->port_split)
459 return devlink->ops->port_split(devlink, port_index, count);
460 return -EOPNOTSUPP;
461}
462
463static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
464 struct genl_info *info)
465{
466 struct devlink *devlink = info->user_ptr[0];
467 u32 port_index;
468 u32 count;
469
470 if (!info->attrs[DEVLINK_ATTR_PORT_INDEX] ||
471 !info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT])
472 return -EINVAL;
473
474 port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
475 count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
476 return devlink_port_split(devlink, port_index, count);
477}
478
479static int devlink_port_unsplit(struct devlink *devlink, u32 port_index)
480
481{
482 if (devlink->ops && devlink->ops->port_unsplit)
483 return devlink->ops->port_unsplit(devlink, port_index);
484 return -EOPNOTSUPP;
485}
486
487static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
488 struct genl_info *info)
489{
490 struct devlink *devlink = info->user_ptr[0];
491 u32 port_index;
492
493 if (!info->attrs[DEVLINK_ATTR_PORT_INDEX])
494 return -EINVAL;
495
496 port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
497 return devlink_port_unsplit(devlink, port_index);
498}
499
500static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
501 [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
502 [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
503 [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
504 [DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 },
505 [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
506};
507
508static const struct genl_ops devlink_nl_ops[] = {
509 {
510 .cmd = DEVLINK_CMD_GET,
511 .doit = devlink_nl_cmd_get_doit,
512 .dumpit = devlink_nl_cmd_get_dumpit,
513 .policy = devlink_nl_policy,
514 /* can be retrieved by unprivileged users */
515 },
516 {
517 .cmd = DEVLINK_CMD_PORT_GET,
518 .doit = devlink_nl_cmd_port_get_doit,
519 .dumpit = devlink_nl_cmd_port_get_dumpit,
520 .policy = devlink_nl_policy,
521 .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
522 /* can be retrieved by unprivileged users */
523 },
524 {
525 .cmd = DEVLINK_CMD_PORT_SET,
526 .doit = devlink_nl_cmd_port_set_doit,
527 .policy = devlink_nl_policy,
528 .flags = GENL_ADMIN_PERM,
529 .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
530 },
531 {
532 .cmd = DEVLINK_CMD_PORT_SPLIT,
533 .doit = devlink_nl_cmd_port_split_doit,
534 .policy = devlink_nl_policy,
535 .flags = GENL_ADMIN_PERM,
536 },
537 {
538 .cmd = DEVLINK_CMD_PORT_UNSPLIT,
539 .doit = devlink_nl_cmd_port_unsplit_doit,
540 .policy = devlink_nl_policy,
541 .flags = GENL_ADMIN_PERM,
542 },
543};
544
545/**
546 * devlink_alloc - Allocate new devlink instance resources
547 *
548 * @ops: ops
549 * @priv_size: size of user private data
550 *
551 * Allocate new devlink instance resources, including devlink index
552 * and name.
553 */
554struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
555{
556 struct devlink *devlink;
557
558 devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
559 if (!devlink)
560 return NULL;
561 devlink->ops = ops;
562 devlink_net_set(devlink, &init_net);
563 INIT_LIST_HEAD(&devlink->port_list);
564 return devlink;
565}
566EXPORT_SYMBOL_GPL(devlink_alloc);
567
568/**
569 * devlink_register - Register devlink instance
570 *
571 * @devlink: devlink
572 */
573int devlink_register(struct devlink *devlink, struct device *dev)
574{
575 mutex_lock(&devlink_mutex);
576 devlink->dev = dev;
577 list_add_tail(&devlink->list, &devlink_list);
578 devlink_notify(devlink, DEVLINK_CMD_NEW);
579 mutex_unlock(&devlink_mutex);
580 return 0;
581}
582EXPORT_SYMBOL_GPL(devlink_register);
583
584/**
585 * devlink_unregister - Unregister devlink instance
586 *
587 * @devlink: devlink
588 */
589void devlink_unregister(struct devlink *devlink)
590{
591 mutex_lock(&devlink_mutex);
592 devlink_notify(devlink, DEVLINK_CMD_DEL);
593 list_del(&devlink->list);
594 mutex_unlock(&devlink_mutex);
595}
596EXPORT_SYMBOL_GPL(devlink_unregister);
597
598/**
599 * devlink_free - Free devlink instance resources
600 *
601 * @devlink: devlink
602 */
603void devlink_free(struct devlink *devlink)
604{
605 kfree(devlink);
606}
607EXPORT_SYMBOL_GPL(devlink_free);
608
609/**
610 * devlink_port_register - Register devlink port
611 *
612 * @devlink: devlink
613 * @devlink_port: devlink port
 614 * @port_index: driver-specific numerical identifier of the port
615 *
616 * Register devlink port with provided port index. User can use
 617 * any indexing, even hw-related one. The devlink_port structure
 618 * is convenient to embed inside the driver's private structure.
619 * Note that the caller should take care of zeroing the devlink_port
620 * structure.
621 */
622int devlink_port_register(struct devlink *devlink,
623 struct devlink_port *devlink_port,
624 unsigned int port_index)
625{
626 mutex_lock(&devlink_port_mutex);
627 if (devlink_port_index_exists(devlink, port_index)) {
628 mutex_unlock(&devlink_port_mutex);
629 return -EEXIST;
630 }
631 devlink_port->devlink = devlink;
632 devlink_port->index = port_index;
633 devlink_port->type = DEVLINK_PORT_TYPE_NOTSET;
634 devlink_port->registered = true;
635 list_add_tail(&devlink_port->list, &devlink->port_list);
636 mutex_unlock(&devlink_port_mutex);
637 devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
638 return 0;
639}
640EXPORT_SYMBOL_GPL(devlink_port_register);
641
642/**
643 * devlink_port_unregister - Unregister devlink port
644 *
645 * @devlink_port: devlink port
646 */
647void devlink_port_unregister(struct devlink_port *devlink_port)
648{
649 devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
650 mutex_lock(&devlink_port_mutex);
651 list_del(&devlink_port->list);
652 mutex_unlock(&devlink_port_mutex);
653}
654EXPORT_SYMBOL_GPL(devlink_port_unregister);
655
656static void __devlink_port_type_set(struct devlink_port *devlink_port,
657 enum devlink_port_type type,
658 void *type_dev)
659{
660 devlink_port->type = type;
661 devlink_port->type_dev = type_dev;
662 devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
663}
664
665/**
666 * devlink_port_type_eth_set - Set port type to Ethernet
667 *
668 * @devlink_port: devlink port
669 * @netdev: related netdevice
670 */
671void devlink_port_type_eth_set(struct devlink_port *devlink_port,
672 struct net_device *netdev)
673{
674 return __devlink_port_type_set(devlink_port,
675 DEVLINK_PORT_TYPE_ETH, netdev);
676}
677EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
678
679/**
680 * devlink_port_type_ib_set - Set port type to InfiniBand
681 *
682 * @devlink_port: devlink port
683 * @ibdev: related IB device
684 */
685void devlink_port_type_ib_set(struct devlink_port *devlink_port,
686 struct ib_device *ibdev)
687{
688 return __devlink_port_type_set(devlink_port,
689 DEVLINK_PORT_TYPE_IB, ibdev);
690}
691EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
692
693/**
694 * devlink_port_type_clear - Clear port type
695 *
696 * @devlink_port: devlink port
697 */
698void devlink_port_type_clear(struct devlink_port *devlink_port)
699{
700 return __devlink_port_type_set(devlink_port,
701 DEVLINK_PORT_TYPE_NOTSET, NULL);
702}
703EXPORT_SYMBOL_GPL(devlink_port_type_clear);
704
705/**
 706 * devlink_port_split_set - Mark the port as split
707 *
708 * @devlink_port: devlink port
 709 * @split_group: split group - identifies the group this split port is part of
710 */
711void devlink_port_split_set(struct devlink_port *devlink_port,
712 u32 split_group)
713{
714 devlink_port->split = true;
715 devlink_port->split_group = split_group;
716 devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
717}
718EXPORT_SYMBOL_GPL(devlink_port_split_set);
719
720static int __init devlink_module_init(void)
721{
722 return genl_register_family_with_ops_groups(&devlink_nl_family,
723 devlink_nl_ops,
724 devlink_nl_mcgrps);
725}
726
727static void __exit devlink_module_exit(void)
728{
729 genl_unregister_family(&devlink_nl_family);
730}
731
732module_init(devlink_module_init);
733module_exit(devlink_module_exit);
734
735MODULE_LICENSE("GPL v2");
736MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
737MODULE_DESCRIPTION("Network physical device Netlink interface");
738MODULE_ALIAS_GENL_FAMILY(DEVLINK_GENL_NAME);
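To make the flow of the new interface easier to follow, here is a rough sketch of how a driver might wire it up during probe. It is an illustration rather than code from this patch: the foo_* names are invented, devlink_priv() is assumed from the accompanying include/net/devlink.h header, and teardown beyond the error path is omitted.

static const struct devlink_ops foo_devlink_ops = {
	/* .port_type_set, .port_split and .port_unsplit are optional */
};

struct foo_priv {
	struct devlink_port dl_port;	/* embedded; zeroed because devlink_alloc() uses kzalloc() */
};

static int foo_probe(struct device *dev, struct net_device *netdev)
{
	struct foo_priv *priv;
	struct devlink *dl;
	int err;

	dl = devlink_alloc(&foo_devlink_ops, sizeof(*priv));
	if (!dl)
		return -ENOMEM;
	priv = devlink_priv(dl);

	err = devlink_register(dl, dev);
	if (err)
		goto err_free;

	err = devlink_port_register(dl, &priv->dl_port, 0 /* port index */);
	if (err)
		goto err_unregister;

	/* announce which netdev backs this port */
	devlink_port_type_eth_set(&priv->dl_port, netdev);
	return 0;

err_unregister:
	devlink_unregister(dl);
err_free:
	devlink_free(dl);
	return err;
}

On removal the driver would undo this in reverse order: devlink_port_unregister(), devlink_unregister(), then devlink_free().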
diff --git a/net/core/dst.c b/net/core/dst.c
index a1656e3b8d72..b5cbbe07f786 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -265,7 +265,7 @@ again:
265 lwtstate_put(dst->lwtstate); 265 lwtstate_put(dst->lwtstate);
266 266
267 if (dst->flags & DST_METADATA) 267 if (dst->flags & DST_METADATA)
268 kfree(dst); 268 metadata_dst_free((struct metadata_dst *)dst);
269 else 269 else
270 kmem_cache_free(dst->ops->kmem_cachep, dst); 270 kmem_cache_free(dst->ops->kmem_cachep, dst);
271 271
@@ -395,6 +395,14 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
395} 395}
396EXPORT_SYMBOL_GPL(metadata_dst_alloc); 396EXPORT_SYMBOL_GPL(metadata_dst_alloc);
397 397
398void metadata_dst_free(struct metadata_dst *md_dst)
399{
400#ifdef CONFIG_DST_CACHE
401 dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
402#endif
403 kfree(md_dst);
404}
405
398struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags) 406struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags)
399{ 407{
400 int cpu; 408 int cpu;
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
new file mode 100644
index 000000000000..554d36449231
--- /dev/null
+++ b/net/core/dst_cache.c
@@ -0,0 +1,168 @@
1/*
2 * net/core/dst_cache.c - dst entry cache
3 *
4 * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/percpu.h>
14#include <net/dst_cache.h>
15#include <net/route.h>
16#if IS_ENABLED(CONFIG_IPV6)
17#include <net/ip6_fib.h>
18#endif
19#include <uapi/linux/in.h>
20
21struct dst_cache_pcpu {
22 unsigned long refresh_ts;
23 struct dst_entry *dst;
24 u32 cookie;
25 union {
26 struct in_addr in_saddr;
27 struct in6_addr in6_saddr;
28 };
29};
30
31static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
32 struct dst_entry *dst, u32 cookie)
33{
34 dst_release(dst_cache->dst);
35 if (dst)
36 dst_hold(dst);
37
38 dst_cache->cookie = cookie;
39 dst_cache->dst = dst;
40}
41
42static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
43 struct dst_cache_pcpu *idst)
44{
45 struct dst_entry *dst;
46
47 dst = idst->dst;
48 if (!dst)
49 goto fail;
50
 51 /* the cache already holds a dst reference; it can't go away */
52 dst_hold(dst);
53
54 if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
55 (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
56 dst_cache_per_cpu_dst_set(idst, NULL, 0);
57 dst_release(dst);
58 goto fail;
59 }
60 return dst;
61
62fail:
63 idst->refresh_ts = jiffies;
64 return NULL;
65}
66
67struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
68{
69 if (!dst_cache->cache)
70 return NULL;
71
72 return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
73}
74EXPORT_SYMBOL_GPL(dst_cache_get);
75
76struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
77{
78 struct dst_cache_pcpu *idst;
79 struct dst_entry *dst;
80
81 if (!dst_cache->cache)
82 return NULL;
83
84 idst = this_cpu_ptr(dst_cache->cache);
85 dst = dst_cache_per_cpu_get(dst_cache, idst);
86 if (!dst)
87 return NULL;
88
89 *saddr = idst->in_saddr.s_addr;
90 return container_of(dst, struct rtable, dst);
91}
92EXPORT_SYMBOL_GPL(dst_cache_get_ip4);
93
94void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
95 __be32 saddr)
96{
97 struct dst_cache_pcpu *idst;
98
99 if (!dst_cache->cache)
100 return;
101
102 idst = this_cpu_ptr(dst_cache->cache);
103 dst_cache_per_cpu_dst_set(idst, dst, 0);
104 idst->in_saddr.s_addr = saddr;
105}
106EXPORT_SYMBOL_GPL(dst_cache_set_ip4);
107
108#if IS_ENABLED(CONFIG_IPV6)
109void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
110 const struct in6_addr *addr)
111{
112 struct dst_cache_pcpu *idst;
113
114 if (!dst_cache->cache)
115 return;
116
117 idst = this_cpu_ptr(dst_cache->cache);
118 dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
119 rt6_get_cookie((struct rt6_info *)dst));
120 idst->in6_saddr = *addr;
121}
122EXPORT_SYMBOL_GPL(dst_cache_set_ip6);
123
124struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
125 struct in6_addr *saddr)
126{
127 struct dst_cache_pcpu *idst;
128 struct dst_entry *dst;
129
130 if (!dst_cache->cache)
131 return NULL;
132
133 idst = this_cpu_ptr(dst_cache->cache);
134 dst = dst_cache_per_cpu_get(dst_cache, idst);
135 if (!dst)
136 return NULL;
137
138 *saddr = idst->in6_saddr;
139 return dst;
140}
141EXPORT_SYMBOL_GPL(dst_cache_get_ip6);
142#endif
143
144int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
145{
146 dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
147 gfp | __GFP_ZERO);
148 if (!dst_cache->cache)
149 return -ENOMEM;
150
151 dst_cache_reset(dst_cache);
152 return 0;
153}
154EXPORT_SYMBOL_GPL(dst_cache_init);
155
156void dst_cache_destroy(struct dst_cache *dst_cache)
157{
158 int i;
159
160 if (!dst_cache->cache)
161 return;
162
163 for_each_possible_cpu(i)
164 dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
165
166 free_percpu(dst_cache->cache);
167}
168EXPORT_SYMBOL_GPL(dst_cache_destroy);
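As a usage illustration, not part of the patch, a tunnel-style transmit path could consult the cache before doing a full route lookup along the following lines. The foo_tunnel type is made up, reference release and locking are left out, and ip_route_output_key()/struct flowi4 are the ordinary IPv4 routing pieces.

#include <linux/err.h>
#include <linux/gfp.h>
#include <net/dst_cache.h>
#include <net/route.h>

struct foo_tunnel {
	struct dst_cache dst_cache;
	/* ... other driver state ... */
};

static int foo_tunnel_init(struct foo_tunnel *t)
{
	/* allocates the per-CPU cache slots */
	return dst_cache_init(&t->dst_cache, GFP_KERNEL);
}

static struct rtable *foo_tunnel_route(struct foo_tunnel *t, struct net *net,
				       struct flowi4 *fl4)
{
	struct rtable *rt;
	__be32 saddr;

	rt = dst_cache_get_ip4(&t->dst_cache, &saddr);
	if (rt) {
		fl4->saddr = saddr;	/* cache hit: reuse the stored source */
		return rt;
	}

	rt = ip_route_output_key(net, fl4);
	if (IS_ERR(rt))
		return rt;

	/* remember the fresh route (and chosen source) for the next packet */
	dst_cache_set_ip4(&t->dst_cache, &rt->dst, fl4->saddr);
	return rt;
}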
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index daf04709dd3c..f426c5ad6149 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
98 [NETIF_F_RXALL_BIT] = "rx-all", 98 [NETIF_F_RXALL_BIT] = "rx-all",
99 [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", 99 [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
100 [NETIF_F_BUSY_POLL_BIT] = "busy-poll", 100 [NETIF_F_BUSY_POLL_BIT] = "busy-poll",
101 [NETIF_F_HW_TC_BIT] = "hw-tc-offload",
101}; 102};
102 103
103static const char 104static const char
@@ -386,43 +387,461 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
386 return 0; 387 return 0;
387} 388}
388 389
389int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) 390static void convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32)
390{ 391{
392 bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
393 dst[0] = legacy_u32;
394}
395
396/* return false if src had higher bits set. lower bits always updated. */
397static bool convert_link_mode_to_legacy_u32(u32 *legacy_u32,
398 const unsigned long *src)
399{
400 bool retval = true;
401
402 /* TODO: following test will soon always be true */
403 if (__ETHTOOL_LINK_MODE_MASK_NBITS > 32) {
404 __ETHTOOL_DECLARE_LINK_MODE_MASK(ext);
405
406 bitmap_zero(ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
407 bitmap_fill(ext, 32);
408 bitmap_complement(ext, ext, __ETHTOOL_LINK_MODE_MASK_NBITS);
409 if (bitmap_intersects(ext, src,
410 __ETHTOOL_LINK_MODE_MASK_NBITS)) {
411 /* src mask goes beyond bit 31 */
412 retval = false;
413 }
414 }
415 *legacy_u32 = src[0];
416 return retval;
417}
418
419/* return false if legacy contained non-0 deprecated fields
420 * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated
421 */
422static bool
423convert_legacy_settings_to_link_ksettings(
424 struct ethtool_link_ksettings *link_ksettings,
425 const struct ethtool_cmd *legacy_settings)
426{
427 bool retval = true;
428
429 memset(link_ksettings, 0, sizeof(*link_ksettings));
430
431 /* This is used to tell users that driver is still using these
432 * deprecated legacy fields, and they should not use
433 * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
434 */
435 if (legacy_settings->transceiver ||
436 legacy_settings->maxtxpkt ||
437 legacy_settings->maxrxpkt)
438 retval = false;
439
440 convert_legacy_u32_to_link_mode(
441 link_ksettings->link_modes.supported,
442 legacy_settings->supported);
443 convert_legacy_u32_to_link_mode(
444 link_ksettings->link_modes.advertising,
445 legacy_settings->advertising);
446 convert_legacy_u32_to_link_mode(
447 link_ksettings->link_modes.lp_advertising,
448 legacy_settings->lp_advertising);
449 link_ksettings->base.speed
450 = ethtool_cmd_speed(legacy_settings);
451 link_ksettings->base.duplex
452 = legacy_settings->duplex;
453 link_ksettings->base.port
454 = legacy_settings->port;
455 link_ksettings->base.phy_address
456 = legacy_settings->phy_address;
457 link_ksettings->base.autoneg
458 = legacy_settings->autoneg;
459 link_ksettings->base.mdio_support
460 = legacy_settings->mdio_support;
461 link_ksettings->base.eth_tp_mdix
462 = legacy_settings->eth_tp_mdix;
463 link_ksettings->base.eth_tp_mdix_ctrl
464 = legacy_settings->eth_tp_mdix_ctrl;
465 return retval;
466}
467
468/* return false if ksettings link modes had higher bits
469 * set. legacy_settings always updated (best effort)
470 */
471static bool
472convert_link_ksettings_to_legacy_settings(
473 struct ethtool_cmd *legacy_settings,
474 const struct ethtool_link_ksettings *link_ksettings)
475{
476 bool retval = true;
477
478 memset(legacy_settings, 0, sizeof(*legacy_settings));
479 /* this also clears the deprecated fields in legacy structure:
480 * __u8 transceiver;
481 * __u32 maxtxpkt;
482 * __u32 maxrxpkt;
483 */
484
485 retval &= convert_link_mode_to_legacy_u32(
486 &legacy_settings->supported,
487 link_ksettings->link_modes.supported);
488 retval &= convert_link_mode_to_legacy_u32(
489 &legacy_settings->advertising,
490 link_ksettings->link_modes.advertising);
491 retval &= convert_link_mode_to_legacy_u32(
492 &legacy_settings->lp_advertising,
493 link_ksettings->link_modes.lp_advertising);
494 ethtool_cmd_speed_set(legacy_settings, link_ksettings->base.speed);
495 legacy_settings->duplex
496 = link_ksettings->base.duplex;
497 legacy_settings->port
498 = link_ksettings->base.port;
499 legacy_settings->phy_address
500 = link_ksettings->base.phy_address;
501 legacy_settings->autoneg
502 = link_ksettings->base.autoneg;
503 legacy_settings->mdio_support
504 = link_ksettings->base.mdio_support;
505 legacy_settings->eth_tp_mdix
506 = link_ksettings->base.eth_tp_mdix;
507 legacy_settings->eth_tp_mdix_ctrl
508 = link_ksettings->base.eth_tp_mdix_ctrl;
509 return retval;
510}
511
512/* number of 32-bit words to store the user's link mode bitmaps */
513#define __ETHTOOL_LINK_MODE_MASK_NU32 \
514 DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32)
515
516/* layout of the struct passed from/to userland */
517struct ethtool_link_usettings {
518 struct ethtool_link_settings base;
519 struct {
520 __u32 supported[__ETHTOOL_LINK_MODE_MASK_NU32];
521 __u32 advertising[__ETHTOOL_LINK_MODE_MASK_NU32];
522 __u32 lp_advertising[__ETHTOOL_LINK_MODE_MASK_NU32];
523 } link_modes;
524};
525
526/* Internal kernel helper to query a device ethtool_link_settings.
527 *
528 * Backward compatibility note: for compatibility with legacy drivers
529 * that implement only the ethtool_cmd API, this has to work with both
530 * drivers implementing get_link_ksettings API and drivers
531 * implementing get_settings API. When drivers implement get_settings
532 * and report ethtool_cmd deprecated fields
533 * (transceiver/maxrxpkt/maxtxpkt), these fields are silently ignored
534 * because the resulting struct ethtool_link_settings does not report them.
535 */
536int __ethtool_get_link_ksettings(struct net_device *dev,
537 struct ethtool_link_ksettings *link_ksettings)
538{
539 int err;
540 struct ethtool_cmd cmd;
541
391 ASSERT_RTNL(); 542 ASSERT_RTNL();
392 543
544 if (dev->ethtool_ops->get_link_ksettings) {
545 memset(link_ksettings, 0, sizeof(*link_ksettings));
546 return dev->ethtool_ops->get_link_ksettings(dev,
547 link_ksettings);
548 }
549
550 /* driver doesn't support %ethtool_link_ksettings API. revert to
551 * legacy %ethtool_cmd API, unless it's not supported either.
552 * TODO: remove when ethtool_ops::get_settings disappears internally
553 */
393 if (!dev->ethtool_ops->get_settings) 554 if (!dev->ethtool_ops->get_settings)
394 return -EOPNOTSUPP; 555 return -EOPNOTSUPP;
395 556
396 memset(cmd, 0, sizeof(struct ethtool_cmd)); 557 memset(&cmd, 0, sizeof(cmd));
397 cmd->cmd = ETHTOOL_GSET; 558 cmd.cmd = ETHTOOL_GSET;
398 return dev->ethtool_ops->get_settings(dev, cmd); 559 err = dev->ethtool_ops->get_settings(dev, &cmd);
560 if (err < 0)
561 return err;
562
563 /* we ignore deprecated fields transceiver/maxrxpkt/maxtxpkt
564 */
565 convert_legacy_settings_to_link_ksettings(link_ksettings, &cmd);
566 return err;
399} 567}
400EXPORT_SYMBOL(__ethtool_get_settings); 568EXPORT_SYMBOL(__ethtool_get_link_ksettings);
401 569
402static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) 570/* convert ethtool_link_usettings in user space to a kernel internal
571 * ethtool_link_ksettings. return 0 on success, errno on error.
572 */
573static int load_link_ksettings_from_user(struct ethtool_link_ksettings *to,
574 const void __user *from)
403{ 575{
404 int err; 576 struct ethtool_link_usettings link_usettings;
405 struct ethtool_cmd cmd; 577
578 if (copy_from_user(&link_usettings, from, sizeof(link_usettings)))
579 return -EFAULT;
580
581 memcpy(&to->base, &link_usettings.base, sizeof(to->base));
582 bitmap_from_u32array(to->link_modes.supported,
583 __ETHTOOL_LINK_MODE_MASK_NBITS,
584 link_usettings.link_modes.supported,
585 __ETHTOOL_LINK_MODE_MASK_NU32);
586 bitmap_from_u32array(to->link_modes.advertising,
587 __ETHTOOL_LINK_MODE_MASK_NBITS,
588 link_usettings.link_modes.advertising,
589 __ETHTOOL_LINK_MODE_MASK_NU32);
590 bitmap_from_u32array(to->link_modes.lp_advertising,
591 __ETHTOOL_LINK_MODE_MASK_NBITS,
592 link_usettings.link_modes.lp_advertising,
593 __ETHTOOL_LINK_MODE_MASK_NU32);
594
595 return 0;
596}
597
598/* convert a kernel internal ethtool_link_ksettings to
599 * ethtool_link_usettings in user space. return 0 on success, errno on
600 * error.
601 */
602static int
603store_link_ksettings_for_user(void __user *to,
604 const struct ethtool_link_ksettings *from)
605{
606 struct ethtool_link_usettings link_usettings;
607
608 memcpy(&link_usettings.base, &from->base, sizeof(link_usettings));
609 bitmap_to_u32array(link_usettings.link_modes.supported,
610 __ETHTOOL_LINK_MODE_MASK_NU32,
611 from->link_modes.supported,
612 __ETHTOOL_LINK_MODE_MASK_NBITS);
613 bitmap_to_u32array(link_usettings.link_modes.advertising,
614 __ETHTOOL_LINK_MODE_MASK_NU32,
615 from->link_modes.advertising,
616 __ETHTOOL_LINK_MODE_MASK_NBITS);
617 bitmap_to_u32array(link_usettings.link_modes.lp_advertising,
618 __ETHTOOL_LINK_MODE_MASK_NU32,
619 from->link_modes.lp_advertising,
620 __ETHTOOL_LINK_MODE_MASK_NBITS);
621
622 if (copy_to_user(to, &link_usettings, sizeof(link_usettings)))
623 return -EFAULT;
624
625 return 0;
626}
627
628/* Query device for its ethtool_link_settings.
629 *
630 * Backward compatibility note: this function must fail when driver
631 * does not implement ethtool::get_link_ksettings, even if legacy
632 * ethtool_ops::get_settings is implemented. This tells new versions
633 * of ethtool that they should use the legacy API %ETHTOOL_GSET for
634 * this driver, so that they can correctly access the ethtool_cmd
635 * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
636 * implements ethtool_ops::get_settings anymore.
637 */
638static int ethtool_get_link_ksettings(struct net_device *dev,
639 void __user *useraddr)
640{
641 int err = 0;
642 struct ethtool_link_ksettings link_ksettings;
406 643
407 err = __ethtool_get_settings(dev, &cmd); 644 ASSERT_RTNL();
645
646 if (!dev->ethtool_ops->get_link_ksettings)
647 return -EOPNOTSUPP;
648
649 /* handle bitmap nbits handshake */
650 if (copy_from_user(&link_ksettings.base, useraddr,
651 sizeof(link_ksettings.base)))
652 return -EFAULT;
653
654 if (__ETHTOOL_LINK_MODE_MASK_NU32
655 != link_ksettings.base.link_mode_masks_nwords) {
656 /* wrong link mode nbits requested */
657 memset(&link_ksettings, 0, sizeof(link_ksettings));
658 link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS;
659 /* send back number of words required as negative val */
660 compiletime_assert(__ETHTOOL_LINK_MODE_MASK_NU32 <= S8_MAX,
661 "need too many bits for link modes!");
662 link_ksettings.base.link_mode_masks_nwords
663 = -((s8)__ETHTOOL_LINK_MODE_MASK_NU32);
664
665 /* copy the base fields back to user, not the link
666 * mode bitmaps
667 */
668 if (copy_to_user(useraddr, &link_ksettings.base,
669 sizeof(link_ksettings.base)))
670 return -EFAULT;
671
672 return 0;
673 }
674
675 /* handshake successful: user/kernel agree on
676 * link_mode_masks_nwords
677 */
678
679 memset(&link_ksettings, 0, sizeof(link_ksettings));
680 err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings);
408 if (err < 0) 681 if (err < 0)
409 return err; 682 return err;
410 683
684 /* make sure we tell the right values to user */
685 link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS;
686 link_ksettings.base.link_mode_masks_nwords
687 = __ETHTOOL_LINK_MODE_MASK_NU32;
688
689 return store_link_ksettings_for_user(useraddr, &link_ksettings);
690}
691
692/* Update device ethtool_link_settings.
693 *
694 * Backward compatibility note: this function must fail when driver
695 * does not implement ethtool::set_link_ksettings, even if legacy
696 * ethtool_ops::set_settings is implemented. This tells new versions
697 * of ethtool that they should use the legacy API %ETHTOOL_SSET for
698 * this driver, so that they can correctly update the ethtool_cmd
699 * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
700 * implements ethtool_ops::get_settings anymore.
701 */
702static int ethtool_set_link_ksettings(struct net_device *dev,
703 void __user *useraddr)
704{
705 int err;
706 struct ethtool_link_ksettings link_ksettings;
707
708 ASSERT_RTNL();
709
710 if (!dev->ethtool_ops->set_link_ksettings)
711 return -EOPNOTSUPP;
712
713 /* make sure nbits field has expected value */
714 if (copy_from_user(&link_ksettings.base, useraddr,
715 sizeof(link_ksettings.base)))
716 return -EFAULT;
717
718 if (__ETHTOOL_LINK_MODE_MASK_NU32
719 != link_ksettings.base.link_mode_masks_nwords)
720 return -EINVAL;
721
722 /* copy the whole structure, now that we know it has expected
723 * format
724 */
725 err = load_link_ksettings_from_user(&link_ksettings, useraddr);
726 if (err)
727 return err;
728
729 /* re-check nwords field, just in case */
730 if (__ETHTOOL_LINK_MODE_MASK_NU32
731 != link_ksettings.base.link_mode_masks_nwords)
732 return -EINVAL;
733
734 return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
735}
736
737static void
738warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
739{
740 char name[sizeof(current->comm)];
741
742 pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
743 get_task_comm(name, current), details);
744}
745
746/* Query device for its ethtool_cmd settings.
747 *
748 * Backward compatibility note: for compatibility with legacy ethtool,
749 * this has to work with both drivers implementing get_link_ksettings
750 * API and drivers implementing get_settings API. When drivers
751 * implement get_link_ksettings and report higher link mode bits, a
752 * kernel warning is logged once (with name of 1st driver/device) to
753 * recommend user to upgrade ethtool, but the command is successful
754 * (only the lower link mode bits reported back to user).
755 */
756static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
757{
758 struct ethtool_cmd cmd;
759
760 ASSERT_RTNL();
761
762 if (dev->ethtool_ops->get_link_ksettings) {
763 /* First, use link_ksettings API if it is supported */
764 int err;
765 struct ethtool_link_ksettings link_ksettings;
766
767 memset(&link_ksettings, 0, sizeof(link_ksettings));
768 err = dev->ethtool_ops->get_link_ksettings(dev,
769 &link_ksettings);
770 if (err < 0)
771 return err;
772 if (!convert_link_ksettings_to_legacy_settings(&cmd,
773 &link_ksettings))
774 warn_incomplete_ethtool_legacy_settings_conversion(
775 "link modes are only partially reported");
776
777 /* send a sensible cmd tag back to user */
778 cmd.cmd = ETHTOOL_GSET;
779 } else {
780 /* driver doesn't support %ethtool_link_ksettings
781 * API. revert to legacy %ethtool_cmd API, unless it's
782 * not supported either.
783 */
784 int err;
785
786 if (!dev->ethtool_ops->get_settings)
787 return -EOPNOTSUPP;
788
789 memset(&cmd, 0, sizeof(cmd));
790 cmd.cmd = ETHTOOL_GSET;
791 err = dev->ethtool_ops->get_settings(dev, &cmd);
792 if (err < 0)
793 return err;
794 }
795
411 if (copy_to_user(useraddr, &cmd, sizeof(cmd))) 796 if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
412 return -EFAULT; 797 return -EFAULT;
798
413 return 0; 799 return 0;
414} 800}
415 801
802/* Update device link settings with given ethtool_cmd.
803 *
804 * Backward compatibility note: for compatibility with legacy ethtool,
805 * this has to work with both drivers implementing set_link_ksettings
806 * API and drivers implementing set_settings API. When drivers
807 * implement set_link_ksettings and user's request updates deprecated
808 * ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel
809 * warning is logged once (with name of 1st driver/device) to
810 * recommend user to upgrade ethtool, and the request is rejected.
811 */
416static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) 812static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
417{ 813{
418 struct ethtool_cmd cmd; 814 struct ethtool_cmd cmd;
419 815
420 if (!dev->ethtool_ops->set_settings) 816 ASSERT_RTNL();
421 return -EOPNOTSUPP;
422 817
423 if (copy_from_user(&cmd, useraddr, sizeof(cmd))) 818 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
424 return -EFAULT; 819 return -EFAULT;
425 820
821 /* first, try new %ethtool_link_ksettings API. */
822 if (dev->ethtool_ops->set_link_ksettings) {
823 struct ethtool_link_ksettings link_ksettings;
824
825 if (!convert_legacy_settings_to_link_ksettings(&link_ksettings,
826 &cmd))
827 return -EINVAL;
828
829 link_ksettings.base.cmd = ETHTOOL_SLINKSETTINGS;
830 link_ksettings.base.link_mode_masks_nwords
831 = __ETHTOOL_LINK_MODE_MASK_NU32;
832 return dev->ethtool_ops->set_link_ksettings(dev,
833 &link_ksettings);
834 }
835
836 /* legacy %ethtool_cmd API */
837
838 /* TODO: return -EOPNOTSUPP when ethtool_ops::get_settings
839 * disappears internally
840 */
841
842 if (!dev->ethtool_ops->set_settings)
843 return -EOPNOTSUPP;
844
426 return dev->ethtool_ops->set_settings(dev, &cmd); 845 return dev->ethtool_ops->set_settings(dev, &cmd);
427} 846}
428 847
@@ -632,7 +1051,7 @@ static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
632 return 0; 1051 return 0;
633} 1052}
634 1053
635u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; 1054u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
636 1055
637void netdev_rss_key_fill(void *buffer, size_t len) 1056void netdev_rss_key_fill(void *buffer, size_t len)
638{ 1057{
@@ -642,6 +1061,37 @@ void netdev_rss_key_fill(void *buffer, size_t len)
642} 1061}
643EXPORT_SYMBOL(netdev_rss_key_fill); 1062EXPORT_SYMBOL(netdev_rss_key_fill);
644 1063
1064static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
1065{
1066 u32 dev_size, current_max = 0;
1067 u32 *indir;
1068 int ret;
1069
1070 if (!dev->ethtool_ops->get_rxfh_indir_size ||
1071 !dev->ethtool_ops->get_rxfh)
1072 return -EOPNOTSUPP;
1073 dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
1074 if (dev_size == 0)
1075 return -EOPNOTSUPP;
1076
1077 indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
1078 if (!indir)
1079 return -ENOMEM;
1080
1081 ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
1082 if (ret)
1083 goto out;
1084
1085 while (dev_size--)
1086 current_max = max(current_max, indir[dev_size]);
1087
1088 *max = current_max;
1089
1090out:
1091 kfree(indir);
1092 return ret;
1093}
1094
645static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, 1095static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
646 void __user *useraddr) 1096 void __user *useraddr)
647{ 1097{
@@ -738,6 +1188,14 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
738 } 1188 }
739 1189
740 ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); 1190 ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE);
1191 if (ret)
1192 goto out;
1193
1194 /* indicate whether rxfh was set to default */
1195 if (user_size == 0)
1196 dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
1197 else
1198 dev->priv_flags |= IFF_RXFH_CONFIGURED;
741 1199
742out: 1200out:
743 kfree(indir); 1201 kfree(indir);
@@ -897,6 +1355,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
897 } 1355 }
898 1356
899 ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); 1357 ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
1358 if (ret)
1359 goto out;
1360
1361 /* indicate whether rxfh was set to default */
1362 if (rxfh.indir_size == 0)
1363 dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
1364 else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
1365 dev->priv_flags |= IFF_RXFH_CONFIGURED;
900 1366
901out: 1367out:
902 kfree(rss_config); 1368 kfree(rss_config);
@@ -1227,14 +1693,31 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
1227static noinline_for_stack int ethtool_set_channels(struct net_device *dev, 1693static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
1228 void __user *useraddr) 1694 void __user *useraddr)
1229{ 1695{
1230 struct ethtool_channels channels; 1696 struct ethtool_channels channels, max;
1697 u32 max_rx_in_use = 0;
1231 1698
1232 if (!dev->ethtool_ops->set_channels) 1699 if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
1233 return -EOPNOTSUPP; 1700 return -EOPNOTSUPP;
1234 1701
1235 if (copy_from_user(&channels, useraddr, sizeof(channels))) 1702 if (copy_from_user(&channels, useraddr, sizeof(channels)))
1236 return -EFAULT; 1703 return -EFAULT;
1237 1704
1705 dev->ethtool_ops->get_channels(dev, &max);
1706
1707 /* ensure new counts are within the maximums */
1708 if ((channels.rx_count > max.max_rx) ||
1709 (channels.tx_count > max.max_tx) ||
1710 (channels.combined_count > max.max_combined) ||
1711 (channels.other_count > max.max_other))
1712 return -EINVAL;
1713
1714 /* ensure the new Rx count fits within the configured Rx flow
1715 * indirection table settings */
1716 if (netif_is_rxfh_configured(dev) &&
1717 !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) &&
1718 (channels.combined_count + channels.rx_count) <= max_rx_in_use)
1719 return -EINVAL;
1720
1238 return dev->ethtool_ops->set_channels(dev, &channels); 1721 return dev->ethtool_ops->set_channels(dev, &channels);
1239} 1722}
1240 1723
@@ -1823,13 +2306,121 @@ out:
1823 return ret; 2306 return ret;
1824} 2307}
1825 2308
2309static int ethtool_get_per_queue_coalesce(struct net_device *dev,
2310 void __user *useraddr,
2311 struct ethtool_per_queue_op *per_queue_opt)
2312{
2313 u32 bit;
2314 int ret;
2315 DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE);
2316
2317 if (!dev->ethtool_ops->get_per_queue_coalesce)
2318 return -EOPNOTSUPP;
2319
2320 useraddr += sizeof(*per_queue_opt);
2321
2322 bitmap_from_u32array(queue_mask,
2323 MAX_NUM_QUEUE,
2324 per_queue_opt->queue_mask,
2325 DIV_ROUND_UP(MAX_NUM_QUEUE, 32));
2326
2327 for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) {
2328 struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
2329
2330 ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, &coalesce);
2331 if (ret != 0)
2332 return ret;
2333 if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
2334 return -EFAULT;
2335 useraddr += sizeof(coalesce);
2336 }
2337
2338 return 0;
2339}
2340
2341static int ethtool_set_per_queue_coalesce(struct net_device *dev,
2342 void __user *useraddr,
2343 struct ethtool_per_queue_op *per_queue_opt)
2344{
2345 u32 bit;
2346 int i, ret = 0;
2347 int n_queue;
2348 struct ethtool_coalesce *backup = NULL, *tmp = NULL;
2349 DECLARE_BITMAP(queue_mask, MAX_NUM_QUEUE);
2350
2351 if ((!dev->ethtool_ops->set_per_queue_coalesce) ||
2352 (!dev->ethtool_ops->get_per_queue_coalesce))
2353 return -EOPNOTSUPP;
2354
2355 useraddr += sizeof(*per_queue_opt);
2356
2357 bitmap_from_u32array(queue_mask,
2358 MAX_NUM_QUEUE,
2359 per_queue_opt->queue_mask,
2360 DIV_ROUND_UP(MAX_NUM_QUEUE, 32));
2361 n_queue = bitmap_weight(queue_mask, MAX_NUM_QUEUE);
2362 tmp = backup = kmalloc_array(n_queue, sizeof(*backup), GFP_KERNEL);
2363 if (!backup)
2364 return -ENOMEM;
2365
2366 for_each_set_bit(bit, queue_mask, MAX_NUM_QUEUE) {
2367 struct ethtool_coalesce coalesce;
2368
2369 ret = dev->ethtool_ops->get_per_queue_coalesce(dev, bit, tmp);
2370 if (ret != 0)
2371 goto roll_back;
2372
2373 tmp++;
2374
2375 if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) {
2376 ret = -EFAULT;
2377 goto roll_back;
2378 }
2379
2380 ret = dev->ethtool_ops->set_per_queue_coalesce(dev, bit, &coalesce);
2381 if (ret != 0)
2382 goto roll_back;
2383
2384 useraddr += sizeof(coalesce);
2385 }
2386
2387roll_back:
2388 if (ret != 0) {
2389 tmp = backup;
2390 for_each_set_bit(i, queue_mask, bit) {
2391 dev->ethtool_ops->set_per_queue_coalesce(dev, i, tmp);
2392 tmp++;
2393 }
2394 }
2395 kfree(backup);
2396
2397 return ret;
2398}
2399
2400static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
2401{
2402 struct ethtool_per_queue_op per_queue_opt;
2403
2404 if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt)))
2405 return -EFAULT;
2406
2407 switch (per_queue_opt.sub_command) {
2408 case ETHTOOL_GCOALESCE:
2409 return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt);
2410 case ETHTOOL_SCOALESCE:
2411 return ethtool_set_per_queue_coalesce(dev, useraddr, &per_queue_opt);
2412 default:
2413 return -EOPNOTSUPP;
2414 };
2415}
2416
1826/* The main entry point in this file. Called from net/core/dev_ioctl.c */ 2417/* The main entry point in this file. Called from net/core/dev_ioctl.c */
1827 2418
1828int dev_ethtool(struct net *net, struct ifreq *ifr) 2419int dev_ethtool(struct net *net, struct ifreq *ifr)
1829{ 2420{
1830 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); 2421 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
1831 void __user *useraddr = ifr->ifr_data; 2422 void __user *useraddr = ifr->ifr_data;
1832 u32 ethcmd; 2423 u32 ethcmd, sub_cmd;
1833 int rc; 2424 int rc;
1834 netdev_features_t old_features; 2425 netdev_features_t old_features;
1835 2426
@@ -1839,8 +2430,14 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1839 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) 2430 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
1840 return -EFAULT; 2431 return -EFAULT;
1841 2432
2433 if (ethcmd == ETHTOOL_PERQUEUE) {
2434 if (copy_from_user(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd)))
2435 return -EFAULT;
2436 } else {
2437 sub_cmd = ethcmd;
2438 }
1842 /* Allow some commands to be done by anyone */ 2439 /* Allow some commands to be done by anyone */
1843 switch (ethcmd) { 2440 switch (sub_cmd) {
1844 case ETHTOOL_GSET: 2441 case ETHTOOL_GSET:
1845 case ETHTOOL_GDRVINFO: 2442 case ETHTOOL_GDRVINFO:
1846 case ETHTOOL_GMSGLVL: 2443 case ETHTOOL_GMSGLVL:
@@ -2070,6 +2667,15 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
2070 case ETHTOOL_GPHYSTATS: 2667 case ETHTOOL_GPHYSTATS:
2071 rc = ethtool_get_phy_stats(dev, useraddr); 2668 rc = ethtool_get_phy_stats(dev, useraddr);
2072 break; 2669 break;
2670 case ETHTOOL_PERQUEUE:
2671 rc = ethtool_set_per_queue(dev, useraddr);
2672 break;
2673 case ETHTOOL_GLINKSETTINGS:
2674 rc = ethtool_get_link_ksettings(dev, useraddr);
2675 break;
2676 case ETHTOOL_SLINKSETTINGS:
2677 rc = ethtool_set_link_ksettings(dev, useraddr);
2678 break;
2073 default: 2679 default:
2074 rc = -EOPNOTSUPP; 2680 rc = -EOPNOTSUPP;
2075 } 2681 }
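From userspace, the link_mode_masks_nwords handshake implemented by ethtool_get_link_ksettings() above plays out in two SIOCETHTOOL calls. The sketch below is simplified and not from this patch: the request struct, the oversized mask buffer and the error handling are illustrative, and fd is assumed to be any AF_INET datagram socket, e.g. socket(AF_INET, SOCK_DGRAM, 0).

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

struct glinksettings_req {
	struct ethtool_link_settings base;
	__u32 link_mode_masks[3 * 127];	/* worst case: three masks, S8_MAX words each */
};

static int get_link_settings(int fd, const char *ifname)
{
	struct glinksettings_req req;
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&req;

	/* pass 1: ask with nwords == 0; the kernel answers with -N */
	memset(&req, 0, sizeof(req));
	req.base.cmd = ETHTOOL_GLINKSETTINGS;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return -1;
	if (req.base.link_mode_masks_nwords >= 0)
		return -1;	/* no handshake reply; treat as unexpected */

	/* pass 2: repeat with the agreed number of 32-bit mask words */
	req.base.link_mode_masks_nwords = -req.base.link_mode_masks_nwords;
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		return -1;

	printf("speed %u Mb/s, duplex %u, autoneg %u\n",
	       req.base.speed, req.base.duplex, req.base.autoneg);
	return 0;
}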
diff --git a/net/core/filter.c b/net/core/filter.c
index 94d26201080d..b7177d01ecb0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -530,12 +530,14 @@ do_pass:
530 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP); 530 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
531 break; 531 break;
532 532
533 /* RET_K, RET_A are remapped into 2 insns. */ 533 /* RET_K is remapped into 2 insns. RET_A case doesn't need an
534 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
535 */
534 case BPF_RET | BPF_A: 536 case BPF_RET | BPF_A:
535 case BPF_RET | BPF_K: 537 case BPF_RET | BPF_K:
536 *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ? 538 if (BPF_RVAL(fp->code) == BPF_K)
537 BPF_K : BPF_X, BPF_REG_0, 539 *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
538 BPF_REG_A, fp->k); 540 0, fp->k);
539 *insn = BPF_EXIT_INSN(); 541 *insn = BPF_EXIT_INSN();
540 break; 542 break;
541 543
@@ -1181,7 +1183,7 @@ static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
1181 if (bpf_prog_size(prog->len) > sysctl_optmem_max) 1183 if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1182 return -ENOMEM; 1184 return -ENOMEM;
1183 1185
1184 if (sk_unhashed(sk)) { 1186 if (sk_unhashed(sk) && sk->sk_reuseport) {
1185 err = reuseport_alloc(sk); 1187 err = reuseport_alloc(sk);
1186 if (err) 1188 if (err)
1187 return err; 1189 return err;
@@ -1333,18 +1335,25 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1333 return 0; 1335 return 0;
1334} 1336}
1335 1337
1336#define BPF_LDST_LEN 16U 1338struct bpf_scratchpad {
1339 union {
1340 __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
1341 u8 buff[MAX_BPF_STACK];
1342 };
1343};
1344
1345static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
1337 1346
1338static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) 1347static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1339{ 1348{
1349 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1340 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1350 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1341 int offset = (int) r2; 1351 int offset = (int) r2;
1342 void *from = (void *) (long) r3; 1352 void *from = (void *) (long) r3;
1343 unsigned int len = (unsigned int) r4; 1353 unsigned int len = (unsigned int) r4;
1344 char buf[BPF_LDST_LEN];
1345 void *ptr; 1354 void *ptr;
1346 1355
1347 if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM))) 1356 if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
1348 return -EINVAL; 1357 return -EINVAL;
1349 1358
1350 /* bpf verifier guarantees that: 1359 /* bpf verifier guarantees that:
@@ -1355,14 +1364,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1355 * 1364 *
1356 * so check for invalid 'offset' and too large 'len' 1365 * so check for invalid 'offset' and too large 'len'
1357 */ 1366 */
1358 if (unlikely((u32) offset > 0xffff || len > sizeof(buf))) 1367 if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
1359 return -EFAULT; 1368 return -EFAULT;
1360 1369 if (unlikely(skb_try_make_writable(skb, offset + len)))
1361 if (unlikely(skb_cloned(skb) &&
1362 !skb_clone_writable(skb, offset + len)))
1363 return -EFAULT; 1370 return -EFAULT;
1364 1371
1365 ptr = skb_header_pointer(skb, offset, len, buf); 1372 ptr = skb_header_pointer(skb, offset, len, sp->buff);
1366 if (unlikely(!ptr)) 1373 if (unlikely(!ptr))
1367 return -EFAULT; 1374 return -EFAULT;
1368 1375
@@ -1371,17 +1378,19 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
1371 1378
1372 memcpy(ptr, from, len); 1379 memcpy(ptr, from, len);
1373 1380
1374 if (ptr == buf) 1381 if (ptr == sp->buff)
1375 /* skb_store_bits cannot return -EFAULT here */ 1382 /* skb_store_bits cannot return -EFAULT here */
1376 skb_store_bits(skb, offset, ptr, len); 1383 skb_store_bits(skb, offset, ptr, len);
1377 1384
1378 if (flags & BPF_F_RECOMPUTE_CSUM) 1385 if (flags & BPF_F_RECOMPUTE_CSUM)
1379 skb_postpush_rcsum(skb, ptr, len); 1386 skb_postpush_rcsum(skb, ptr, len);
1387 if (flags & BPF_F_INVALIDATE_HASH)
1388 skb_clear_hash(skb);
1380 1389
1381 return 0; 1390 return 0;
1382} 1391}
1383 1392
1384const struct bpf_func_proto bpf_skb_store_bytes_proto = { 1393static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
1385 .func = bpf_skb_store_bytes, 1394 .func = bpf_skb_store_bytes,
1386 .gpl_only = false, 1395 .gpl_only = false,
1387 .ret_type = RET_INTEGER, 1396 .ret_type = RET_INTEGER,
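
With the extended flag set, a program rewriting packet bytes can also ask for the stale skb->hash to be dropped. A minimal tc/BPF sketch, not from the patch; it assumes a local bpf_helpers.h declaring the helpers (as in samples/bpf), and the TOS rewrite is only an example.

#include <stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include "bpf_helpers.h"	/* assumed local helper declarations */

/* Overwrite the IPv4 TOS byte; BPF_F_INVALIDATE_HASH drops the cached
 * skb->hash because the rewrite may change how the packet is steered.
 */
static inline int rewrite_tos(struct __sk_buff *skb, __u8 new_tos)
{
	int off = ETH_HLEN + offsetof(struct iphdr, tos);
	__u8 tos = new_tos;

	return bpf_skb_store_bytes(skb, off, &tos, sizeof(tos),
				   BPF_F_RECOMPUTE_CSUM |
				   BPF_F_INVALIDATE_HASH);
}
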
@@ -1400,7 +1409,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1400 unsigned int len = (unsigned int) r4; 1409 unsigned int len = (unsigned int) r4;
1401 void *ptr; 1410 void *ptr;
1402 1411
1403 if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN)) 1412 if (unlikely((u32) offset > 0xffff || len > MAX_BPF_STACK))
1404 return -EFAULT; 1413 return -EFAULT;
1405 1414
1406 ptr = skb_header_pointer(skb, offset, len, to); 1415 ptr = skb_header_pointer(skb, offset, len, to);
@@ -1412,7 +1421,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1412 return 0; 1421 return 0;
1413} 1422}
1414 1423
1415const struct bpf_func_proto bpf_skb_load_bytes_proto = { 1424static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1416 .func = bpf_skb_load_bytes, 1425 .func = bpf_skb_load_bytes,
1417 .gpl_only = false, 1426 .gpl_only = false,
1418 .ret_type = RET_INTEGER, 1427 .ret_type = RET_INTEGER,
@@ -1432,9 +1441,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1432 return -EINVAL; 1441 return -EINVAL;
1433 if (unlikely((u32) offset > 0xffff)) 1442 if (unlikely((u32) offset > 0xffff))
1434 return -EFAULT; 1443 return -EFAULT;
1435 1444 if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
1436 if (unlikely(skb_cloned(skb) &&
1437 !skb_clone_writable(skb, offset + sizeof(sum))))
1438 return -EFAULT; 1445 return -EFAULT;
1439 1446
1440 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1447 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1442,6 +1449,12 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1442 return -EFAULT; 1449 return -EFAULT;
1443 1450
1444 switch (flags & BPF_F_HDR_FIELD_MASK) { 1451 switch (flags & BPF_F_HDR_FIELD_MASK) {
1452 case 0:
1453 if (unlikely(from != 0))
1454 return -EINVAL;
1455
1456 csum_replace_by_diff(ptr, to);
1457 break;
1445 case 2: 1458 case 2:
1446 csum_replace2(ptr, from, to); 1459 csum_replace2(ptr, from, to);
1447 break; 1460 break;
@@ -1459,7 +1472,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1459 return 0; 1472 return 0;
1460} 1473}
1461 1474
1462const struct bpf_func_proto bpf_l3_csum_replace_proto = { 1475static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
1463 .func = bpf_l3_csum_replace, 1476 .func = bpf_l3_csum_replace,
1464 .gpl_only = false, 1477 .gpl_only = false,
1465 .ret_type = RET_INTEGER, 1478 .ret_type = RET_INTEGER,
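
For the common fixed-width case the low bits of the flags argument still carry the field size. A minimal sketch rewriting the IPv4 destination address, under the same bpf_helpers.h assumption as above; addresses are network byte order.

#include <stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include "bpf_helpers.h"	/* assumed local helper declarations */

static inline int set_ipv4_daddr(struct __sk_buff *skb,
				 __be32 old_ip, __be32 new_ip)
{
	int csum_off = ETH_HLEN + offsetof(struct iphdr, check);
	int dst_off  = ETH_HLEN + offsetof(struct iphdr, daddr);
	__be32 ip = new_ip;

	/* flags low bits = 4: patch the header checksum for a 4-byte field */
	bpf_l3_csum_replace(skb, csum_off, old_ip, new_ip, sizeof(new_ip));
	return bpf_skb_store_bytes(skb, dst_off, &ip, sizeof(ip), 0);
}
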
@@ -1474,23 +1487,31 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1474{ 1487{
1475 struct sk_buff *skb = (struct sk_buff *) (long) r1; 1488 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1476 bool is_pseudo = flags & BPF_F_PSEUDO_HDR; 1489 bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
1490 bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
1477 int offset = (int) r2; 1491 int offset = (int) r2;
1478 __sum16 sum, *ptr; 1492 __sum16 sum, *ptr;
1479 1493
1480 if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) 1494 if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
1495 BPF_F_HDR_FIELD_MASK)))
1481 return -EINVAL; 1496 return -EINVAL;
1482 if (unlikely((u32) offset > 0xffff)) 1497 if (unlikely((u32) offset > 0xffff))
1483 return -EFAULT; 1498 return -EFAULT;
1484 1499 if (unlikely(skb_try_make_writable(skb, offset + sizeof(sum))))
1485 if (unlikely(skb_cloned(skb) &&
1486 !skb_clone_writable(skb, offset + sizeof(sum))))
1487 return -EFAULT; 1500 return -EFAULT;
1488 1501
1489 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); 1502 ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
1490 if (unlikely(!ptr)) 1503 if (unlikely(!ptr))
1491 return -EFAULT; 1504 return -EFAULT;
1505 if (is_mmzero && !*ptr)
1506 return 0;
1492 1507
1493 switch (flags & BPF_F_HDR_FIELD_MASK) { 1508 switch (flags & BPF_F_HDR_FIELD_MASK) {
1509 case 0:
1510 if (unlikely(from != 0))
1511 return -EINVAL;
1512
1513 inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1514 break;
1494 case 2: 1515 case 2:
1495 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); 1516 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
1496 break; 1517 break;
@@ -1501,6 +1522,8 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1501 return -EINVAL; 1522 return -EINVAL;
1502 } 1523 }
1503 1524
1525 if (is_mmzero && !*ptr)
1526 *ptr = CSUM_MANGLED_0;
1504 if (ptr == &sum) 1527 if (ptr == &sum)
1505 /* skb_store_bits guaranteed to not return -EFAULT here */ 1528 /* skb_store_bits guaranteed to not return -EFAULT here */
1506 skb_store_bits(skb, offset, ptr, sizeof(sum)); 1529 skb_store_bits(skb, offset, ptr, sizeof(sum));
@@ -1508,7 +1531,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
1508 return 0; 1531 return 0;
1509} 1532}
1510 1533
1511const struct bpf_func_proto bpf_l4_csum_replace_proto = { 1534static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1512 .func = bpf_l4_csum_replace, 1535 .func = bpf_l4_csum_replace,
1513 .gpl_only = false, 1536 .gpl_only = false,
1514 .ret_type = RET_INTEGER, 1537 .ret_type = RET_INTEGER,
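
A sketch of the new BPF_F_MARK_MANGLED_0 flag when rewriting a UDP port, again assuming a local bpf_helpers.h for the declarations; the L4 offset is caller-supplied here.

#include <stddef.h>
#include <linux/bpf.h>
#include <linux/udp.h>
#include "bpf_helpers.h"	/* assumed local helper declarations */

/* Ports are not part of the pseudo header, so no BPF_F_PSEUDO_HDR;
 * BPF_F_MARK_MANGLED_0 leaves a zero (checksum-disabled) UDP checksum
 * alone and folds a mangled zero result back to CSUM_MANGLED_0.
 */
static inline int set_udp_dport(struct __sk_buff *skb, int l4_off,
				__be16 old_port, __be16 new_port)
{
	int csum_off  = l4_off + offsetof(struct udphdr, check);
	int dport_off = l4_off + offsetof(struct udphdr, dest);
	__be16 port = new_port;

	bpf_l4_csum_replace(skb, csum_off, old_port, new_port,
			    BPF_F_MARK_MANGLED_0 | sizeof(new_port));
	return bpf_skb_store_bytes(skb, dport_off, &port, sizeof(port), 0);
}
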
@@ -1519,6 +1542,45 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1519 .arg5_type = ARG_ANYTHING, 1542 .arg5_type = ARG_ANYTHING,
1520}; 1543};
1521 1544
1545static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
1546{
1547 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1548 u64 diff_size = from_size + to_size;
1549 __be32 *from = (__be32 *) (long) r1;
1550 __be32 *to = (__be32 *) (long) r3;
1551 int i, j = 0;
1552
1553 /* This is quite flexible, some examples:
1554 *
1555 * from_size == 0, to_size > 0, seed := csum --> pushing data
1556 * from_size > 0, to_size == 0, seed := csum --> pulling data
1557 * from_size > 0, to_size > 0, seed := 0 --> diffing data
1558 *
1559 * Even for diffing, from_size and to_size don't need to be equal.
1560 */
1561 if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
1562 diff_size > sizeof(sp->diff)))
1563 return -EINVAL;
1564
1565 for (i = 0; i < from_size / sizeof(__be32); i++, j++)
1566 sp->diff[j] = ~from[i];
1567 for (i = 0; i < to_size / sizeof(__be32); i++, j++)
1568 sp->diff[j] = to[i];
1569
1570 return csum_partial(sp->diff, diff_size, seed);
1571}
1572
1573static const struct bpf_func_proto bpf_csum_diff_proto = {
1574 .func = bpf_csum_diff,
1575 .gpl_only = false,
1576 .ret_type = RET_INTEGER,
1577 .arg1_type = ARG_PTR_TO_STACK,
1578 .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO,
1579 .arg3_type = ARG_PTR_TO_STACK,
1580 .arg4_type = ARG_CONST_STACK_SIZE_OR_ZERO,
1581 .arg5_type = ARG_ANYTHING,
1582};
1583
1522static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) 1584static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
1523{ 1585{
1524 struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; 1586 struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
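
A sketch of how bpf_csum_diff() pairs with the new size-0 replace mode: build the delta over 4-byte aligned blocks, then pass it as 'to' with from == 0. Helper declarations and the signed return convention are assumed from a local bpf_helpers.h; the offsets are caller-supplied and the IPv6 rewrite is only an example.

#include <stddef.h>
#include <linux/bpf.h>
#include <linux/ipv6.h>
#include "bpf_helpers.h"	/* assumed local helper declarations */

static inline int set_ipv6_daddr(struct __sk_buff *skb, int ip6_off,
				 int tcp_csum_off, struct in6_addr *old_ip,
				 struct in6_addr *new_ip)
{
	int dst_off = ip6_off + offsetof(struct ipv6hdr, daddr);
	__s64 diff;

	/* 16 bytes on each side: pure diffing, seed 0 */
	diff = bpf_csum_diff((__be32 *)old_ip, sizeof(*old_ip),
			     (__be32 *)new_ip, sizeof(*new_ip), 0);
	if (diff < 0)
		return diff;

	bpf_skb_store_bytes(skb, dst_off, new_ip, sizeof(*new_ip), 0);
	/* from == 0 with field size 0 selects inet_proto_csum_replace_by_diff */
	return bpf_l4_csum_replace(skb, tcp_csum_off, 0, diff,
				   BPF_F_PSEUDO_HDR | 0);
}
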
@@ -1543,11 +1605,10 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
1543 } 1605 }
1544 1606
1545 skb2->dev = dev; 1607 skb2->dev = dev;
1546 skb_sender_cpu_clear(skb2);
1547 return dev_queue_xmit(skb2); 1608 return dev_queue_xmit(skb2);
1548} 1609}
1549 1610
1550const struct bpf_func_proto bpf_clone_redirect_proto = { 1611static const struct bpf_func_proto bpf_clone_redirect_proto = {
1551 .func = bpf_clone_redirect, 1612 .func = bpf_clone_redirect,
1552 .gpl_only = false, 1613 .gpl_only = false,
1553 .ret_type = RET_INTEGER, 1614 .ret_type = RET_INTEGER,
@@ -1596,11 +1657,10 @@ int skb_do_redirect(struct sk_buff *skb)
1596 } 1657 }
1597 1658
1598 skb->dev = dev; 1659 skb->dev = dev;
1599 skb_sender_cpu_clear(skb);
1600 return dev_queue_xmit(skb); 1660 return dev_queue_xmit(skb);
1601} 1661}
1602 1662
1603const struct bpf_func_proto bpf_redirect_proto = { 1663static const struct bpf_func_proto bpf_redirect_proto = {
1604 .func = bpf_redirect, 1664 .func = bpf_redirect,
1605 .gpl_only = false, 1665 .gpl_only = false,
1606 .ret_type = RET_INTEGER, 1666 .ret_type = RET_INTEGER,
@@ -1622,14 +1682,7 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
1622 1682
1623static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) 1683static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1624{ 1684{
1625#ifdef CONFIG_IP_ROUTE_CLASSID 1685 return dst_tclassid((struct sk_buff *) (unsigned long) r1);
1626 const struct dst_entry *dst;
1627
1628 dst = skb_dst((struct sk_buff *) (unsigned long) r1);
1629 if (dst)
1630 return dst->tclassid;
1631#endif
1632 return 0;
1633} 1686}
1634 1687
1635static const struct bpf_func_proto bpf_get_route_realm_proto = { 1688static const struct bpf_func_proto bpf_get_route_realm_proto = {
@@ -1682,6 +1735,13 @@ bool bpf_helper_changes_skb_data(void *func)
1682 return true; 1735 return true;
1683 if (func == bpf_skb_vlan_pop) 1736 if (func == bpf_skb_vlan_pop)
1684 return true; 1737 return true;
1738 if (func == bpf_skb_store_bytes)
1739 return true;
1740 if (func == bpf_l3_csum_replace)
1741 return true;
1742 if (func == bpf_l4_csum_replace)
1743 return true;
1744
1685 return false; 1745 return false;
1686} 1746}
1687 1747
@@ -1703,12 +1763,15 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1703 return -EPROTO; 1763 return -EPROTO;
1704 if (unlikely(size != sizeof(struct bpf_tunnel_key))) { 1764 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
1705 switch (size) { 1765 switch (size) {
1766 case offsetof(struct bpf_tunnel_key, tunnel_label):
1767 goto set_compat;
1706 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 1768 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
1707 /* Fixup deprecated structure layouts here, so we have 1769 /* Fixup deprecated structure layouts here, so we have
1708 * a common path later on. 1770 * a common path later on.
1709 */ 1771 */
1710 if (ip_tunnel_info_af(info) != AF_INET) 1772 if (ip_tunnel_info_af(info) != AF_INET)
1711 return -EINVAL; 1773 return -EINVAL;
1774set_compat:
1712 to = (struct bpf_tunnel_key *)compat; 1775 to = (struct bpf_tunnel_key *)compat;
1713 break; 1776 break;
1714 default: 1777 default:
@@ -1720,11 +1783,13 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1720 to->tunnel_tos = info->key.tos; 1783 to->tunnel_tos = info->key.tos;
1721 to->tunnel_ttl = info->key.ttl; 1784 to->tunnel_ttl = info->key.ttl;
1722 1785
1723 if (flags & BPF_F_TUNINFO_IPV6) 1786 if (flags & BPF_F_TUNINFO_IPV6) {
1724 memcpy(to->remote_ipv6, &info->key.u.ipv6.src, 1787 memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
1725 sizeof(to->remote_ipv6)); 1788 sizeof(to->remote_ipv6));
1726 else 1789 to->tunnel_label = be32_to_cpu(info->key.label);
1790 } else {
1727 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); 1791 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
1792 }
1728 1793
1729 if (unlikely(size != sizeof(struct bpf_tunnel_key))) 1794 if (unlikely(size != sizeof(struct bpf_tunnel_key)))
1730 memcpy((void *)(long) r2, to, size); 1795 memcpy((void *)(long) r2, to, size);
@@ -1732,7 +1797,7 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1732 return 0; 1797 return 0;
1733} 1798}
1734 1799
1735const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { 1800static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
1736 .func = bpf_skb_get_tunnel_key, 1801 .func = bpf_skb_get_tunnel_key,
1737 .gpl_only = false, 1802 .gpl_only = false,
1738 .ret_type = RET_INTEGER, 1803 .ret_type = RET_INTEGER,
@@ -1742,6 +1807,32 @@ const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
1742 .arg4_type = ARG_ANYTHING, 1807 .arg4_type = ARG_ANYTHING,
1743}; 1808};
1744 1809
1810static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
1811{
1812 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1813 u8 *to = (u8 *) (long) r2;
1814 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
1815
1816 if (unlikely(!info ||
1817 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)))
1818 return -ENOENT;
1819 if (unlikely(size < info->options_len))
1820 return -ENOMEM;
1821
1822 ip_tunnel_info_opts_get(to, info);
1823
1824 return info->options_len;
1825}
1826
1827static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
1828 .func = bpf_skb_get_tunnel_opt,
1829 .gpl_only = false,
1830 .ret_type = RET_INTEGER,
1831 .arg1_type = ARG_PTR_TO_CTX,
1832 .arg2_type = ARG_PTR_TO_STACK,
1833 .arg3_type = ARG_CONST_STACK_SIZE,
1834};
1835
1745static struct metadata_dst __percpu *md_dst; 1836static struct metadata_dst __percpu *md_dst;
1746 1837
1747static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) 1838static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
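
An ingress sketch on a collect-md tunnel device reading the extended key (tunnel_label) and any tunnel options; the section name, option buffer size and use of skb->mark are illustrative, and SEC() plus the helper declarations are assumed from a local bpf_helpers.h.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include "bpf_helpers.h"	/* assumed: SEC() and helper declarations */

SEC("tunnel_ingress")
int read_tunnel_md(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {};
	__u8 opts[32];			/* option buffer, size is arbitrary */
	int opts_len;

	/* BPF_F_TUNINFO_IPV6 selects the IPv6 view; key.tunnel_label is the
	 * new field carrying the outer flow label.
	 */
	if (bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
				   BPF_F_TUNINFO_IPV6) < 0)
		return TC_ACT_OK;

	opts_len = bpf_skb_get_tunnel_opt(skb, opts, sizeof(opts));
	if (opts_len < 0)
		opts_len = 0;		/* no TUNNEL_OPTIONS_PRESENT metadata */

	skb->mark = (__u32)key.tunnel_id;	/* e.g. tag the packet with the VNI */
	return TC_ACT_OK;
}
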
@@ -1752,10 +1843,12 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1752 u8 compat[sizeof(struct bpf_tunnel_key)]; 1843 u8 compat[sizeof(struct bpf_tunnel_key)];
1753 struct ip_tunnel_info *info; 1844 struct ip_tunnel_info *info;
1754 1845
1755 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6))) 1846 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
1847 BPF_F_DONT_FRAGMENT)))
1756 return -EINVAL; 1848 return -EINVAL;
1757 if (unlikely(size != sizeof(struct bpf_tunnel_key))) { 1849 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
1758 switch (size) { 1850 switch (size) {
1851 case offsetof(struct bpf_tunnel_key, tunnel_label):
1759 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): 1852 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
1760 /* Fixup deprecated structure layouts here, so we have 1853 /* Fixup deprecated structure layouts here, so we have
1761 * a common path later on. 1854 * a common path later on.
@@ -1768,6 +1861,8 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1768 return -EINVAL; 1861 return -EINVAL;
1769 } 1862 }
1770 } 1863 }
1864 if (unlikely(!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label))
1865 return -EINVAL;
1771 1866
1772 skb_dst_drop(skb); 1867 skb_dst_drop(skb);
1773 dst_hold((struct dst_entry *) md); 1868 dst_hold((struct dst_entry *) md);
@@ -1776,7 +1871,10 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1776 info = &md->u.tun_info; 1871 info = &md->u.tun_info;
1777 info->mode = IP_TUNNEL_INFO_TX; 1872 info->mode = IP_TUNNEL_INFO_TX;
1778 1873
1779 info->key.tun_flags = TUNNEL_KEY; 1874 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
1875 if (flags & BPF_F_DONT_FRAGMENT)
1876 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
1877
1780 info->key.tun_id = cpu_to_be64(from->tunnel_id); 1878 info->key.tun_id = cpu_to_be64(from->tunnel_id);
1781 info->key.tos = from->tunnel_tos; 1879 info->key.tos = from->tunnel_tos;
1782 info->key.ttl = from->tunnel_ttl; 1880 info->key.ttl = from->tunnel_ttl;
@@ -1785,14 +1883,18 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
1785 info->mode |= IP_TUNNEL_INFO_IPV6; 1883 info->mode |= IP_TUNNEL_INFO_IPV6;
1786 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, 1884 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
1787 sizeof(from->remote_ipv6)); 1885 sizeof(from->remote_ipv6));
1886 info->key.label = cpu_to_be32(from->tunnel_label) &
1887 IPV6_FLOWLABEL_MASK;
1788 } else { 1888 } else {
1789 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); 1889 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
1890 if (flags & BPF_F_ZERO_CSUM_TX)
1891 info->key.tun_flags &= ~TUNNEL_CSUM;
1790 } 1892 }
1791 1893
1792 return 0; 1894 return 0;
1793} 1895}
1794 1896
1795const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { 1897static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
1796 .func = bpf_skb_set_tunnel_key, 1898 .func = bpf_skb_set_tunnel_key,
1797 .gpl_only = false, 1899 .gpl_only = false,
1798 .ret_type = RET_INTEGER, 1900 .ret_type = RET_INTEGER,
@@ -1802,17 +1904,53 @@ const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
1802 .arg4_type = ARG_ANYTHING, 1904 .arg4_type = ARG_ANYTHING,
1803}; 1905};
1804 1906
1805static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void) 1907static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
1908{
1909 struct sk_buff *skb = (struct sk_buff *) (long) r1;
1910 u8 *from = (u8 *) (long) r2;
1911 struct ip_tunnel_info *info = skb_tunnel_info(skb);
1912 const struct metadata_dst *md = this_cpu_ptr(md_dst);
1913
1914 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
1915 return -EINVAL;
1916 if (unlikely(size > IP_TUNNEL_OPTS_MAX))
1917 return -ENOMEM;
1918
1919 ip_tunnel_info_opts_set(info, from, size);
1920
1921 return 0;
1922}
1923
1924static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
1925 .func = bpf_skb_set_tunnel_opt,
1926 .gpl_only = false,
1927 .ret_type = RET_INTEGER,
1928 .arg1_type = ARG_PTR_TO_CTX,
1929 .arg2_type = ARG_PTR_TO_STACK,
1930 .arg3_type = ARG_CONST_STACK_SIZE,
1931};
1932
1933static const struct bpf_func_proto *
1934bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
1806{ 1935{
1807 if (!md_dst) { 1936 if (!md_dst) {
1808 /* race is not possible, since it's called from 1937 /* Race is not possible, since it's called from verifier
1809 * verifier that is holding verifier mutex 1938 * that is holding verifier mutex.
1810 */ 1939 */
1811 md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL); 1940 md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
1941 GFP_KERNEL);
1812 if (!md_dst) 1942 if (!md_dst)
1813 return NULL; 1943 return NULL;
1814 } 1944 }
1815 return &bpf_skb_set_tunnel_key_proto; 1945
1946 switch (which) {
1947 case BPF_FUNC_skb_set_tunnel_key:
1948 return &bpf_skb_set_tunnel_key_proto;
1949 case BPF_FUNC_skb_set_tunnel_opt:
1950 return &bpf_skb_set_tunnel_opt_proto;
1951 default:
1952 return NULL;
1953 }
1816} 1954}
1817 1955
1818static const struct bpf_func_proto * 1956static const struct bpf_func_proto *
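
The matching egress sketch: install the key first (which also sets up the metadata dst), then attach opaque options. Addresses, ids and the option bytes are made up, and SEC()/helper declarations are assumed as before.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include "bpf_helpers.h"	/* assumed: SEC() and helper declarations */

SEC("tunnel_egress")
int set_tunnel_md(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {};
	__u8 opts[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };	/* opaque TLV bytes */

	key.tunnel_id   = 42;
	key.remote_ipv4 = 0x0a000002;	/* 10.0.0.2, host byte order */
	key.tunnel_ttl  = 64;

	/* New flags: leave the outer UDP checksum zero and set DF outside */
	if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
				   BPF_F_ZERO_CSUM_TX |
				   BPF_F_DONT_FRAGMENT) < 0)
		return TC_ACT_SHOT;

	/* Options go in after the key and must be sized in 4-byte units */
	if (bpf_skb_set_tunnel_opt(skb, opts, sizeof(opts)) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
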
@@ -1849,6 +1987,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
1849 return &bpf_skb_store_bytes_proto; 1987 return &bpf_skb_store_bytes_proto;
1850 case BPF_FUNC_skb_load_bytes: 1988 case BPF_FUNC_skb_load_bytes:
1851 return &bpf_skb_load_bytes_proto; 1989 return &bpf_skb_load_bytes_proto;
1990 case BPF_FUNC_csum_diff:
1991 return &bpf_csum_diff_proto;
1852 case BPF_FUNC_l3_csum_replace: 1992 case BPF_FUNC_l3_csum_replace:
1853 return &bpf_l3_csum_replace_proto; 1993 return &bpf_l3_csum_replace_proto;
1854 case BPF_FUNC_l4_csum_replace: 1994 case BPF_FUNC_l4_csum_replace:
@@ -1864,7 +2004,11 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
1864 case BPF_FUNC_skb_get_tunnel_key: 2004 case BPF_FUNC_skb_get_tunnel_key:
1865 return &bpf_skb_get_tunnel_key_proto; 2005 return &bpf_skb_get_tunnel_key_proto;
1866 case BPF_FUNC_skb_set_tunnel_key: 2006 case BPF_FUNC_skb_set_tunnel_key:
1867 return bpf_get_skb_set_tunnel_key_proto(); 2007 return bpf_get_skb_set_tunnel_proto(func_id);
2008 case BPF_FUNC_skb_get_tunnel_opt:
2009 return &bpf_skb_get_tunnel_opt_proto;
2010 case BPF_FUNC_skb_set_tunnel_opt:
2011 return bpf_get_skb_set_tunnel_proto(func_id);
1868 case BPF_FUNC_redirect: 2012 case BPF_FUNC_redirect:
1869 return &bpf_redirect_proto; 2013 return &bpf_redirect_proto;
1870 case BPF_FUNC_get_route_realm: 2014 case BPF_FUNC_get_route_realm:
@@ -1913,16 +2057,14 @@ static bool sk_filter_is_valid_access(int off, int size,
1913static bool tc_cls_act_is_valid_access(int off, int size, 2057static bool tc_cls_act_is_valid_access(int off, int size,
1914 enum bpf_access_type type) 2058 enum bpf_access_type type)
1915{ 2059{
1916 if (off == offsetof(struct __sk_buff, tc_classid))
1917 return type == BPF_WRITE ? true : false;
1918
1919 if (type == BPF_WRITE) { 2060 if (type == BPF_WRITE) {
1920 switch (off) { 2061 switch (off) {
1921 case offsetof(struct __sk_buff, mark): 2062 case offsetof(struct __sk_buff, mark):
1922 case offsetof(struct __sk_buff, tc_index): 2063 case offsetof(struct __sk_buff, tc_index):
1923 case offsetof(struct __sk_buff, priority): 2064 case offsetof(struct __sk_buff, priority):
1924 case offsetof(struct __sk_buff, cb[0]) ... 2065 case offsetof(struct __sk_buff, cb[0]) ...
1925 offsetof(struct __sk_buff, cb[4]): 2066 offsetof(struct __sk_buff, cb[4]):
2067 case offsetof(struct __sk_buff, tc_classid):
1926 break; 2068 break;
1927 default: 2069 default:
1928 return false; 2070 return false;
@@ -2039,8 +2181,10 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
2039 ctx_off -= offsetof(struct __sk_buff, tc_classid); 2181 ctx_off -= offsetof(struct __sk_buff, tc_classid);
2040 ctx_off += offsetof(struct sk_buff, cb); 2182 ctx_off += offsetof(struct sk_buff, cb);
2041 ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); 2183 ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
2042 WARN_ON(type != BPF_WRITE); 2184 if (type == BPF_WRITE)
2043 *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); 2185 *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
2186 else
2187 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
2044 break; 2188 break;
2045 2189
2046 case offsetof(struct __sk_buff, tc_index): 2190 case offsetof(struct __sk_buff, tc_index):
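
With tc_classid now readable as well as writable from cls_act programs, a value stored by one program can be consumed by a later one in the same filter chain (or by a tail call). A tiny sketch with invented section names and an arbitrary class id; SEC() and includes as in the earlier sketches.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include "bpf_helpers.h"	/* assumed: SEC() declaration */

/* One program stores a class id ... */
SEC("set_classid")
int act_set_classid(struct __sk_buff *skb)
{
	skb->tc_classid = 0x10;		/* arbitrary example value */
	return TC_ACT_OK;
}

/* ... and a later program can now read it back. */
SEC("use_classid")
int act_use_classid(struct __sk_buff *skb)
{
	return skb->tc_classid == 0x10 ? TC_ACT_OK : TC_ACT_UNSPEC;
}
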
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d79699c9d1b9..a669dea146c6 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -19,25 +19,12 @@
19#include <net/flow_dissector.h> 19#include <net/flow_dissector.h>
20#include <scsi/fc/fc_fcoe.h> 20#include <scsi/fc/fc_fcoe.h>
21 21
22static bool dissector_uses_key(const struct flow_dissector *flow_dissector,
23 enum flow_dissector_key_id key_id)
24{
25 return flow_dissector->used_keys & (1 << key_id);
26}
27
28static void dissector_set_key(struct flow_dissector *flow_dissector, 22static void dissector_set_key(struct flow_dissector *flow_dissector,
29 enum flow_dissector_key_id key_id) 23 enum flow_dissector_key_id key_id)
30{ 24{
31 flow_dissector->used_keys |= (1 << key_id); 25 flow_dissector->used_keys |= (1 << key_id);
32} 26}
33 27
34static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
35 enum flow_dissector_key_id key_id,
36 void *target_container)
37{
38 return ((char *) target_container) + flow_dissector->offset[key_id];
39}
40
41void skb_flow_dissector_init(struct flow_dissector *flow_dissector, 28void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
42 const struct flow_dissector_key *key, 29 const struct flow_dissector_key *key,
43 unsigned int key_count) 30 unsigned int key_count)
@@ -178,15 +165,16 @@ ip:
178 165
179 ip_proto = iph->protocol; 166 ip_proto = iph->protocol;
180 167
181 if (!dissector_uses_key(flow_dissector, 168 if (dissector_uses_key(flow_dissector,
182 FLOW_DISSECTOR_KEY_IPV4_ADDRS)) 169 FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
183 break; 170 key_addrs = skb_flow_dissector_target(flow_dissector,
171 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
172 target_container);
184 173
185 key_addrs = skb_flow_dissector_target(flow_dissector, 174 memcpy(&key_addrs->v4addrs, &iph->saddr,
186 FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container); 175 sizeof(key_addrs->v4addrs));
187 memcpy(&key_addrs->v4addrs, &iph->saddr, 176 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
188 sizeof(key_addrs->v4addrs)); 177 }
189 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
190 178
191 if (ip_is_fragment(iph)) { 179 if (ip_is_fragment(iph)) {
192 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 180 key_control->flags |= FLOW_DIS_IS_FRAGMENT;
@@ -208,7 +196,6 @@ ip:
208 case htons(ETH_P_IPV6): { 196 case htons(ETH_P_IPV6): {
209 const struct ipv6hdr *iph; 197 const struct ipv6hdr *iph;
210 struct ipv6hdr _iph; 198 struct ipv6hdr _iph;
211 __be32 flow_label;
212 199
213ipv6: 200ipv6:
214 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); 201 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
@@ -220,18 +207,21 @@ ipv6:
220 207
221 if (dissector_uses_key(flow_dissector, 208 if (dissector_uses_key(flow_dissector,
222 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { 209 FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
223 struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs; 210 key_addrs = skb_flow_dissector_target(flow_dissector,
224 211 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
225 key_ipv6_addrs = skb_flow_dissector_target(flow_dissector, 212 target_container);
226 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
227 target_container);
228 213
229 memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs)); 214 memcpy(&key_addrs->v6addrs, &iph->saddr,
215 sizeof(key_addrs->v6addrs));
230 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 216 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
231 } 217 }
232 218
233 flow_label = ip6_flowlabel(iph); 219 if ((dissector_uses_key(flow_dissector,
234 if (flow_label) { 220 FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
221 (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
222 ip6_flowlabel(iph)) {
223 __be32 flow_label = ip6_flowlabel(iph);
224
235 if (dissector_uses_key(flow_dissector, 225 if (dissector_uses_key(flow_dissector,
236 FLOW_DISSECTOR_KEY_FLOW_LABEL)) { 226 FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
237 key_tags = skb_flow_dissector_target(flow_dissector, 227 key_tags = skb_flow_dissector_target(flow_dissector,
@@ -336,8 +326,11 @@ mpls:
336 } 326 }
337 327
338 case htons(ETH_P_FCOE): 328 case htons(ETH_P_FCOE):
339 key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN); 329 if ((hlen - nhoff) < FCOE_HEADER_LEN)
340 /* fall through */ 330 goto out_bad;
331
332 nhoff += FCOE_HEADER_LEN;
333 goto out_good;
341 default: 334 default:
342 goto out_bad; 335 goto out_bad;
343 } 336 }
@@ -396,6 +389,13 @@ ip_proto_again:
396 goto out_bad; 389 goto out_bad;
397 proto = eth->h_proto; 390 proto = eth->h_proto;
398 nhoff += sizeof(*eth); 391 nhoff += sizeof(*eth);
392
393 /* Cap headers that we access via pointers at the
394 * end of the Ethernet header as our maximum alignment
395 * at that point is only 2 bytes.
396 */
397 if (NET_IP_ALIGN)
398 hlen = nhoff;
399 } 399 }
400 400
401 key_control->flags |= FLOW_DIS_ENCAPSULATION; 401 key_control->flags |= FLOW_DIS_ENCAPSULATION;
@@ -437,13 +437,12 @@ ip_proto_again:
437 key_control->flags |= FLOW_DIS_IS_FRAGMENT; 437 key_control->flags |= FLOW_DIS_IS_FRAGMENT;
438 438
439 nhoff += sizeof(_fh); 439 nhoff += sizeof(_fh);
440 ip_proto = fh->nexthdr;
440 441
441 if (!(fh->frag_off & htons(IP6_OFFSET))) { 442 if (!(fh->frag_off & htons(IP6_OFFSET))) {
442 key_control->flags |= FLOW_DIS_FIRST_FRAG; 443 key_control->flags |= FLOW_DIS_FIRST_FRAG;
443 if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) { 444 if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
444 ip_proto = fh->nexthdr;
445 goto ip_proto_again; 445 goto ip_proto_again;
446 }
447 } 446 }
448 goto out_good; 447 goto out_good;
449 } 448 }
@@ -730,6 +729,11 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
730{ 729{
731 u32 poff = keys->control.thoff; 730 u32 poff = keys->control.thoff;
732 731
732 /* skip L4 headers for fragments after the first */
733 if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) &&
734 !(keys->control.flags & FLOW_DIS_FIRST_FRAG))
735 return poff;
736
733 switch (keys->basic.ip_proto) { 737 switch (keys->basic.ip_proto) {
734 case IPPROTO_TCP: { 738 case IPPROTO_TCP: {
735 /* access doff as u8 to avoid unaligned access */ 739 /* access doff as u8 to avoid unaligned access */
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 92d886f4adcb..4573d81093fe 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -191,6 +191,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
191/** 191/**
192 * gen_new_estimator - create a new rate estimator 192 * gen_new_estimator - create a new rate estimator
193 * @bstats: basic statistics 193 * @bstats: basic statistics
194 * @cpu_bstats: bstats per cpu
194 * @rate_est: rate estimator statistics 195 * @rate_est: rate estimator statistics
195 * @stats_lock: statistics lock 196 * @stats_lock: statistics lock
196 * @opt: rate estimator configuration TLV 197 * @opt: rate estimator configuration TLV
@@ -287,6 +288,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
287/** 288/**
288 * gen_replace_estimator - replace rate estimator configuration 289 * gen_replace_estimator - replace rate estimator configuration
289 * @bstats: basic statistics 290 * @bstats: basic statistics
291 * @cpu_bstats: bstats per cpu
290 * @rate_est: rate estimator statistics 292 * @rate_est: rate estimator statistics
291 * @stats_lock: statistics lock 293 * @stats_lock: statistics lock
292 * @opt: rate estimator configuration TLV 294 * @opt: rate estimator configuration TLV
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 1e2f46a69d50..e640462ea8bf 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -140,6 +140,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic);
140/** 140/**
141 * gnet_stats_copy_basic - copy basic statistics into statistic TLV 141 * gnet_stats_copy_basic - copy basic statistics into statistic TLV
142 * @d: dumping handle 142 * @d: dumping handle
143 * @cpu: copy statistic per cpu
143 * @b: basic statistics 144 * @b: basic statistics
144 * 145 *
145 * Appends the basic statistics to the top level TLV created by 146 * Appends the basic statistics to the top level TLV created by
diff --git a/net/core/hwbm.c b/net/core/hwbm.c
new file mode 100644
index 000000000000..941c28486896
--- /dev/null
+++ b/net/core/hwbm.c
@@ -0,0 +1,87 @@
1/* Support for hardware buffer manager.
2 *
3 * Copyright (C) 2016 Marvell
4 *
5 * Gregory CLEMENT <gregory.clement@free-electrons.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 */
12#include <linux/kernel.h>
13#include <linux/printk.h>
14#include <linux/skbuff.h>
15#include <net/hwbm.h>
16
17void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf)
18{
19 if (likely(bm_pool->frag_size <= PAGE_SIZE))
20 skb_free_frag(buf);
21 else
22 kfree(buf);
23}
24EXPORT_SYMBOL_GPL(hwbm_buf_free);
25
26/* Refill processing for HW buffer management */
27int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp)
28{
29 int frag_size = bm_pool->frag_size;
30 void *buf;
31
32 if (likely(frag_size <= PAGE_SIZE))
33 buf = netdev_alloc_frag(frag_size);
34 else
35 buf = kmalloc(frag_size, gfp);
36
37 if (!buf)
38 return -ENOMEM;
39
40 if (bm_pool->construct)
41 if (bm_pool->construct(bm_pool, buf)) {
42 hwbm_buf_free(bm_pool, buf);
43 return -ENOMEM;
44 }
45
46 return 0;
47}
48EXPORT_SYMBOL_GPL(hwbm_pool_refill);
49
50int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
51{
52 int err, i;
53 unsigned long flags;
54
55 spin_lock_irqsave(&bm_pool->lock, flags);
56 if (bm_pool->buf_num == bm_pool->size) {
57 pr_warn("pool already filled\n");
58 return bm_pool->buf_num;
59 }
60
61 if (buf_num + bm_pool->buf_num > bm_pool->size) {
62 pr_warn("cannot allocate %d buffers for pool\n",
63 buf_num);
64 return 0;
65 }
66
67 if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) {
68 pr_warn("Adding %d buffers to the %d current buffers will overflow\n",
69 buf_num, bm_pool->buf_num);
70 return 0;
71 }
72
73 for (i = 0; i < buf_num; i++) {
74 err = hwbm_pool_refill(bm_pool, gfp);
75 if (err < 0)
76 break;
77 }
78
79 /* Update BM driver with number of buffers added to pool */
80 bm_pool->buf_num += i;
81
 82 pr_debug("hwbm pool: %d of %d buffers added\n", i, buf_num);
83 spin_unlock_irqrestore(&bm_pool->lock, flags);
84
85 return i;
86}
87EXPORT_SYMBOL_GPL(hwbm_pool_add);
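
A hedged sketch of the driver side this API expects, based only on the fields the code above touches (lock, frag_size, size, buf_num, construct); the port structure and the hardware helper are invented.

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <net/hwbm.h>

struct my_port {
	struct hwbm_pool pool;
	/* device state ... */
};

static int my_construct(struct hwbm_pool *bm_pool, void *buf)
{
	struct my_port *port = container_of(bm_pool, struct my_port, pool);

	/* post the fresh buffer to the hardware ring (made-up helper) */
	return my_hw_give_buffer(port, buf);
}

static int my_pool_setup(struct my_port *port, unsigned int frag_size,
			 unsigned int nbufs)
{
	struct hwbm_pool *pool = &port->pool;

	spin_lock_init(&pool->lock);
	pool->frag_size = frag_size;	/* <= PAGE_SIZE means page-frag backed */
	pool->size      = nbufs;
	pool->buf_num   = 0;
	pool->construct = my_construct;

	/* hwbm_pool_add() returns how many buffers were actually added */
	return hwbm_pool_add(pool, nbufs, GFP_KERNEL) == nbufs ? 0 : -ENOMEM;
}
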
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 299cfc24d888..669ecc9f884e 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -27,6 +27,31 @@
27#include <net/rtnetlink.h> 27#include <net/rtnetlink.h>
28#include <net/ip6_fib.h> 28#include <net/ip6_fib.h>
29 29
30#ifdef CONFIG_MODULES
31
32static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
33{
34 /* Only lwt encaps implemented without using an interface for
35 * the encap need to return a string here.
36 */
37 switch (encap_type) {
38 case LWTUNNEL_ENCAP_MPLS:
39 return "MPLS";
40 case LWTUNNEL_ENCAP_ILA:
41 return "ILA";
42 case LWTUNNEL_ENCAP_IP6:
43 case LWTUNNEL_ENCAP_IP:
44 case LWTUNNEL_ENCAP_NONE:
45 case __LWTUNNEL_ENCAP_MAX:
46 /* should not have got here */
47 WARN_ON(1);
48 break;
49 }
50 return NULL;
51}
52
53#endif /* CONFIG_MODULES */
54
30struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) 55struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
31{ 56{
32 struct lwtunnel_state *lws; 57 struct lwtunnel_state *lws;
@@ -85,6 +110,18 @@ int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
85 ret = -EOPNOTSUPP; 110 ret = -EOPNOTSUPP;
86 rcu_read_lock(); 111 rcu_read_lock();
87 ops = rcu_dereference(lwtun_encaps[encap_type]); 112 ops = rcu_dereference(lwtun_encaps[encap_type]);
113#ifdef CONFIG_MODULES
114 if (!ops) {
115 const char *encap_type_str = lwtunnel_encap_str(encap_type);
116
117 if (encap_type_str) {
118 rcu_read_unlock();
119 request_module("rtnl-lwt-%s", encap_type_str);
120 rcu_read_lock();
121 ops = rcu_dereference(lwtun_encaps[encap_type]);
122 }
123 }
124#endif
88 if (likely(ops && ops->build_state)) 125 if (likely(ops && ops->build_state))
89 ret = ops->build_state(dev, encap, family, cfg, lws); 126 ret = ops->build_state(dev, encap, family, cfg, lws);
90 rcu_read_unlock(); 127 rcu_read_unlock();
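
For request_module("rtnl-lwt-%s", ...) to succeed, the module implementing the encap has to carry a matching alias. A minimal sketch for the MPLS case; in-tree the mechanism may be a dedicated MODULE_ALIAS_RTNL_LWT() macro rather than the literal string shown here.

#include <linux/module.h>

/* In the module implementing LWTUNNEL_ENCAP_MPLS (net/mpls/mpls_iptunnel.c
 * in-tree): the alias must match the string returned by lwtunnel_encap_str().
 */
MODULE_ALIAS("rtnl-lwt-MPLS");
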
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b6c8a6629b39..2b3f76fe65f4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -29,7 +29,6 @@
29 29
30#ifdef CONFIG_SYSFS 30#ifdef CONFIG_SYSFS
31static const char fmt_hex[] = "%#x\n"; 31static const char fmt_hex[] = "%#x\n";
32static const char fmt_long_hex[] = "%#lx\n";
33static const char fmt_dec[] = "%d\n"; 32static const char fmt_dec[] = "%d\n";
34static const char fmt_ulong[] = "%lu\n"; 33static const char fmt_ulong[] = "%lu\n";
35static const char fmt_u64[] = "%llu\n"; 34static const char fmt_u64[] = "%llu\n";
@@ -199,9 +198,10 @@ static ssize_t speed_show(struct device *dev,
199 return restart_syscall(); 198 return restart_syscall();
200 199
201 if (netif_running(netdev)) { 200 if (netif_running(netdev)) {
202 struct ethtool_cmd cmd; 201 struct ethtool_link_ksettings cmd;
203 if (!__ethtool_get_settings(netdev, &cmd)) 202
204 ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); 203 if (!__ethtool_get_link_ksettings(netdev, &cmd))
204 ret = sprintf(buf, fmt_dec, cmd.base.speed);
205 } 205 }
206 rtnl_unlock(); 206 rtnl_unlock();
207 return ret; 207 return ret;
@@ -218,10 +218,12 @@ static ssize_t duplex_show(struct device *dev,
218 return restart_syscall(); 218 return restart_syscall();
219 219
220 if (netif_running(netdev)) { 220 if (netif_running(netdev)) {
221 struct ethtool_cmd cmd; 221 struct ethtool_link_ksettings cmd;
222 if (!__ethtool_get_settings(netdev, &cmd)) { 222
223 if (!__ethtool_get_link_ksettings(netdev, &cmd)) {
223 const char *duplex; 224 const char *duplex;
224 switch (cmd.duplex) { 225
226 switch (cmd.base.duplex) {
225 case DUPLEX_HALF: 227 case DUPLEX_HALF:
226 duplex = "half"; 228 duplex = "half";
227 break; 229 break;
@@ -574,6 +576,7 @@ NETSTAT_ENTRY(tx_heartbeat_errors);
574NETSTAT_ENTRY(tx_window_errors); 576NETSTAT_ENTRY(tx_window_errors);
575NETSTAT_ENTRY(rx_compressed); 577NETSTAT_ENTRY(rx_compressed);
576NETSTAT_ENTRY(tx_compressed); 578NETSTAT_ENTRY(tx_compressed);
579NETSTAT_ENTRY(rx_nohandler);
577 580
578static struct attribute *netstat_attrs[] = { 581static struct attribute *netstat_attrs[] = {
579 &dev_attr_rx_packets.attr, 582 &dev_attr_rx_packets.attr,
@@ -599,6 +602,7 @@ static struct attribute *netstat_attrs[] = {
599 &dev_attr_tx_window_errors.attr, 602 &dev_attr_tx_window_errors.attr,
600 &dev_attr_rx_compressed.attr, 603 &dev_attr_rx_compressed.attr,
601 &dev_attr_tx_compressed.attr, 604 &dev_attr_tx_compressed.attr,
605 &dev_attr_rx_nohandler.attr,
602 NULL 606 NULL
603}; 607};
604 608
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 0260c84ed83c..11fce17274f6 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -9,7 +9,6 @@
9 * Authors: Thomas Graf <tgraf@suug.ch> 9 * Authors: Thomas Graf <tgraf@suug.ch>
10 */ 10 */
11 11
12#include <linux/module.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/cgroup.h> 13#include <linux/cgroup.h>
15#include <linux/fdtable.h> 14#include <linux/fdtable.h>
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index f1efbc39ef6b..2ec86fc552df 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -11,7 +11,6 @@
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 13
14#include <linux/module.h>
15#include <linux/slab.h> 14#include <linux/slab.h>
16#include <linux/types.h> 15#include <linux/types.h>
17#include <linux/string.h> 16#include <linux/string.h>
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1474cfd2dc1c..20999aa596dd 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2856,7 +2856,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2856 *vlan_encapsulated_proto = htons(ETH_P_IP); 2856 *vlan_encapsulated_proto = htons(ETH_P_IP);
2857 } 2857 }
2858 2858
2859 skb_set_mac_header(skb, 0); 2859 skb_reset_mac_header(skb);
2860 skb_set_network_header(skb, skb->len); 2860 skb_set_network_header(skb, skb->len);
2861 iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr)); 2861 iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr));
2862 2862
@@ -2983,7 +2983,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2983 *vlan_encapsulated_proto = htons(ETH_P_IPV6); 2983 *vlan_encapsulated_proto = htons(ETH_P_IPV6);
2984 } 2984 }
2985 2985
2986 skb_set_mac_header(skb, 0); 2986 skb_reset_mac_header(skb);
2987 skb_set_network_header(skb, skb->len); 2987 skb_set_network_header(skb, skb->len);
2988 iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); 2988 iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
2989 2989
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d735e854f916..f2066772d0f3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -804,6 +804,8 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
804 804
805 a->rx_compressed = b->rx_compressed; 805 a->rx_compressed = b->rx_compressed;
806 a->tx_compressed = b->tx_compressed; 806 a->tx_compressed = b->tx_compressed;
807
808 a->rx_nohandler = b->rx_nohandler;
807} 809}
808 810
809static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) 811static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
@@ -893,6 +895,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
893 + nla_total_size(4) /* IFLA_PROMISCUITY */ 895 + nla_total_size(4) /* IFLA_PROMISCUITY */
894 + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ 896 + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
895 + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ 897 + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
898 + nla_total_size(4) /* IFLA_MAX_GSO_SEGS */
899 + nla_total_size(4) /* IFLA_MAX_GSO_SIZE */
896 + nla_total_size(1) /* IFLA_OPERSTATE */ 900 + nla_total_size(1) /* IFLA_OPERSTATE */
897 + nla_total_size(1) /* IFLA_LINKMODE */ 901 + nla_total_size(1) /* IFLA_LINKMODE */
898 + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ 902 + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
@@ -1221,6 +1225,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1221 nla_put_u32(skb, IFLA_GROUP, dev->group) || 1225 nla_put_u32(skb, IFLA_GROUP, dev->group) ||
1222 nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || 1226 nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
1223 nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || 1227 nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
1228 nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
1229 nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
1224#ifdef CONFIG_RPS 1230#ifdef CONFIG_RPS
1225 nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || 1231 nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
1226#endif 1232#endif
@@ -1387,15 +1393,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
1387 [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, 1393 [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
1388 [IFLA_VF_STATS] = { .type = NLA_NESTED }, 1394 [IFLA_VF_STATS] = { .type = NLA_NESTED },
1389 [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, 1395 [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) },
1390}; 1396 [IFLA_VF_IB_NODE_GUID] = { .len = sizeof(struct ifla_vf_guid) },
1391 1397 [IFLA_VF_IB_PORT_GUID] = { .len = sizeof(struct ifla_vf_guid) },
1392static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
1393 [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 },
1394 [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 },
1395 [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 },
1396 [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 },
1397 [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 },
1398 [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 },
1399}; 1398};
1400 1399
1401static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { 1400static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1412,6 +1411,58 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
1412 [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, 1411 [IFLA_PORT_RESPONSE] = { .type = NLA_U16, },
1413}; 1412};
1414 1413
1414static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
1415{
1416 const struct rtnl_link_ops *ops = NULL;
1417 struct nlattr *linfo[IFLA_INFO_MAX + 1];
1418
1419 if (nla_parse_nested(linfo, IFLA_INFO_MAX, nla, ifla_info_policy) < 0)
1420 return NULL;
1421
1422 if (linfo[IFLA_INFO_KIND]) {
1423 char kind[MODULE_NAME_LEN];
1424
1425 nla_strlcpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
1426 ops = rtnl_link_ops_get(kind);
1427 }
1428
1429 return ops;
1430}
1431
1432static bool link_master_filtered(struct net_device *dev, int master_idx)
1433{
1434 struct net_device *master;
1435
1436 if (!master_idx)
1437 return false;
1438
1439 master = netdev_master_upper_dev_get(dev);
1440 if (!master || master->ifindex != master_idx)
1441 return true;
1442
1443 return false;
1444}
1445
1446static bool link_kind_filtered(const struct net_device *dev,
1447 const struct rtnl_link_ops *kind_ops)
1448{
1449 if (kind_ops && dev->rtnl_link_ops != kind_ops)
1450 return true;
1451
1452 return false;
1453}
1454
1455static bool link_dump_filtered(struct net_device *dev,
1456 int master_idx,
1457 const struct rtnl_link_ops *kind_ops)
1458{
1459 if (link_master_filtered(dev, master_idx) ||
1460 link_kind_filtered(dev, kind_ops))
1461 return true;
1462
1463 return false;
1464}
1465
1415static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 1466static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1416{ 1467{
1417 struct net *net = sock_net(skb->sk); 1468 struct net *net = sock_net(skb->sk);
@@ -1421,6 +1472,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1421 struct hlist_head *head; 1472 struct hlist_head *head;
1422 struct nlattr *tb[IFLA_MAX+1]; 1473 struct nlattr *tb[IFLA_MAX+1];
1423 u32 ext_filter_mask = 0; 1474 u32 ext_filter_mask = 0;
1475 const struct rtnl_link_ops *kind_ops = NULL;
1476 unsigned int flags = NLM_F_MULTI;
1477 int master_idx = 0;
1424 int err; 1478 int err;
1425 int hdrlen; 1479 int hdrlen;
1426 1480
@@ -1443,18 +1497,29 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1443 1497
1444 if (tb[IFLA_EXT_MASK]) 1498 if (tb[IFLA_EXT_MASK])
1445 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); 1499 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
1500
1501 if (tb[IFLA_MASTER])
1502 master_idx = nla_get_u32(tb[IFLA_MASTER]);
1503
1504 if (tb[IFLA_LINKINFO])
1505 kind_ops = linkinfo_to_kind_ops(tb[IFLA_LINKINFO]);
1506
1507 if (master_idx || kind_ops)
1508 flags |= NLM_F_DUMP_FILTERED;
1446 } 1509 }
1447 1510
1448 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1511 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1449 idx = 0; 1512 idx = 0;
1450 head = &net->dev_index_head[h]; 1513 head = &net->dev_index_head[h];
1451 hlist_for_each_entry(dev, head, index_hlist) { 1514 hlist_for_each_entry(dev, head, index_hlist) {
1515 if (link_dump_filtered(dev, master_idx, kind_ops))
1516 continue;
1452 if (idx < s_idx) 1517 if (idx < s_idx)
1453 goto cont; 1518 goto cont;
1454 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 1519 err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
1455 NETLINK_CB(cb->skb).portid, 1520 NETLINK_CB(cb->skb).portid,
1456 cb->nlh->nlmsg_seq, 0, 1521 cb->nlh->nlmsg_seq, 0,
1457 NLM_F_MULTI, 1522 flags,
1458 ext_filter_mask); 1523 ext_filter_mask);
1459 /* If we ran out of room on the first message, 1524 /* If we ran out of room on the first message,
1460 * we're in trouble 1525 * we're in trouble
@@ -1534,6 +1599,22 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
1534 return 0; 1599 return 0;
1535} 1600}
1536 1601
1602static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt,
1603 int guid_type)
1604{
1605 const struct net_device_ops *ops = dev->netdev_ops;
1606
1607 return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type);
1608}
1609
1610static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type)
1611{
1612 if (dev->type != ARPHRD_INFINIBAND)
1613 return -EOPNOTSUPP;
1614
1615 return handle_infiniband_guid(dev, ivt, guid_type);
1616}
1617
1537static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) 1618static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
1538{ 1619{
1539 const struct net_device_ops *ops = dev->netdev_ops; 1620 const struct net_device_ops *ops = dev->netdev_ops;
@@ -1636,6 +1717,24 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
1636 return err; 1717 return err;
1637 } 1718 }
1638 1719
1720 if (tb[IFLA_VF_IB_NODE_GUID]) {
1721 struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]);
1722
1723 if (!ops->ndo_set_vf_guid)
1724 return -EOPNOTSUPP;
1725
1726 return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID);
1727 }
1728
1729 if (tb[IFLA_VF_IB_PORT_GUID]) {
1730 struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]);
1731
1732 if (!ops->ndo_set_vf_guid)
1733 return -EOPNOTSUPP;
1734
1735 return handle_vf_guid(dev, ivt, IFLA_VF_IB_PORT_GUID);
1736 }
1737
1639 return err; 1738 return err;
1640} 1739}
1641 1740
@@ -2911,6 +3010,7 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
2911 nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc); 3010 nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc);
2912out: 3011out:
2913 netif_addr_unlock_bh(dev); 3012 netif_addr_unlock_bh(dev);
3013 cb->args[1] = err;
2914 return idx; 3014 return idx;
2915} 3015}
2916EXPORT_SYMBOL(ndo_dflt_fdb_dump); 3016EXPORT_SYMBOL(ndo_dflt_fdb_dump);
@@ -2944,6 +3044,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2944 ops = br_dev->netdev_ops; 3044 ops = br_dev->netdev_ops;
2945 } 3045 }
2946 3046
3047 cb->args[1] = 0;
2947 for_each_netdev(net, dev) { 3048 for_each_netdev(net, dev) {
2948 if (brport_idx && (dev->ifindex != brport_idx)) 3049 if (brport_idx && (dev->ifindex != brport_idx))
2949 continue; 3050 continue;
@@ -2971,12 +3072,16 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2971 idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, 3072 idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
2972 idx); 3073 idx);
2973 } 3074 }
3075 if (cb->args[1] == -EMSGSIZE)
3076 break;
2974 3077
2975 if (dev->netdev_ops->ndo_fdb_dump) 3078 if (dev->netdev_ops->ndo_fdb_dump)
2976 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, 3079 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
2977 idx); 3080 idx);
2978 else 3081 else
2979 idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); 3082 idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
3083 if (cb->args[1] == -EMSGSIZE)
3084 break;
2980 3085
2981 cops = NULL; 3086 cops = NULL;
2982 } 3087 }
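
The dump handler now honours IFLA_MASTER and IFLA_LINKINFO as filters and marks the replies with NLM_F_DUMP_FILTERED. A hedged user-space sketch of a dump request filtered by master ifindex; nlfd is assumed to be a bound NETLINK_ROUTE socket, and receiving and parsing the multipart reply is omitted.

#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static int request_filtered_dump(int nlfd, int master_ifindex)
{
	struct {
		struct nlmsghdr  nlh;
		struct ifinfomsg ifm;
		char             buf[64];
	} req;
	struct rtattr *rta;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(req.ifm));
	req.nlh.nlmsg_type  = RTM_GETLINK;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.ifm.ifi_family  = AF_UNSPEC;

	/* IFLA_MASTER: only links enslaved to this ifindex are dumped */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = IFLA_MASTER;
	rta->rta_len  = RTA_LENGTH(sizeof(master_ifindex));
	memcpy(RTA_DATA(rta), &master_ifindex, sizeof(master_ifindex));
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) +
			    RTA_ALIGN(rta->rta_len);

	return send(nlfd, &req, req.nlh.nlmsg_len, 0);
}
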
diff --git a/net/core/scm.c b/net/core/scm.c
index 14596fb37172..2696aefdc148 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
87 *fplp = fpl; 87 *fplp = fpl;
88 fpl->count = 0; 88 fpl->count = 0;
89 fpl->max = SCM_MAX_FD; 89 fpl->max = SCM_MAX_FD;
90 fpl->user = NULL;
90 } 91 }
91 fpp = &fpl->fp[fpl->count]; 92 fpp = &fpl->fp[fpl->count];
92 93
@@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
107 *fpp++ = file; 108 *fpp++ = file;
108 fpl->count++; 109 fpl->count++;
109 } 110 }
111
112 if (!fpl->user)
113 fpl->user = get_uid(current_user());
114
110 return num; 115 return num;
111} 116}
112 117
@@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm)
119 scm->fp = NULL; 124 scm->fp = NULL;
120 for (i=fpl->count-1; i>=0; i--) 125 for (i=fpl->count-1; i>=0; i--)
121 fput(fpl->fp[i]); 126 fput(fpl->fp[i]);
127 free_uid(fpl->user);
122 kfree(fpl); 128 kfree(fpl);
123 } 129 }
124} 130}
@@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
336 for (i = 0; i < fpl->count; i++) 342 for (i = 0; i < fpl->count; i++)
337 get_file(fpl->fp[i]); 343 get_file(fpl->fp[i]);
338 new_fpl->max = new_fpl->count; 344 new_fpl->max = new_fpl->count;
345 new_fpl->user = get_uid(fpl->user);
339 } 346 }
340 return new_fpl; 347 return new_fpl;
341} 348}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b2df375ec9c2..d04c2d1c8c87 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -79,6 +79,8 @@
79 79
80struct kmem_cache *skbuff_head_cache __read_mostly; 80struct kmem_cache *skbuff_head_cache __read_mostly;
81static struct kmem_cache *skbuff_fclone_cache __read_mostly; 81static struct kmem_cache *skbuff_fclone_cache __read_mostly;
82int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
83EXPORT_SYMBOL(sysctl_max_skb_frags);
82 84
83/** 85/**
84 * skb_panic - private function for out-of-line support 86 * skb_panic - private function for out-of-line support
@@ -347,8 +349,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
347} 349}
348EXPORT_SYMBOL(build_skb); 350EXPORT_SYMBOL(build_skb);
349 351
352#define NAPI_SKB_CACHE_SIZE 64
353
354struct napi_alloc_cache {
355 struct page_frag_cache page;
356 size_t skb_count;
357 void *skb_cache[NAPI_SKB_CACHE_SIZE];
358};
359
350static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); 360static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
351static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); 361static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
352 362
353static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 363static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
354{ 364{
@@ -378,9 +388,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
378 388
379static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) 389static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
380{ 390{
381 struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); 391 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
382 392
383 return __alloc_page_frag(nc, fragsz, gfp_mask); 393 return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
384} 394}
385 395
386void *napi_alloc_frag(unsigned int fragsz) 396void *napi_alloc_frag(unsigned int fragsz)
@@ -474,7 +484,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
474struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, 484struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
475 gfp_t gfp_mask) 485 gfp_t gfp_mask)
476{ 486{
477 struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); 487 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
478 struct sk_buff *skb; 488 struct sk_buff *skb;
479 void *data; 489 void *data;
480 490
@@ -494,7 +504,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
494 if (sk_memalloc_socks()) 504 if (sk_memalloc_socks())
495 gfp_mask |= __GFP_MEMALLOC; 505 gfp_mask |= __GFP_MEMALLOC;
496 506
497 data = __alloc_page_frag(nc, len, gfp_mask); 507 data = __alloc_page_frag(&nc->page, len, gfp_mask);
498 if (unlikely(!data)) 508 if (unlikely(!data))
499 return NULL; 509 return NULL;
500 510
@@ -505,7 +515,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
505 } 515 }
506 516
507 /* use OR instead of assignment to avoid clearing of bits in mask */ 517 /* use OR instead of assignment to avoid clearing of bits in mask */
508 if (nc->pfmemalloc) 518 if (nc->page.pfmemalloc)
509 skb->pfmemalloc = 1; 519 skb->pfmemalloc = 1;
510 skb->head_frag = 1; 520 skb->head_frag = 1;
511 521
@@ -747,6 +757,73 @@ void consume_skb(struct sk_buff *skb)
747} 757}
748EXPORT_SYMBOL(consume_skb); 758EXPORT_SYMBOL(consume_skb);
749 759
760void __kfree_skb_flush(void)
761{
762 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
763
764 /* flush skb_cache if containing objects */
765 if (nc->skb_count) {
766 kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
767 nc->skb_cache);
768 nc->skb_count = 0;
769 }
770}
771
772static inline void _kfree_skb_defer(struct sk_buff *skb)
773{
774 struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
775
776 /* drop skb->head and call any destructors for packet */
777 skb_release_all(skb);
778
779 /* record skb to CPU local list */
780 nc->skb_cache[nc->skb_count++] = skb;
781
782#ifdef CONFIG_SLUB
783 /* SLUB writes into objects when freeing */
784 prefetchw(skb);
785#endif
786
787 /* flush skb_cache if it is filled */
788 if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
789 kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
790 nc->skb_cache);
791 nc->skb_count = 0;
792 }
793}
794void __kfree_skb_defer(struct sk_buff *skb)
795{
796 _kfree_skb_defer(skb);
797}
798
799void napi_consume_skb(struct sk_buff *skb, int budget)
800{
801 if (unlikely(!skb))
802 return;
803
 804 /* Zero budget indicates a non-NAPI context called us, like netpoll */
805 if (unlikely(!budget)) {
806 dev_consume_skb_any(skb);
807 return;
808 }
809
810 if (likely(atomic_read(&skb->users) == 1))
811 smp_rmb();
812 else if (likely(!atomic_dec_and_test(&skb->users)))
813 return;
814 /* if reaching here SKB is ready to free */
815 trace_consume_skb(skb);
816
817 /* if SKB is a clone, don't handle this case */
818 if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
819 __kfree_skb(skb);
820 return;
821 }
822
823 _kfree_skb_defer(skb);
824}
825EXPORT_SYMBOL(napi_consume_skb);
826
750/* Make sure a field is enclosed inside headers_start/headers_end section */ 827/* Make sure a field is enclosed inside headers_start/headers_end section */
751#define CHECK_SKB_FIELD(field) \ 828#define CHECK_SKB_FIELD(field) \
752 BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ 829 BUILD_BUG_ON(offsetof(struct sk_buff, field) < \
@@ -1841,6 +1918,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1841 struct splice_pipe_desc *spd, struct sock *sk) 1918 struct splice_pipe_desc *spd, struct sock *sk)
1842{ 1919{
1843 int seg; 1920 int seg;
1921 struct sk_buff *iter;
1844 1922
1845 /* map the linear part : 1923 /* map the linear part :
1846 * If skb->head_frag is set, this 'linear' part is backed by a 1924 * If skb->head_frag is set, this 'linear' part is backed by a
@@ -1867,6 +1945,19 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1867 return true; 1945 return true;
1868 } 1946 }
1869 1947
1948 skb_walk_frags(skb, iter) {
1949 if (*offset >= iter->len) {
1950 *offset -= iter->len;
1951 continue;
1952 }
1953 /* __skb_splice_bits() only fails if the output has no room
1954 * left, so no point in going over the frag_list for the error
1955 * case.
1956 */
1957 if (__skb_splice_bits(iter, pipe, offset, len, spd, sk))
1958 return true;
1959 }
1960
1870 return false; 1961 return false;
1871} 1962}
1872 1963
@@ -1893,9 +1984,7 @@ ssize_t skb_socket_splice(struct sock *sk,
1893 1984
1894/* 1985/*
1895 * Map data from the skb to a pipe. Should handle both the linear part, 1986 * Map data from the skb to a pipe. Should handle both the linear part,
1896 * the fragments, and the frag list. It does NOT handle frag lists within 1987 * the fragments, and the frag list.
1897 * the frag list, if such a thing exists. We'd probably need to recurse to
1898 * handle that cleanly.
1899 */ 1988 */
1900int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, 1989int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
1901 struct pipe_inode_info *pipe, unsigned int tlen, 1990 struct pipe_inode_info *pipe, unsigned int tlen,
@@ -1914,29 +2003,10 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
1914 .ops = &nosteal_pipe_buf_ops, 2003 .ops = &nosteal_pipe_buf_ops,
1915 .spd_release = sock_spd_release, 2004 .spd_release = sock_spd_release,
1916 }; 2005 };
1917 struct sk_buff *frag_iter;
1918 int ret = 0; 2006 int ret = 0;
1919 2007
1920 /* 2008 __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
1921 * __skb_splice_bits() only fails if the output has no room left,
1922 * so no point in going over the frag_list for the error case.
1923 */
1924 if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
1925 goto done;
1926 else if (!tlen)
1927 goto done;
1928
1929 /*
1930 * now see if we have a frag_list to map
1931 */
1932 skb_walk_frags(skb, frag_iter) {
1933 if (!tlen)
1934 break;
1935 if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
1936 break;
1937 }
1938 2009
1939done:
1940 if (spd.nr_pages) 2010 if (spd.nr_pages)
1941 ret = splice_cb(sk, pipe, &spd); 2011 ret = splice_cb(sk, pipe, &spd);
1942 2012
@@ -2946,6 +3016,24 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
2946EXPORT_SYMBOL_GPL(skb_append_pagefrags); 3016EXPORT_SYMBOL_GPL(skb_append_pagefrags);
2947 3017
2948/** 3018/**
3019 * skb_push_rcsum - push skb and update receive checksum
3020 * @skb: buffer to update
3021 * @len: length of data pulled
3022 *
3023 * This function performs an skb_push on the packet and updates
3024 * the CHECKSUM_COMPLETE checksum. It should be used on
3025 * receive path processing instead of skb_push unless you know
3026 * that the checksum difference is zero (e.g., a valid IP header)
3027 * or you are setting ip_summed to CHECKSUM_NONE.
3028 */
3029static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
3030{
3031 skb_push(skb, len);
3032 skb_postpush_rcsum(skb, skb->data, len);
3033 return skb->data;
3034}
3035
3036/**
2949 * skb_pull_rcsum - pull skb and update receive checksum 3037 * skb_pull_rcsum - pull skb and update receive checksum
2950 * @skb: buffer to update 3038 * @skb: buffer to update
2951 * @len: length of data pulled 3039 * @len: length of data pulled
@@ -3004,8 +3092,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
3004 if (unlikely(!proto)) 3092 if (unlikely(!proto))
3005 return ERR_PTR(-EINVAL); 3093 return ERR_PTR(-EINVAL);
3006 3094
3007 csum = !head_skb->encap_hdr_csum && 3095 csum = !!can_checksum_protocol(features, proto);
3008 !!can_checksum_protocol(features, proto);
3009 3096
3010 headroom = skb_headroom(head_skb); 3097 headroom = skb_headroom(head_skb);
3011 pos = skb_headlen(head_skb); 3098 pos = skb_headlen(head_skb);
@@ -3098,13 +3185,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
3098 if (nskb->len == len + doffset) 3185 if (nskb->len == len + doffset)
3099 goto perform_csum_check; 3186 goto perform_csum_check;
3100 3187
3101 if (!sg && !nskb->remcsum_offload) { 3188 if (!sg) {
3102 nskb->ip_summed = CHECKSUM_NONE; 3189 if (!nskb->remcsum_offload)
3103 nskb->csum = skb_copy_and_csum_bits(head_skb, offset, 3190 nskb->ip_summed = CHECKSUM_NONE;
3104 skb_put(nskb, len), 3191 SKB_GSO_CB(nskb)->csum =
3105 len, 0); 3192 skb_copy_and_csum_bits(head_skb, offset,
3193 skb_put(nskb, len),
3194 len, 0);
3106 SKB_GSO_CB(nskb)->csum_start = 3195 SKB_GSO_CB(nskb)->csum_start =
3107 skb_headroom(nskb) + doffset; 3196 skb_headroom(nskb) + doffset;
3108 continue; 3197 continue;
3109 } 3198 }
3110 3199
@@ -3170,12 +3259,19 @@ skip_fraglist:
3170 nskb->truesize += nskb->data_len; 3259 nskb->truesize += nskb->data_len;
3171 3260
3172perform_csum_check: 3261perform_csum_check:
3173 if (!csum && !nskb->remcsum_offload) { 3262 if (!csum) {
3174 nskb->csum = skb_checksum(nskb, doffset, 3263 if (skb_has_shared_frag(nskb)) {
3175 nskb->len - doffset, 0); 3264 err = __skb_linearize(nskb);
3176 nskb->ip_summed = CHECKSUM_NONE; 3265 if (err)
3266 goto err;
3267 }
3268 if (!nskb->remcsum_offload)
3269 nskb->ip_summed = CHECKSUM_NONE;
3270 SKB_GSO_CB(nskb)->csum =
3271 skb_checksum(nskb, doffset,
3272 nskb->len - doffset, 0);
3177 SKB_GSO_CB(nskb)->csum_start = 3273 SKB_GSO_CB(nskb)->csum_start =
3178 skb_headroom(nskb) + doffset; 3274 skb_headroom(nskb) + doffset;
3179 } 3275 }
3180 } while ((offset += len) < head_skb->len); 3276 } while ((offset += len) < head_skb->len);
3181 3277
@@ -4082,9 +4178,9 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
4082 if (!pskb_may_pull(skb_chk, offset)) 4178 if (!pskb_may_pull(skb_chk, offset))
4083 goto err; 4179 goto err;
4084 4180
4085 __skb_pull(skb_chk, offset); 4181 skb_pull_rcsum(skb_chk, offset);
4086 ret = skb_chkf(skb_chk); 4182 ret = skb_chkf(skb_chk);
4087 __skb_push(skb_chk, offset); 4183 skb_push_rcsum(skb_chk, offset);
4088 4184
4089 if (ret) 4185 if (ret)
4090 goto err; 4186 goto err;
@@ -4217,7 +4313,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
4217 skb->skb_iif = 0; 4313 skb->skb_iif = 0;
4218 skb->ignore_df = 0; 4314 skb->ignore_df = 0;
4219 skb_dst_drop(skb); 4315 skb_dst_drop(skb);
4220 skb_sender_cpu_clear(skb);
4221 secpath_reset(skb); 4316 secpath_reset(skb);
4222 nf_reset(skb); 4317 nf_reset(skb);
4223 nf_reset_trace(skb); 4318 nf_reset_trace(skb);
@@ -4413,9 +4508,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
4413 skb->mac_len += VLAN_HLEN; 4508 skb->mac_len += VLAN_HLEN;
4414 __skb_pull(skb, offset); 4509 __skb_pull(skb, offset);
4415 4510
4416 if (skb->ip_summed == CHECKSUM_COMPLETE) 4511 skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
4417 skb->csum = csum_add(skb->csum, csum_partial(skb->data
4418 + (2 * ETH_ALEN), VLAN_HLEN, 0));
4419 } 4512 }
4420 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); 4513 __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
4421 return 0; 4514 return 0;
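
Note on the skbuff.c hunks above: napi_consume_skb() adds a deferred free path for skbs released from NAPI context. Instead of freeing each skbuff head individually, heads are parked in the per-CPU napi_alloc_cache.skb_cache and released in bulk with kmem_cache_free_bulk(), either when the cache reaches NAPI_SKB_CACHE_SIZE or when __kfree_skb_flush() runs (its caller, presumably at the end of the RX softirq, is not part of the hunks shown here). Clones and skbs with remaining references still take the ordinary path. A minimal sketch of how a driver's poll routine might use the helper; struct example_ring and the example_* helpers are invented for illustration, not part of this patch:

static int example_napi_poll(struct napi_struct *napi, int budget)
{
	struct example_ring *ring = container_of(napi, struct example_ring, napi);
	struct sk_buff *skb;
	int work_done;

	/* Hand completed TX skbs to the deferred bulk-free path; a zero
	 * budget means a non-NAPI caller (e.g. netpoll), in which case
	 * napi_consume_skb() falls back to dev_consume_skb_any().
	 */
	while ((skb = example_reap_tx_completion(ring)) != NULL)
		napi_consume_skb(skb, budget);

	work_done = example_clean_rx(ring, budget);
	if (work_done < budget)
		napi_complete(napi);

	return work_done;
}
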
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c1c8bc93412..b67b9aedb230 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -987,6 +987,10 @@ set_rcvbuf:
987 sk->sk_incoming_cpu = val; 987 sk->sk_incoming_cpu = val;
988 break; 988 break;
989 989
990 case SO_CNX_ADVICE:
991 if (val == 1)
992 dst_negative_advice(sk);
993 break;
990 default: 994 default:
991 ret = -ENOPROTOOPT; 995 ret = -ENOPROTOOPT;
992 break; 996 break;
@@ -1531,6 +1535,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1531 newsk = NULL; 1535 newsk = NULL;
1532 goto out; 1536 goto out;
1533 } 1537 }
1538 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
1534 1539
1535 newsk->sk_err = 0; 1540 newsk->sk_err = 0;
1536 newsk->sk_priority = 0; 1541 newsk->sk_priority = 0;
@@ -1903,7 +1908,7 @@ EXPORT_SYMBOL(sock_cmsg_send);
1903bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) 1908bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
1904{ 1909{
1905 if (pfrag->page) { 1910 if (pfrag->page) {
1906 if (atomic_read(&pfrag->page->_count) == 1) { 1911 if (page_ref_count(pfrag->page) == 1) {
1907 pfrag->offset = 0; 1912 pfrag->offset = 0;
1908 return true; 1913 return true;
1909 } 1914 }
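
The sock.c hunk wires up the new SO_CNX_ADVICE socket option: writing the value 1 makes the kernel call dst_negative_advice() on the socket, nudging it to revalidate its cached route. A hedged userspace sketch, assuming uapi headers new enough to define SO_CNX_ADVICE; error handling is trimmed for brevity:

#include <sys/socket.h>

/* Ask the kernel to drop negatively-performing route state for this socket,
 * e.g. after the application observes persistent loss on the connection.
 */
static int advise_connection(int fd)
{
	int val = 1;

	return setsockopt(fd, SOL_SOCKET, SO_CNX_ADVICE, &val, sizeof(val));
}
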
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 1df98c557440..e92b759d906c 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -93,10 +93,17 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
93 * @sk2: Socket belonging to the existing reuseport group. 93 * @sk2: Socket belonging to the existing reuseport group.
94 * May return ENOMEM and not add socket to group under memory pressure. 94 * May return ENOMEM and not add socket to group under memory pressure.
95 */ 95 */
96int reuseport_add_sock(struct sock *sk, const struct sock *sk2) 96int reuseport_add_sock(struct sock *sk, struct sock *sk2)
97{ 97{
98 struct sock_reuseport *reuse; 98 struct sock_reuseport *reuse;
99 99
100 if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
101 int err = reuseport_alloc(sk2);
102
103 if (err)
104 return err;
105 }
106
100 spin_lock_bh(&reuseport_lock); 107 spin_lock_bh(&reuseport_lock);
101 reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, 108 reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
102 lockdep_is_held(&reuseport_lock)), 109 lockdep_is_held(&reuseport_lock)),
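
reuseport_add_sock() now takes a non-const sk2 and allocates the reuseport group on it lazily if none exists yet, so callers no longer have to guarantee that the first socket already built a group. The userspace pattern it serves is unchanged; an illustrative listener setup:

#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

/* Open one of several UDP sockets sharing a port via SO_REUSEPORT; the
 * second and later sockets are attached to the group in
 * reuseport_add_sock(), which can now create it on demand.
 */
static int open_reuseport_udp(const struct sockaddr_in *addr)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0 ||
	    bind(fd, (const struct sockaddr *)addr, sizeof(*addr)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
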
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 95b6139d710c..a6beb7b6ae55 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -26,6 +26,7 @@ static int zero = 0;
26static int one = 1; 26static int one = 1;
27static int min_sndbuf = SOCK_MIN_SNDBUF; 27static int min_sndbuf = SOCK_MIN_SNDBUF;
28static int min_rcvbuf = SOCK_MIN_RCVBUF; 28static int min_rcvbuf = SOCK_MIN_RCVBUF;
29static int max_skb_frags = MAX_SKB_FRAGS;
29 30
30static int net_msg_warn; /* Unused, but still a sysctl */ 31static int net_msg_warn; /* Unused, but still a sysctl */
31 32
@@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] = {
392 .mode = 0644, 393 .mode = 0644,
393 .proc_handler = proc_dointvec 394 .proc_handler = proc_dointvec
394 }, 395 },
396 {
397 .procname = "max_skb_frags",
398 .data = &sysctl_max_skb_frags,
399 .maxlen = sizeof(int),
400 .mode = 0644,
401 .proc_handler = proc_dointvec_minmax,
402 .extra1 = &one,
403 .extra2 = &max_skb_frags,
404 },
395 { } 405 { }
396}; 406};
397 407
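
The sysctl table gains net.core.max_skb_frags, with proc_dointvec_minmax clamping writes to the range [1, MAX_SKB_FRAGS]. A small sketch of adjusting it from userspace, assuming the usual /proc/sys/net/core path for this entry:

#include <stdio.h>

/* Lower the number of page fragments the stack will build per skb; values
 * outside [1, MAX_SKB_FRAGS] are rejected by the proc handler.
 */
static int set_max_skb_frags(int val)
{
	FILE *f = fopen("/proc/sys/net/core/max_skb_frags", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", val);
	return fclose(f);
}
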
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 5684e14932bd..9c67a961ba53 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -204,8 +204,6 @@ void dccp_req_err(struct sock *sk, u64 seq)
204 * ICMPs are not backlogged, hence we cannot get an established 204 * ICMPs are not backlogged, hence we cannot get an established
205 * socket here. 205 * socket here.
206 */ 206 */
207 WARN_ON(req->sk);
208
209 if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { 207 if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
210 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 208 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
211 } else { 209 } else {
@@ -802,7 +800,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
802 } 800 }
803 801
804lookup: 802lookup:
805 sk = __inet_lookup_skb(&dccp_hashinfo, skb, 803 sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
806 dh->dccph_sport, dh->dccph_dport); 804 dh->dccph_sport, dh->dccph_dport);
807 if (!sk) { 805 if (!sk) {
808 dccp_pr_debug("failed to look up flow ID in table and " 806 dccp_pr_debug("failed to look up flow ID in table and "
@@ -824,26 +822,26 @@ lookup:
824 822
825 if (sk->sk_state == DCCP_NEW_SYN_RECV) { 823 if (sk->sk_state == DCCP_NEW_SYN_RECV) {
826 struct request_sock *req = inet_reqsk(sk); 824 struct request_sock *req = inet_reqsk(sk);
827 struct sock *nsk = NULL; 825 struct sock *nsk;
828 826
829 sk = req->rsk_listener; 827 sk = req->rsk_listener;
830 if (likely(sk->sk_state == DCCP_LISTEN)) { 828 if (unlikely(sk->sk_state != DCCP_LISTEN)) {
831 nsk = dccp_check_req(sk, skb, req);
832 } else {
833 inet_csk_reqsk_queue_drop_and_put(sk, req); 829 inet_csk_reqsk_queue_drop_and_put(sk, req);
834 goto lookup; 830 goto lookup;
835 } 831 }
832 sock_hold(sk);
833 nsk = dccp_check_req(sk, skb, req);
836 if (!nsk) { 834 if (!nsk) {
837 reqsk_put(req); 835 reqsk_put(req);
838 goto discard_it; 836 goto discard_and_relse;
839 } 837 }
840 if (nsk == sk) { 838 if (nsk == sk) {
841 sock_hold(sk);
842 reqsk_put(req); 839 reqsk_put(req);
843 } else if (dccp_child_process(sk, nsk, skb)) { 840 } else if (dccp_child_process(sk, nsk, skb)) {
844 dccp_v4_ctl_send_reset(sk, skb); 841 dccp_v4_ctl_send_reset(sk, skb);
845 goto discard_it; 842 goto discard_and_relse;
846 } else { 843 } else {
844 sock_put(sk);
847 return 0; 845 return 0;
848 } 846 }
849 } 847 }
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9c6d0508e63a..4663a01d5039 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -668,7 +668,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
668 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); 668 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
669 669
670lookup: 670lookup:
671 sk = __inet6_lookup_skb(&dccp_hashinfo, skb, 671 sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
672 dh->dccph_sport, dh->dccph_dport, 672 dh->dccph_sport, dh->dccph_dport,
673 inet6_iif(skb)); 673 inet6_iif(skb));
674 if (!sk) { 674 if (!sk) {
@@ -691,26 +691,26 @@ lookup:
691 691
692 if (sk->sk_state == DCCP_NEW_SYN_RECV) { 692 if (sk->sk_state == DCCP_NEW_SYN_RECV) {
693 struct request_sock *req = inet_reqsk(sk); 693 struct request_sock *req = inet_reqsk(sk);
694 struct sock *nsk = NULL; 694 struct sock *nsk;
695 695
696 sk = req->rsk_listener; 696 sk = req->rsk_listener;
697 if (likely(sk->sk_state == DCCP_LISTEN)) { 697 if (unlikely(sk->sk_state != DCCP_LISTEN)) {
698 nsk = dccp_check_req(sk, skb, req);
699 } else {
700 inet_csk_reqsk_queue_drop_and_put(sk, req); 698 inet_csk_reqsk_queue_drop_and_put(sk, req);
701 goto lookup; 699 goto lookup;
702 } 700 }
701 sock_hold(sk);
702 nsk = dccp_check_req(sk, skb, req);
703 if (!nsk) { 703 if (!nsk) {
704 reqsk_put(req); 704 reqsk_put(req);
705 goto discard_it; 705 goto discard_and_relse;
706 } 706 }
707 if (nsk == sk) { 707 if (nsk == sk) {
708 sock_hold(sk);
709 reqsk_put(req); 708 reqsk_put(req);
710 } else if (dccp_child_process(sk, nsk, skb)) { 709 } else if (dccp_child_process(sk, nsk, skb)) {
711 dccp_v6_ctl_send_reset(sk, skb); 710 dccp_v6_ctl_send_reset(sk, skb);
712 goto discard_it; 711 goto discard_and_relse;
713 } else { 712 } else {
713 sock_put(sk);
714 return 0; 714 return 0;
715 } 715 }
716 } 716 }
@@ -993,7 +993,7 @@ static struct proto dccp_v6_prot = {
993 .sendmsg = dccp_sendmsg, 993 .sendmsg = dccp_sendmsg,
994 .recvmsg = dccp_recvmsg, 994 .recvmsg = dccp_recvmsg,
995 .backlog_rcv = dccp_v6_do_rcv, 995 .backlog_rcv = dccp_v6_do_rcv,
996 .hash = inet_hash, 996 .hash = inet6_hash,
997 .unhash = inet_unhash, 997 .unhash = inet_unhash,
998 .accept = inet_csk_accept, 998 .accept = inet_csk_accept,
999 .get_port = inet_csk_get_port, 999 .get_port = inet_csk_get_port,
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index fa4daba8db55..c28c47463b7e 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -430,35 +430,30 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
430 hwmon_device_unregister(ds->hwmon_dev); 430 hwmon_device_unregister(ds->hwmon_dev);
431#endif 431#endif
432 432
433 /* Disable configuration of the CPU and DSA ports */ 433 /* Destroy network devices for physical switch ports. */
434 for (port = 0; port < DSA_MAX_PORTS; port++) { 434 for (port = 0; port < DSA_MAX_PORTS; port++) {
435 if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) 435 if (!(ds->phys_port_mask & (1 << port)))
436 continue;
437
438 if (!ds->ports[port])
436 continue; 439 continue;
437 440
441 dsa_slave_destroy(ds->ports[port]);
442 }
443
444 /* Remove any fixed link PHYs */
445 for (port = 0; port < DSA_MAX_PORTS; port++) {
438 port_dn = cd->port_dn[port]; 446 port_dn = cd->port_dn[port];
439 if (of_phy_is_fixed_link(port_dn)) { 447 if (of_phy_is_fixed_link(port_dn)) {
440 phydev = of_phy_find_device(port_dn); 448 phydev = of_phy_find_device(port_dn);
441 if (phydev) { 449 if (phydev) {
442 int addr = phydev->mdio.addr;
443
444 phy_device_free(phydev); 450 phy_device_free(phydev);
445 of_node_put(port_dn); 451 of_node_put(port_dn);
446 fixed_phy_del(addr); 452 fixed_phy_unregister(phydev);
447 } 453 }
448 } 454 }
449 } 455 }
450 456
451 /* Destroy network devices for physical switch ports. */
452 for (port = 0; port < DSA_MAX_PORTS; port++) {
453 if (!(ds->phys_port_mask & (1 << port)))
454 continue;
455
456 if (!ds->ports[port])
457 continue;
458
459 dsa_slave_destroy(ds->ports[port]);
460 }
461
462 mdiobus_unregister(ds->slave_mii_bus); 457 mdiobus_unregister(ds->slave_mii_bus);
463} 458}
464 459
@@ -935,6 +930,14 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
935{ 930{
936 int i; 931 int i;
937 932
933 dst->master_netdev->dsa_ptr = NULL;
934
935 /* If we used a tagging format that doesn't have an ethertype
936 * field, make sure that all packets from this point get sent
937 * without the tag and go through the regular receive path.
938 */
939 wmb();
940
938 for (i = 0; i < dst->pd->nr_chips; i++) { 941 for (i = 0; i < dst->pd->nr_chips; i++) {
939 struct dsa_switch *ds = dst->ds[i]; 942 struct dsa_switch *ds = dst->ds[i];
940 943
@@ -988,14 +991,6 @@ static int dsa_suspend(struct device *d)
988 struct dsa_switch_tree *dst = platform_get_drvdata(pdev); 991 struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
989 int i, ret = 0; 992 int i, ret = 0;
990 993
991 dst->master_netdev->dsa_ptr = NULL;
992
993 /* If we used a tagging format that doesn't have an ethertype
994 * field, make sure that all packets from this point get sent
995 * without the tag and go through the regular receive path.
996 */
997 wmb();
998
999 for (i = 0; i < dst->pd->nr_chips; i++) { 994 for (i = 0; i < dst->pd->nr_chips; i++) {
1000 struct dsa_switch *ds = dst->ds[i]; 995 struct dsa_switch *ds = dst->ds[i];
1001 996
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 40b9ca72aae3..a575f0350d5a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -201,47 +201,6 @@ out:
201 return 0; 201 return 0;
202} 202}
203 203
204static int dsa_bridge_check_vlan_range(struct dsa_switch *ds,
205 const struct net_device *bridge,
206 u16 vid_begin, u16 vid_end)
207{
208 struct dsa_slave_priv *p;
209 struct net_device *dev, *vlan_br;
210 DECLARE_BITMAP(members, DSA_MAX_PORTS);
211 DECLARE_BITMAP(untagged, DSA_MAX_PORTS);
212 u16 vid;
213 int member, err;
214
215 if (!ds->drv->vlan_getnext || !vid_begin)
216 return -EOPNOTSUPP;
217
218 vid = vid_begin - 1;
219
220 do {
221 err = ds->drv->vlan_getnext(ds, &vid, members, untagged);
222 if (err)
223 break;
224
225 if (vid > vid_end)
226 break;
227
228 member = find_first_bit(members, DSA_MAX_PORTS);
229 if (member == DSA_MAX_PORTS)
230 continue;
231
232 dev = ds->ports[member];
233 p = netdev_priv(dev);
234 vlan_br = p->bridge_dev;
235 if (vlan_br == bridge)
236 continue;
237
238 netdev_dbg(vlan_br, "hardware VLAN %d already in use\n", vid);
239 return -EOPNOTSUPP;
240 } while (vid < vid_end);
241
242 return err == -ENOENT ? 0 : err;
243}
244
245static int dsa_slave_port_vlan_add(struct net_device *dev, 204static int dsa_slave_port_vlan_add(struct net_device *dev,
246 const struct switchdev_obj_port_vlan *vlan, 205 const struct switchdev_obj_port_vlan *vlan,
247 struct switchdev_trans *trans) 206 struct switchdev_trans *trans)
@@ -254,15 +213,6 @@ static int dsa_slave_port_vlan_add(struct net_device *dev,
254 if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) 213 if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add)
255 return -EOPNOTSUPP; 214 return -EOPNOTSUPP;
256 215
257 /* If the requested port doesn't belong to the same bridge as
258 * the VLAN members, fallback to software VLAN (hopefully).
259 */
260 err = dsa_bridge_check_vlan_range(ds, p->bridge_dev,
261 vlan->vid_begin,
262 vlan->vid_end);
263 if (err)
264 return err;
265
266 err = ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); 216 err = ds->drv->port_vlan_prepare(ds, p->port, vlan, trans);
267 if (err) 217 if (err)
268 return err; 218 return err;
@@ -293,41 +243,11 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev,
293{ 243{
294 struct dsa_slave_priv *p = netdev_priv(dev); 244 struct dsa_slave_priv *p = netdev_priv(dev);
295 struct dsa_switch *ds = p->parent; 245 struct dsa_switch *ds = p->parent;
296 DECLARE_BITMAP(members, DSA_MAX_PORTS);
297 DECLARE_BITMAP(untagged, DSA_MAX_PORTS);
298 u16 pvid, vid = 0;
299 int err;
300
301 if (!ds->drv->vlan_getnext || !ds->drv->port_pvid_get)
302 return -EOPNOTSUPP;
303
304 err = ds->drv->port_pvid_get(ds, p->port, &pvid);
305 if (err)
306 return err;
307
308 for (;;) {
309 err = ds->drv->vlan_getnext(ds, &vid, members, untagged);
310 if (err)
311 break;
312
313 if (!test_bit(p->port, members))
314 continue;
315
316 memset(vlan, 0, sizeof(*vlan));
317 vlan->vid_begin = vlan->vid_end = vid;
318 246
319 if (vid == pvid) 247 if (ds->drv->port_vlan_dump)
320 vlan->flags |= BRIDGE_VLAN_INFO_PVID; 248 return ds->drv->port_vlan_dump(ds, p->port, vlan, cb);
321 249
322 if (test_bit(p->port, untagged)) 250 return -EOPNOTSUPP;
323 vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
324
325 err = cb(&vlan->obj);
326 if (err)
327 break;
328 }
329
330 return err == -ENOENT ? 0 : err;
331} 251}
332 252
333static int dsa_slave_port_fdb_add(struct net_device *dev, 253static int dsa_slave_port_fdb_add(struct net_device *dev,
@@ -385,31 +305,6 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
385 return -EOPNOTSUPP; 305 return -EOPNOTSUPP;
386} 306}
387 307
388/* Return a bitmask of all ports being currently bridged within a given bridge
389 * device. Note that on leave, the mask will still return the bitmask of ports
390 * currently bridged, prior to port removal, and this is exactly what we want.
391 */
392static u32 dsa_slave_br_port_mask(struct dsa_switch *ds,
393 struct net_device *bridge)
394{
395 struct dsa_slave_priv *p;
396 unsigned int port;
397 u32 mask = 0;
398
399 for (port = 0; port < DSA_MAX_PORTS; port++) {
400 if (!dsa_is_port_initialized(ds, port))
401 continue;
402
403 p = netdev_priv(ds->ports[port]);
404
405 if (ds->ports[port]->priv_flags & IFF_BRIDGE_PORT &&
406 p->bridge_dev == bridge)
407 mask |= 1 << port;
408 }
409
410 return mask;
411}
412
413static int dsa_slave_stp_update(struct net_device *dev, u8 state) 308static int dsa_slave_stp_update(struct net_device *dev, u8 state)
414{ 309{
415 struct dsa_slave_priv *p = netdev_priv(dev); 310 struct dsa_slave_priv *p = netdev_priv(dev);
@@ -422,6 +317,24 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state)
422 return ret; 317 return ret;
423} 318}
424 319
320static int dsa_slave_vlan_filtering(struct net_device *dev,
321 const struct switchdev_attr *attr,
322 struct switchdev_trans *trans)
323{
324 struct dsa_slave_priv *p = netdev_priv(dev);
325 struct dsa_switch *ds = p->parent;
326
327 /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
328 if (switchdev_trans_ph_prepare(trans))
329 return 0;
330
331 if (ds->drv->port_vlan_filtering)
332 return ds->drv->port_vlan_filtering(ds, p->port,
333 attr->u.vlan_filtering);
334
335 return 0;
336}
337
425static int dsa_slave_port_attr_set(struct net_device *dev, 338static int dsa_slave_port_attr_set(struct net_device *dev,
426 const struct switchdev_attr *attr, 339 const struct switchdev_attr *attr,
427 struct switchdev_trans *trans) 340 struct switchdev_trans *trans)
@@ -438,6 +351,9 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
438 ret = ds->drv->port_stp_update(ds, p->port, 351 ret = ds->drv->port_stp_update(ds, p->port,
439 attr->u.stp_state); 352 attr->u.stp_state);
440 break; 353 break;
354 case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
355 ret = dsa_slave_vlan_filtering(dev, attr, trans);
356 break;
441 default: 357 default:
442 ret = -EOPNOTSUPP; 358 ret = -EOPNOTSUPP;
443 break; 359 break;
@@ -532,23 +448,20 @@ static int dsa_slave_bridge_port_join(struct net_device *dev,
532 448
533 p->bridge_dev = br; 449 p->bridge_dev = br;
534 450
535 if (ds->drv->port_join_bridge) 451 if (ds->drv->port_bridge_join)
536 ret = ds->drv->port_join_bridge(ds, p->port, 452 ret = ds->drv->port_bridge_join(ds, p->port, br);
537 dsa_slave_br_port_mask(ds, br));
538 453
539 return ret; 454 return ret == -EOPNOTSUPP ? 0 : ret;
540} 455}
541 456
542static int dsa_slave_bridge_port_leave(struct net_device *dev) 457static void dsa_slave_bridge_port_leave(struct net_device *dev)
543{ 458{
544 struct dsa_slave_priv *p = netdev_priv(dev); 459 struct dsa_slave_priv *p = netdev_priv(dev);
545 struct dsa_switch *ds = p->parent; 460 struct dsa_switch *ds = p->parent;
546 int ret = -EOPNOTSUPP;
547 461
548 462
549 if (ds->drv->port_leave_bridge) 463 if (ds->drv->port_bridge_leave)
550 ret = ds->drv->port_leave_bridge(ds, p->port, 464 ds->drv->port_bridge_leave(ds, p->port);
551 dsa_slave_br_port_mask(ds, p->bridge_dev));
552 465
553 p->bridge_dev = NULL; 466 p->bridge_dev = NULL;
554 467
@@ -556,8 +469,6 @@ static int dsa_slave_bridge_port_leave(struct net_device *dev)
556 * so allow it to be in BR_STATE_FORWARDING to be kept functional 469 * so allow it to be in BR_STATE_FORWARDING to be kept functional
557 */ 470 */
558 dsa_slave_stp_update(dev, BR_STATE_FORWARDING); 471 dsa_slave_stp_update(dev, BR_STATE_FORWARDING);
559
560 return ret;
561} 472}
562 473
563static int dsa_slave_port_attr_get(struct net_device *dev, 474static int dsa_slave_port_attr_get(struct net_device *dev,
@@ -982,11 +893,15 @@ static void dsa_slave_adjust_link(struct net_device *dev)
982static int dsa_slave_fixed_link_update(struct net_device *dev, 893static int dsa_slave_fixed_link_update(struct net_device *dev,
983 struct fixed_phy_status *status) 894 struct fixed_phy_status *status)
984{ 895{
985 struct dsa_slave_priv *p = netdev_priv(dev); 896 struct dsa_slave_priv *p;
986 struct dsa_switch *ds = p->parent; 897 struct dsa_switch *ds;
987 898
988 if (ds->drv->fixed_link_update) 899 if (dev) {
989 ds->drv->fixed_link_update(ds, p->port, status); 900 p = netdev_priv(dev);
901 ds = p->parent;
902 if (ds->drv->fixed_link_update)
903 ds->drv->fixed_link_update(ds, p->port, status);
904 }
990 905
991 return 0; 906 return 0;
992} 907}
@@ -1194,7 +1109,6 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
1194 if (ret) { 1109 if (ret) {
1195 netdev_err(master, "error %d registering interface %s\n", 1110 netdev_err(master, "error %d registering interface %s\n",
1196 ret, slave_dev->name); 1111 ret, slave_dev->name);
1197 phy_disconnect(p->phy);
1198 ds->ports[port] = NULL; 1112 ds->ports[port] = NULL;
1199 free_netdev(slave_dev); 1113 free_netdev(slave_dev);
1200 return ret; 1114 return ret;
@@ -1205,6 +1119,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
1205 ret = dsa_slave_phy_setup(p, slave_dev); 1119 ret = dsa_slave_phy_setup(p, slave_dev);
1206 if (ret) { 1120 if (ret) {
1207 netdev_err(master, "error %d setting up slave phy\n", ret); 1121 netdev_err(master, "error %d setting up slave phy\n", ret);
1122 unregister_netdev(slave_dev);
1208 free_netdev(slave_dev); 1123 free_netdev(slave_dev);
1209 return ret; 1124 return ret;
1210 } 1125 }
@@ -1228,40 +1143,46 @@ static bool dsa_slave_dev_check(struct net_device *dev)
1228 return dev->netdev_ops == &dsa_slave_netdev_ops; 1143 return dev->netdev_ops == &dsa_slave_netdev_ops;
1229} 1144}
1230 1145
1231static int dsa_slave_master_changed(struct net_device *dev) 1146static int dsa_slave_port_upper_event(struct net_device *dev,
1147 unsigned long event, void *ptr)
1232{ 1148{
1233 struct net_device *master = netdev_master_upper_dev_get(dev); 1149 struct netdev_notifier_changeupper_info *info = ptr;
1234 struct dsa_slave_priv *p = netdev_priv(dev); 1150 struct net_device *upper = info->upper_dev;
1235 int err = 0; 1151 int err = 0;
1236 1152
1237 if (master && master->rtnl_link_ops && 1153 switch (event) {
1238 !strcmp(master->rtnl_link_ops->kind, "bridge")) 1154 case NETDEV_CHANGEUPPER:
1239 err = dsa_slave_bridge_port_join(dev, master); 1155 if (netif_is_bridge_master(upper)) {
1240 else if (dsa_port_is_bridged(p)) 1156 if (info->linking)
1241 err = dsa_slave_bridge_port_leave(dev); 1157 err = dsa_slave_bridge_port_join(dev, upper);
1158 else
1159 dsa_slave_bridge_port_leave(dev);
1160 }
1242 1161
1243 return err; 1162 break;
1163 }
1164
1165 return notifier_from_errno(err);
1244} 1166}
1245 1167
1246int dsa_slave_netdevice_event(struct notifier_block *unused, 1168static int dsa_slave_port_event(struct net_device *dev, unsigned long event,
1247 unsigned long event, void *ptr) 1169 void *ptr)
1248{ 1170{
1249 struct net_device *dev;
1250 int err = 0;
1251
1252 switch (event) { 1171 switch (event) {
1253 case NETDEV_CHANGEUPPER: 1172 case NETDEV_CHANGEUPPER:
1254 dev = netdev_notifier_info_to_dev(ptr); 1173 return dsa_slave_port_upper_event(dev, event, ptr);
1255 if (!dsa_slave_dev_check(dev)) 1174 }
1256 goto out;
1257 1175
1258 err = dsa_slave_master_changed(dev); 1176 return NOTIFY_DONE;
1259 if (err && err != -EOPNOTSUPP) 1177}
1260 netdev_warn(dev, "failed to reflect master change\n");
1261 1178
1262 break; 1179int dsa_slave_netdevice_event(struct notifier_block *unused,
1263 } 1180 unsigned long event, void *ptr)
1181{
1182 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1183
1184 if (dsa_slave_dev_check(dev))
1185 return dsa_slave_port_event(dev, event, ptr);
1264 1186
1265out:
1266 return NOTIFY_DONE; 1187 return NOTIFY_DONE;
1267} 1188}
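
The dsa/slave.c rework replaces the precomputed bridge port mask with per-port port_bridge_join()/port_bridge_leave() driver callbacks, routes SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING to a new port_vlan_filtering op, and delegates VLAN dumping to port_vlan_dump. A rough sketch of what a driver-side join callback could look like under the new interface; struct example_priv, example_priv() and example_allow_forwarding() are purely illustrative:

static int example_port_bridge_join(struct dsa_switch *ds, int port,
				    struct net_device *bridge)
{
	struct example_priv *priv = example_priv(ds);	/* driver-private lookup, invented */
	int i;

	/* The core no longer hands the driver a membership mask, so the
	 * driver tracks which ports share a bridge and opens forwarding
	 * between them itself.
	 */
	priv->bridge_dev[port] = bridge;
	for (i = 0; i < priv->num_ports; i++)
		if (priv->bridge_dev[i] == bridge)
			example_allow_forwarding(priv, port, i);

	return 0;
}
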
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 103871784e50..66dff5e3d772 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -125,6 +125,7 @@ EXPORT_SYMBOL(eth_header);
125 */ 125 */
126u32 eth_get_headlen(void *data, unsigned int len) 126u32 eth_get_headlen(void *data, unsigned int len)
127{ 127{
128 const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
128 const struct ethhdr *eth = (const struct ethhdr *)data; 129 const struct ethhdr *eth = (const struct ethhdr *)data;
129 struct flow_keys keys; 130 struct flow_keys keys;
130 131
@@ -134,7 +135,7 @@ u32 eth_get_headlen(void *data, unsigned int len)
134 135
135 /* parse any remaining L2/L3 headers, check for L4 */ 136 /* parse any remaining L2/L3 headers, check for L4 */
136 if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto, 137 if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
137 sizeof(*eth), len, 0)) 138 sizeof(*eth), len, flags))
138 return max_t(u32, keys.control.thoff, sizeof(*eth)); 139 return max_t(u32, keys.control.thoff, sizeof(*eth));
139 140
140 /* parse for any L4 headers */ 141 /* parse for any L4 headers */
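
eth_get_headlen() now passes FLOW_DISSECTOR_F_PARSE_1ST_FRAG, so the flow dissector only looks for L4 headers in the first fragment of an IP datagram rather than reporting header bytes that are not actually present in later fragments. A sketch of the typical caller, a hypothetical driver RX path deciding how many header bytes to pull into the linear area; EXAMPLE_RX_HDR_SIZE and the function name are illustrative:

#include <linux/etherdevice.h>
#include <linux/kernel.h>

static unsigned int example_rx_header_len(void *frame, unsigned int frame_len)
{
	unsigned int pull_len = eth_get_headlen(frame, frame_len);

	return min_t(unsigned int, pull_len, EXAMPLE_RX_HDR_SIZE);
}
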
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 737c87a2a41e..0023c9048812 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -207,7 +207,7 @@ static int lowpan_device_event(struct notifier_block *unused,
207 struct net_device *wdev = netdev_notifier_info_to_dev(ptr); 207 struct net_device *wdev = netdev_notifier_info_to_dev(ptr);
208 208
209 if (wdev->type != ARPHRD_IEEE802154) 209 if (wdev->type != ARPHRD_IEEE802154)
210 goto out; 210 return NOTIFY_DONE;
211 211
212 switch (event) { 212 switch (event) {
213 case NETDEV_UNREGISTER: 213 case NETDEV_UNREGISTER:
@@ -219,11 +219,10 @@ static int lowpan_device_event(struct notifier_block *unused,
219 lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL); 219 lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL);
220 break; 220 break;
221 default: 221 default:
222 break; 222 return NOTIFY_DONE;
223 } 223 }
224 224
225out: 225 return NOTIFY_OK;
226 return NOTIFY_DONE;
227} 226}
228 227
229static struct notifier_block lowpan_dev_notifier = { 228static struct notifier_block lowpan_dev_notifier = {
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a548be247e15..e0bd013a1e5e 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -182,12 +182,14 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd,
182static HLIST_HEAD(raw_head); 182static HLIST_HEAD(raw_head);
183static DEFINE_RWLOCK(raw_lock); 183static DEFINE_RWLOCK(raw_lock);
184 184
185static void raw_hash(struct sock *sk) 185static int raw_hash(struct sock *sk)
186{ 186{
187 write_lock_bh(&raw_lock); 187 write_lock_bh(&raw_lock);
188 sk_add_node(sk, &raw_head); 188 sk_add_node(sk, &raw_head);
189 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 189 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
190 write_unlock_bh(&raw_lock); 190 write_unlock_bh(&raw_lock);
191
192 return 0;
191} 193}
192 194
193static void raw_unhash(struct sock *sk) 195static void raw_unhash(struct sock *sk)
@@ -462,12 +464,14 @@ static inline struct dgram_sock *dgram_sk(const struct sock *sk)
462 return container_of(sk, struct dgram_sock, sk); 464 return container_of(sk, struct dgram_sock, sk);
463} 465}
464 466
465static void dgram_hash(struct sock *sk) 467static int dgram_hash(struct sock *sk)
466{ 468{
467 write_lock_bh(&dgram_lock); 469 write_lock_bh(&dgram_lock);
468 sk_add_node(sk, &dgram_head); 470 sk_add_node(sk, &dgram_head);
469 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 471 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
470 write_unlock_bh(&dgram_lock); 472 write_unlock_bh(&dgram_lock);
473
474 return 0;
471} 475}
472 476
473static void dgram_unhash(struct sock *sk) 477static void dgram_unhash(struct sock *sk)
@@ -1026,8 +1030,13 @@ static int ieee802154_create(struct net *net, struct socket *sock,
1026 /* Checksums on by default */ 1030 /* Checksums on by default */
1027 sock_set_flag(sk, SOCK_ZAPPED); 1031 sock_set_flag(sk, SOCK_ZAPPED);
1028 1032
1029 if (sk->sk_prot->hash) 1033 if (sk->sk_prot->hash) {
1030 sk->sk_prot->hash(sk); 1034 rc = sk->sk_prot->hash(sk);
1035 if (rc) {
1036 sk_common_release(sk);
1037 goto out;
1038 }
1039 }
1031 1040
1032 if (sk->sk_prot->init) { 1041 if (sk->sk_prot->init) {
1033 rc = sk->sk_prot->init(sk); 1042 rc = sk->sk_prot->init(sk);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index c22920525e5d..238225b0c970 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -186,6 +186,7 @@ config NET_IPGRE_DEMUX
186 186
187config NET_IP_TUNNEL 187config NET_IP_TUNNEL
188 tristate 188 tristate
189 select DST_CACHE
189 default n 190 default n
190 191
191config NET_IPGRE 192config NET_IPGRE
@@ -353,6 +354,7 @@ config INET_ESP
353 select CRYPTO_CBC 354 select CRYPTO_CBC
354 select CRYPTO_SHA1 355 select CRYPTO_SHA1
355 select CRYPTO_DES 356 select CRYPTO_DES
357 select CRYPTO_ECHAINIV
356 ---help--- 358 ---help---
357 Support for IPsec ESP. 359 Support for IPsec ESP.
358 360
@@ -404,14 +406,6 @@ config INET_XFRM_MODE_BEET
404 406
405 If unsure, say Y. 407 If unsure, say Y.
406 408
407config INET_LRO
408 tristate "Large Receive Offload (ipv4/tcp)"
409 default y
410 ---help---
411 Support for Large Receive Offload (ipv4/tcp).
412
413 If unsure, say Y.
414
415config INET_DIAG 409config INET_DIAG
416 tristate "INET: socket monitoring interface" 410 tristate "INET: socket monitoring interface"
417 default y 411 default y
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 62c049b647e9..bfa133691cde 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -32,7 +32,6 @@ obj-$(CONFIG_INET_ESP) += esp4.o
32obj-$(CONFIG_INET_IPCOMP) += ipcomp.o 32obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
33obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o 33obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
34obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o 34obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
35obj-$(CONFIG_INET_LRO) += inet_lro.o
36obj-$(CONFIG_INET_TUNNEL) += tunnel4.o 35obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
37obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o 36obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
38obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o 37obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5c5db6636704..9e481992dbae 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -370,7 +370,11 @@ lookup_protocol:
370 */ 370 */
371 inet->inet_sport = htons(inet->inet_num); 371 inet->inet_sport = htons(inet->inet_num);
372 /* Add to protocol hash chains. */ 372 /* Add to protocol hash chains. */
373 sk->sk_prot->hash(sk); 373 err = sk->sk_prot->hash(sk);
374 if (err) {
375 sk_common_release(sk);
376 goto out;
377 }
374 } 378 }
375 379
376 if (sk->sk_prot->init) { 380 if (sk->sk_prot->init) {
@@ -1091,12 +1095,6 @@ void inet_unregister_protosw(struct inet_protosw *p)
1091} 1095}
1092EXPORT_SYMBOL(inet_unregister_protosw); 1096EXPORT_SYMBOL(inet_unregister_protosw);
1093 1097
1094/*
1095 * Shall we try to damage output packets if routing dev changes?
1096 */
1097
1098int sysctl_ip_dynaddr __read_mostly;
1099
1100static int inet_sk_reselect_saddr(struct sock *sk) 1098static int inet_sk_reselect_saddr(struct sock *sk)
1101{ 1099{
1102 struct inet_sock *inet = inet_sk(sk); 1100 struct inet_sock *inet = inet_sk(sk);
@@ -1127,7 +1125,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1127 if (new_saddr == old_saddr) 1125 if (new_saddr == old_saddr)
1128 return 0; 1126 return 0;
1129 1127
1130 if (sysctl_ip_dynaddr > 1) { 1128 if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
1131 pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", 1129 pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
1132 __func__, &old_saddr, &new_saddr); 1130 __func__, &old_saddr, &new_saddr);
1133 } 1131 }
@@ -1142,8 +1140,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1142 * Besides that, it does not check for connection 1140 * Besides that, it does not check for connection
1143 * uniqueness. Wait for troubles. 1141 * uniqueness. Wait for troubles.
1144 */ 1142 */
1145 __sk_prot_rehash(sk); 1143 return __sk_prot_rehash(sk);
1146 return 0;
1147} 1144}
1148 1145
1149int inet_sk_rebuild_header(struct sock *sk) 1146int inet_sk_rebuild_header(struct sock *sk)
@@ -1183,7 +1180,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1183 * Other protocols have to map its equivalent state to TCP_SYN_SENT. 1180 * Other protocols have to map its equivalent state to TCP_SYN_SENT.
1184 * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme 1181 * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
1185 */ 1182 */
1186 if (!sysctl_ip_dynaddr || 1183 if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
1187 sk->sk_state != TCP_SYN_SENT || 1184 sk->sk_state != TCP_SYN_SENT ||
1188 (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || 1185 (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
1189 (err = inet_sk_reselect_saddr(sk)) != 0) 1186 (err = inet_sk_reselect_saddr(sk)) != 0)
@@ -1383,6 +1380,45 @@ out:
1383 return pp; 1380 return pp;
1384} 1381}
1385 1382
1383static struct sk_buff **ipip_gro_receive(struct sk_buff **head,
1384 struct sk_buff *skb)
1385{
1386 if (NAPI_GRO_CB(skb)->encap_mark) {
1387 NAPI_GRO_CB(skb)->flush = 1;
1388 return NULL;
1389 }
1390
1391 NAPI_GRO_CB(skb)->encap_mark = 1;
1392
1393 return inet_gro_receive(head, skb);
1394}
1395
1396#define SECONDS_PER_DAY 86400
1397
1398/* inet_current_timestamp - Return IP network timestamp
1399 *
1400 * Return milliseconds since midnight in network byte order.
1401 */
1402__be32 inet_current_timestamp(void)
1403{
1404 u32 secs;
1405 u32 msecs;
1406 struct timespec64 ts;
1407
1408 ktime_get_real_ts64(&ts);
1409
1410 /* Get secs since midnight. */
1411 (void)div_u64_rem(ts.tv_sec, SECONDS_PER_DAY, &secs);
1412 /* Convert to msecs. */
1413 msecs = secs * MSEC_PER_SEC;
1414 /* Convert nsec to msec. */
1415 msecs += (u32)ts.tv_nsec / NSEC_PER_MSEC;
1416
1417 /* Convert to network byte order. */
1418 return htonl(msecs);
1419}
1420EXPORT_SYMBOL(inet_current_timestamp);
1421
1386int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) 1422int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
1387{ 1423{
1388 if (sk->sk_family == AF_INET) 1424 if (sk->sk_family == AF_INET)
@@ -1425,6 +1461,13 @@ out_unlock:
1425 return err; 1461 return err;
1426} 1462}
1427 1463
1464static int ipip_gro_complete(struct sk_buff *skb, int nhoff)
1465{
1466 skb->encapsulation = 1;
1467 skb_shinfo(skb)->gso_type |= SKB_GSO_IPIP;
1468 return inet_gro_complete(skb, nhoff);
1469}
1470
1428int inet_ctl_sock_create(struct sock **sk, unsigned short family, 1471int inet_ctl_sock_create(struct sock **sk, unsigned short family,
1429 unsigned short type, unsigned char protocol, 1472 unsigned short type, unsigned char protocol,
1430 struct net *net) 1473 struct net *net)
@@ -1652,8 +1695,8 @@ static struct packet_offload ip_packet_offload __read_mostly = {
1652static const struct net_offload ipip_offload = { 1695static const struct net_offload ipip_offload = {
1653 .callbacks = { 1696 .callbacks = {
1654 .gso_segment = inet_gso_segment, 1697 .gso_segment = inet_gso_segment,
1655 .gro_receive = inet_gro_receive, 1698 .gro_receive = ipip_gro_receive,
1656 .gro_complete = inet_gro_complete, 1699 .gro_complete = ipip_gro_complete,
1657 }, 1700 },
1658}; 1701};
1659 1702
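
The af_inet.c hunks do three things: move ip_dynaddr to per-netns state, add IPIP-specific GRO callbacks that set encap_mark so nested tunnel headers are not aggregated recursively, and add inet_current_timestamp(), which returns milliseconds since midnight UTC in network byte order. A minimal sketch of consuming the new helper, e.g. when filling an IP timestamp option; opt_ptr and its bounds are assumed to be handled by the caller:

#include <linux/string.h>

static void example_fill_ip_timestamp(unsigned char *opt_ptr)
{
	__be32 stamp = inet_current_timestamp();	/* already in network byte order */

	memcpy(opt_ptr, &stamp, sizeof(stamp));
}
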
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 59b3e0e8fd51..c34c7544d1db 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -665,7 +665,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
665 */ 665 */
666 666
667 if (!in_dev) 667 if (!in_dev)
668 goto out; 668 goto out_free_skb;
669 669
670 arp = arp_hdr(skb); 670 arp = arp_hdr(skb);
671 671
@@ -673,7 +673,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
673 default: 673 default:
674 if (arp->ar_pro != htons(ETH_P_IP) || 674 if (arp->ar_pro != htons(ETH_P_IP) ||
675 htons(dev_type) != arp->ar_hrd) 675 htons(dev_type) != arp->ar_hrd)
676 goto out; 676 goto out_free_skb;
677 break; 677 break;
678 case ARPHRD_ETHER: 678 case ARPHRD_ETHER:
679 case ARPHRD_FDDI: 679 case ARPHRD_FDDI:
@@ -690,17 +690,17 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
690 if ((arp->ar_hrd != htons(ARPHRD_ETHER) && 690 if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
691 arp->ar_hrd != htons(ARPHRD_IEEE802)) || 691 arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
692 arp->ar_pro != htons(ETH_P_IP)) 692 arp->ar_pro != htons(ETH_P_IP))
693 goto out; 693 goto out_free_skb;
694 break; 694 break;
695 case ARPHRD_AX25: 695 case ARPHRD_AX25:
696 if (arp->ar_pro != htons(AX25_P_IP) || 696 if (arp->ar_pro != htons(AX25_P_IP) ||
697 arp->ar_hrd != htons(ARPHRD_AX25)) 697 arp->ar_hrd != htons(ARPHRD_AX25))
698 goto out; 698 goto out_free_skb;
699 break; 699 break;
700 case ARPHRD_NETROM: 700 case ARPHRD_NETROM:
701 if (arp->ar_pro != htons(AX25_P_IP) || 701 if (arp->ar_pro != htons(AX25_P_IP) ||
702 arp->ar_hrd != htons(ARPHRD_NETROM)) 702 arp->ar_hrd != htons(ARPHRD_NETROM))
703 goto out; 703 goto out_free_skb;
704 break; 704 break;
705 } 705 }
706 706
@@ -708,7 +708,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
708 708
709 if (arp->ar_op != htons(ARPOP_REPLY) && 709 if (arp->ar_op != htons(ARPOP_REPLY) &&
710 arp->ar_op != htons(ARPOP_REQUEST)) 710 arp->ar_op != htons(ARPOP_REQUEST))
711 goto out; 711 goto out_free_skb;
712 712
713/* 713/*
714 * Extract fields 714 * Extract fields
@@ -733,7 +733,15 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
733 */ 733 */
734 if (ipv4_is_multicast(tip) || 734 if (ipv4_is_multicast(tip) ||
735 (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip))) 735 (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip)))
736 goto out; 736 goto out_free_skb;
737
738 /*
739 * For some 802.11 wireless deployments (and possibly other networks),
740 * there will be an ARP proxy and gratuitous ARP frames are attacks
741 * and thus should not be accepted.
742 */
743 if (sip == tip && IN_DEV_ORCONF(in_dev, DROP_GRATUITOUS_ARP))
744 goto out_free_skb;
737 745
738/* 746/*
739 * Special case: We must set Frame Relay source Q.922 address 747 * Special case: We must set Frame Relay source Q.922 address
@@ -770,7 +778,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
770 !arp_ignore(in_dev, sip, tip)) 778 !arp_ignore(in_dev, sip, tip))
771 arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, 779 arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip,
772 sha, dev->dev_addr, sha, reply_dst); 780 sha, dev->dev_addr, sha, reply_dst);
773 goto out; 781 goto out_consume_skb;
774 } 782 }
775 783
776 if (arp->ar_op == htons(ARPOP_REQUEST) && 784 if (arp->ar_op == htons(ARPOP_REQUEST) &&
@@ -795,7 +803,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
795 neigh_release(n); 803 neigh_release(n);
796 } 804 }
797 } 805 }
798 goto out; 806 goto out_consume_skb;
799 } else if (IN_DEV_FORWARD(in_dev)) { 807 } else if (IN_DEV_FORWARD(in_dev)) {
800 if (addr_type == RTN_UNICAST && 808 if (addr_type == RTN_UNICAST &&
801 (arp_fwd_proxy(in_dev, dev, rt) || 809 (arp_fwd_proxy(in_dev, dev, rt) ||
@@ -818,7 +826,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
818 in_dev->arp_parms, skb); 826 in_dev->arp_parms, skb);
819 goto out_free_dst; 827 goto out_free_dst;
820 } 828 }
821 goto out; 829 goto out_consume_skb;
822 } 830 }
823 } 831 }
824 } 832 }
@@ -868,11 +876,16 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
868 neigh_release(n); 876 neigh_release(n);
869 } 877 }
870 878
871out: 879out_consume_skb:
872 consume_skb(skb); 880 consume_skb(skb);
881
873out_free_dst: 882out_free_dst:
874 dst_release(reply_dst); 883 dst_release(reply_dst);
875 return 0; 884 return NET_RX_SUCCESS;
885
886out_free_skb:
887 kfree_skb(skb);
888 return NET_RX_DROP;
876} 889}
877 890
878static void parp_redo(struct sk_buff *skb) 891static void parp_redo(struct sk_buff *skb)
@@ -916,11 +929,11 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
916 929
917consumeskb: 930consumeskb:
918 consume_skb(skb); 931 consume_skb(skb);
919 return 0; 932 return NET_RX_SUCCESS;
920freeskb: 933freeskb:
921 kfree_skb(skb); 934 kfree_skb(skb);
922out_of_mem: 935out_of_mem:
923 return 0; 936 return NET_RX_DROP;
924} 937}
925 938
926/* 939/*
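
arp_process()/arp_rcv() now report NET_RX_SUCCESS or NET_RX_DROP instead of always returning 0, and a gratuitous ARP (sip == tip) can be rejected outright when the new drop_gratuitous_arp knob is set for the ingress interface, which matters on wireless deployments where a proxy answers ARP and gratuitous frames look like spoofing. An illustrative way to enable the knob for all interfaces, assuming the usual per-device conf path implied by the devinet.c entry added further down:

#include <stdio.h>

static int enable_drop_gratuitous_arp(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/conf/all/drop_gratuitous_arp", "w");

	if (!f)
		return -1;
	fputs("1\n", f);
	return fclose(f);
}
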
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cebd9d31e65a..e333bc86bd39 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -334,6 +334,9 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
334 334
335 ASSERT_RTNL(); 335 ASSERT_RTNL();
336 336
337 if (in_dev->dead)
338 goto no_promotions;
339
337 /* 1. Deleting primary ifaddr forces deletion all secondaries 340 /* 1. Deleting primary ifaddr forces deletion all secondaries
338 * unless alias promotion is set 341 * unless alias promotion is set
339 **/ 342 **/
@@ -380,6 +383,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
380 fib_del_ifaddr(ifa, ifa1); 383 fib_del_ifaddr(ifa, ifa1);
381 } 384 }
382 385
386no_promotions:
383 /* 2. Unlink it */ 387 /* 2. Unlink it */
384 388
385 *ifap = ifa1->ifa_next; 389 *ifap = ifa1->ifa_next;
@@ -1194,6 +1198,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1194 __be32 addr = 0; 1198 __be32 addr = 0;
1195 struct in_device *in_dev; 1199 struct in_device *in_dev;
1196 struct net *net = dev_net(dev); 1200 struct net *net = dev_net(dev);
1201 int master_idx;
1197 1202
1198 rcu_read_lock(); 1203 rcu_read_lock();
1199 in_dev = __in_dev_get_rcu(dev); 1204 in_dev = __in_dev_get_rcu(dev);
@@ -1214,12 +1219,33 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1214 if (addr) 1219 if (addr)
1215 goto out_unlock; 1220 goto out_unlock;
1216no_in_dev: 1221no_in_dev:
1222 master_idx = l3mdev_master_ifindex_rcu(dev);
1223
1224 /* For VRFs, the VRF device takes the place of the loopback device,
1225 * with addresses on it being preferred. Note in such cases the
1226 * loopback device will be among the devices that fail the master_idx
1227 * equality check in the loop below.
1228 */
1229 if (master_idx &&
1230 (dev = dev_get_by_index_rcu(net, master_idx)) &&
1231 (in_dev = __in_dev_get_rcu(dev))) {
1232 for_primary_ifa(in_dev) {
1233 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1234 ifa->ifa_scope <= scope) {
1235 addr = ifa->ifa_local;
1236 goto out_unlock;
1237 }
1238 } endfor_ifa(in_dev);
1239 }
1217 1240
1218 /* Not loopback addresses on loopback should be preferred 1241 /* Not loopback addresses on loopback should be preferred
1219 in this case. It is important that lo is the first interface 1242 in this case. It is important that lo is the first interface
1220 in dev_base list. 1243 in dev_base list.
1221 */ 1244 */
1222 for_each_netdev_rcu(net, dev) { 1245 for_each_netdev_rcu(net, dev) {
1246 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1247 continue;
1248
1223 in_dev = __in_dev_get_rcu(dev); 1249 in_dev = __in_dev_get_rcu(dev);
1224 if (!in_dev) 1250 if (!in_dev)
1225 continue; 1251 continue;
@@ -1731,17 +1757,20 @@ static int inet_netconf_msgsize_devconf(int type)
1731{ 1757{
1732 int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) 1758 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1733 + nla_total_size(4); /* NETCONFA_IFINDEX */ 1759 + nla_total_size(4); /* NETCONFA_IFINDEX */
1760 bool all = false;
1761
1762 if (type == NETCONFA_ALL)
1763 all = true;
1734 1764
1735 /* type -1 is used for ALL */ 1765 if (all || type == NETCONFA_FORWARDING)
1736 if (type == -1 || type == NETCONFA_FORWARDING)
1737 size += nla_total_size(4); 1766 size += nla_total_size(4);
1738 if (type == -1 || type == NETCONFA_RP_FILTER) 1767 if (all || type == NETCONFA_RP_FILTER)
1739 size += nla_total_size(4); 1768 size += nla_total_size(4);
1740 if (type == -1 || type == NETCONFA_MC_FORWARDING) 1769 if (all || type == NETCONFA_MC_FORWARDING)
1741 size += nla_total_size(4); 1770 size += nla_total_size(4);
1742 if (type == -1 || type == NETCONFA_PROXY_NEIGH) 1771 if (all || type == NETCONFA_PROXY_NEIGH)
1743 size += nla_total_size(4); 1772 size += nla_total_size(4);
1744 if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) 1773 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1745 size += nla_total_size(4); 1774 size += nla_total_size(4);
1746 1775
1747 return size; 1776 return size;
@@ -1754,36 +1783,39 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1754{ 1783{
1755 struct nlmsghdr *nlh; 1784 struct nlmsghdr *nlh;
1756 struct netconfmsg *ncm; 1785 struct netconfmsg *ncm;
1786 bool all = false;
1757 1787
1758 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), 1788 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1759 flags); 1789 flags);
1760 if (!nlh) 1790 if (!nlh)
1761 return -EMSGSIZE; 1791 return -EMSGSIZE;
1762 1792
1793 if (type == NETCONFA_ALL)
1794 all = true;
1795
1763 ncm = nlmsg_data(nlh); 1796 ncm = nlmsg_data(nlh);
1764 ncm->ncm_family = AF_INET; 1797 ncm->ncm_family = AF_INET;
1765 1798
1766 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) 1799 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1767 goto nla_put_failure; 1800 goto nla_put_failure;
1768 1801
1769 /* type -1 is used for ALL */ 1802 if ((all || type == NETCONFA_FORWARDING) &&
1770 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1771 nla_put_s32(skb, NETCONFA_FORWARDING, 1803 nla_put_s32(skb, NETCONFA_FORWARDING,
1772 IPV4_DEVCONF(*devconf, FORWARDING)) < 0) 1804 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1773 goto nla_put_failure; 1805 goto nla_put_failure;
1774 if ((type == -1 || type == NETCONFA_RP_FILTER) && 1806 if ((all || type == NETCONFA_RP_FILTER) &&
1775 nla_put_s32(skb, NETCONFA_RP_FILTER, 1807 nla_put_s32(skb, NETCONFA_RP_FILTER,
1776 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) 1808 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1777 goto nla_put_failure; 1809 goto nla_put_failure;
1778 if ((type == -1 || type == NETCONFA_MC_FORWARDING) && 1810 if ((all || type == NETCONFA_MC_FORWARDING) &&
1779 nla_put_s32(skb, NETCONFA_MC_FORWARDING, 1811 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1780 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) 1812 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1781 goto nla_put_failure; 1813 goto nla_put_failure;
1782 if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && 1814 if ((all || type == NETCONFA_PROXY_NEIGH) &&
1783 nla_put_s32(skb, NETCONFA_PROXY_NEIGH, 1815 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1784 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) 1816 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1785 goto nla_put_failure; 1817 goto nla_put_failure;
1786 if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && 1818 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1787 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, 1819 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1788 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) 1820 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1789 goto nla_put_failure; 1821 goto nla_put_failure;
@@ -1847,7 +1879,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1847 if (err < 0) 1879 if (err < 0)
1848 goto errout; 1880 goto errout;
1849 1881
1850 err = EINVAL; 1882 err = -EINVAL;
1851 if (!tb[NETCONFA_IFINDEX]) 1883 if (!tb[NETCONFA_IFINDEX])
1852 goto errout; 1884 goto errout;
1853 1885
@@ -1871,14 +1903,14 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1871 } 1903 }
1872 1904
1873 err = -ENOBUFS; 1905 err = -ENOBUFS;
1874 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC); 1906 skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
1875 if (!skb) 1907 if (!skb)
1876 goto errout; 1908 goto errout;
1877 1909
1878 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 1910 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1879 NETLINK_CB(in_skb).portid, 1911 NETLINK_CB(in_skb).portid,
1880 nlh->nlmsg_seq, RTM_NEWNETCONF, 0, 1912 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1881 -1); 1913 NETCONFA_ALL);
1882 if (err < 0) { 1914 if (err < 0) {
1883 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ 1915 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1884 WARN_ON(err == -EMSGSIZE); 1916 WARN_ON(err == -EMSGSIZE);
@@ -1922,7 +1954,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
1922 cb->nlh->nlmsg_seq, 1954 cb->nlh->nlmsg_seq,
1923 RTM_NEWNETCONF, 1955 RTM_NEWNETCONF,
1924 NLM_F_MULTI, 1956 NLM_F_MULTI,
1925 -1) < 0) { 1957 NETCONFA_ALL) < 0) {
1926 rcu_read_unlock(); 1958 rcu_read_unlock();
1927 goto done; 1959 goto done;
1928 } 1960 }
@@ -1938,7 +1970,7 @@ cont:
1938 NETLINK_CB(cb->skb).portid, 1970 NETLINK_CB(cb->skb).portid,
1939 cb->nlh->nlmsg_seq, 1971 cb->nlh->nlmsg_seq,
1940 RTM_NEWNETCONF, NLM_F_MULTI, 1972 RTM_NEWNETCONF, NLM_F_MULTI,
1941 -1) < 0) 1973 NETCONFA_ALL) < 0)
1942 goto done; 1974 goto done;
1943 else 1975 else
1944 h++; 1976 h++;
@@ -1949,7 +1981,7 @@ cont:
1949 NETLINK_CB(cb->skb).portid, 1981 NETLINK_CB(cb->skb).portid,
1950 cb->nlh->nlmsg_seq, 1982 cb->nlh->nlmsg_seq,
1951 RTM_NEWNETCONF, NLM_F_MULTI, 1983 RTM_NEWNETCONF, NLM_F_MULTI,
1952 -1) < 0) 1984 NETCONFA_ALL) < 0)
1953 goto done; 1985 goto done;
1954 else 1986 else
1955 h++; 1987 h++;
@@ -2185,6 +2217,8 @@ static struct devinet_sysctl_table {
2185 "igmpv3_unsolicited_report_interval"), 2217 "igmpv3_unsolicited_report_interval"),
2186 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, 2218 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2187 "ignore_routes_with_linkdown"), 2219 "ignore_routes_with_linkdown"),
2220 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2221 "drop_gratuitous_arp"),
2188 2222
2189 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 2223 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2190 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 2224 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
@@ -2192,6 +2226,8 @@ static struct devinet_sysctl_table {
2192 "promote_secondaries"), 2226 "promote_secondaries"),
2193 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, 2227 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2194 "route_localnet"), 2228 "route_localnet"),
2229 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2230 "drop_unicast_in_l2_multicast"),
2195 }, 2231 },
2196}; 2232};
2197 2233
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 473447593060..8a9246deccfe 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -280,7 +280,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
280 struct in_device *in_dev; 280 struct in_device *in_dev;
281 struct fib_result res; 281 struct fib_result res;
282 struct rtable *rt; 282 struct rtable *rt;
283 struct flowi4 fl4;
284 struct net *net; 283 struct net *net;
285 int scope; 284 int scope;
286 285
@@ -296,14 +295,13 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
296 295
297 scope = RT_SCOPE_UNIVERSE; 296 scope = RT_SCOPE_UNIVERSE;
298 if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { 297 if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
299 fl4.flowi4_oif = 0; 298 struct flowi4 fl4 = {
300 fl4.flowi4_iif = LOOPBACK_IFINDEX; 299 .flowi4_iif = LOOPBACK_IFINDEX,
301 fl4.daddr = ip_hdr(skb)->saddr; 300 .daddr = ip_hdr(skb)->saddr,
302 fl4.saddr = 0; 301 .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
303 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 302 .flowi4_scope = scope,
304 fl4.flowi4_scope = scope; 303 .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
305 fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; 304 };
306 fl4.flowi4_tun_key.tun_id = 0;
307 if (!fib_lookup(net, &fl4, &res, 0)) 305 if (!fib_lookup(net, &fl4, &res, 0))
308 return FIB_RES_PREFSRC(net, res); 306 return FIB_RES_PREFSRC(net, res);
309 } else { 307 } else {
@@ -922,6 +920,9 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
922 subnet = 1; 920 subnet = 1;
923 } 921 }
924 922
923 if (in_dev->dead)
924 goto no_promotions;
925
925 /* Deletion is more complicated than add. 926 /* Deletion is more complicated than add.
926 * We should take care of not to delete too much :-) 927 * We should take care of not to delete too much :-)
927 * 928 *
@@ -997,6 +998,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
997 } 998 }
998 } 999 }
999 1000
1001no_promotions:
1000 if (!(ok & BRD_OK)) 1002 if (!(ok & BRD_OK))
1001 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 1003 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
1002 if (subnet && ifa->ifa_prefixlen < 31) { 1004 if (subnet && ifa->ifa_prefixlen < 31) {
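
The fib_compute_spec_dst() change above moves the on-stack flowi4 into a designated initializer, so fields that were previously cleared one by one (flowi4_oif, saddr, flowi4_tun_key.tun_id) and anything not named at all are now zero-initialized by the compiler. A minimal user-space sketch of that C behaviour (the struct below is illustrative, not the kernel's struct flowi4):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for struct flowi4: only a few illustrative members. */
struct flow_key {
    int      oif;
    int      iif;
    uint32_t daddr;
    uint32_t saddr;
    uint8_t  tos;
    uint64_t tun_id;
};

int main(void)
{
    /* Designated initializer: every member not named here is zeroed,
     * so oif, saddr and tun_id need no explicit clearing. */
    struct flow_key fl = {
        .iif   = 1,          /* loopback-style ifindex */
        .daddr = 0x0a000001, /* 10.0.0.1 */
        .tos   = 0x10,
    };

    assert(fl.oif == 0 && fl.saddr == 0 && fl.tun_id == 0);
    printf("unnamed members zeroed: oif=%d saddr=%u tun_id=%llu\n",
           fl.oif, (unsigned)fl.saddr, (unsigned long long)fl.tun_id);
    return 0;
}
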
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 7aea0ccb6be6..d07fc076bea0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1394,9 +1394,10 @@ found:
1394 struct fib_info *fi = fa->fa_info; 1394 struct fib_info *fi = fa->fa_info;
1395 int nhsel, err; 1395 int nhsel, err;
1396 1396
1397 if ((index >= (1ul << fa->fa_slen)) && 1397 if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
1398 ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen != KEYLENGTH))) 1398 if (index >= (1ul << fa->fa_slen))
1399 continue; 1399 continue;
1400 }
1400 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) 1401 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1401 continue; 1402 continue;
1402 if (fi->fib_dead) 1403 if (fi->fib_dead)
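
The fib_trie.c hunk above reorders the longest-prefix check so that `index >= (1ul << fa->fa_slen)` is evaluated only when the shift count is known to be below the width of unsigned long; shifting by the full word width is undefined behaviour in C. A simplified sketch of the same guard, assuming for brevity that the key length equals the machine word size (the kernel's KEYLENGTH is the 32-bit key width):

#include <stdio.h>

#define KEYLENGTH (8 * sizeof(unsigned long))  /* 64 on LP64, 32 on ILP32 */

/* Returns 1 when 'index' cannot fall under a prefix of length 'slen'. */
static int index_out_of_range(unsigned long index, unsigned int slen)
{
    /* Guard first: 1ul << KEYLENGTH would be undefined behaviour,
     * so a full-width prefix never rejects an index here. */
    if (slen < KEYLENGTH)
        return index >= (1ul << slen);
    return 0;
}

int main(void)
{
    printf("%d\n", index_out_of_range(300, 8));                     /* 1: 300 >= 256 */
    printf("%d\n", index_out_of_range(300, (unsigned)KEYLENGTH));   /* 0: guard path */
    return 0;
}
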
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 976f0dcf6991..a0586b4a197d 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -48,7 +48,7 @@ static inline struct fou *fou_from_sock(struct sock *sk)
48 return sk->sk_user_data; 48 return sk->sk_user_data;
49} 49}
50 50
51static void fou_recv_pull(struct sk_buff *skb, size_t len) 51static int fou_recv_pull(struct sk_buff *skb, size_t len)
52{ 52{
53 struct iphdr *iph = ip_hdr(skb); 53 struct iphdr *iph = ip_hdr(skb);
54 54
@@ -59,6 +59,7 @@ static void fou_recv_pull(struct sk_buff *skb, size_t len)
59 __skb_pull(skb, len); 59 __skb_pull(skb, len);
60 skb_postpull_rcsum(skb, udp_hdr(skb), len); 60 skb_postpull_rcsum(skb, udp_hdr(skb), len);
61 skb_reset_transport_header(skb); 61 skb_reset_transport_header(skb);
62 return iptunnel_pull_offloads(skb);
62} 63}
63 64
64static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) 65static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
@@ -68,9 +69,14 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
68 if (!fou) 69 if (!fou)
69 return 1; 70 return 1;
70 71
71 fou_recv_pull(skb, sizeof(struct udphdr)); 72 if (fou_recv_pull(skb, sizeof(struct udphdr)))
73 goto drop;
72 74
73 return -fou->protocol; 75 return -fou->protocol;
76
77drop:
78 kfree_skb(skb);
79 return 0;
74} 80}
75 81
76static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, 82static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
@@ -170,6 +176,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
170 __skb_pull(skb, sizeof(struct udphdr) + hdrlen); 176 __skb_pull(skb, sizeof(struct udphdr) + hdrlen);
171 skb_reset_transport_header(skb); 177 skb_reset_transport_header(skb);
172 178
179 if (iptunnel_pull_offloads(skb))
180 goto drop;
181
173 return -guehdr->proto_ctype; 182 return -guehdr->proto_ctype;
174 183
175drop: 184drop:
@@ -319,8 +328,6 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
319 328
320 skb_gro_pull(skb, hdrlen); 329 skb_gro_pull(skb, hdrlen);
321 330
322 flush = 0;
323
324 for (p = *head; p; p = p->next) { 331 for (p = *head; p; p = p->next) {
325 const struct guehdr *guehdr2; 332 const struct guehdr *guehdr2;
326 333
@@ -352,6 +359,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
352 goto out_unlock; 359 goto out_unlock;
353 360
354 pp = ops->callbacks.gro_receive(head, skb); 361 pp = ops->callbacks.gro_receive(head, skb);
362 flush = 0;
355 363
356out_unlock: 364out_unlock:
357 rcu_read_unlock(); 365 rcu_read_unlock();
@@ -774,7 +782,6 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
774 uh->dest = e->dport; 782 uh->dest = e->dport;
775 uh->source = sport; 783 uh->source = sport;
776 uh->len = htons(skb->len); 784 uh->len = htons(skb->len);
777 uh->check = 0;
778 udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb, 785 udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
779 fl4->saddr, fl4->daddr, skb->len); 786 fl4->saddr, fl4->daddr, skb->len);
780 787
@@ -784,11 +791,11 @@ static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
784int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 791int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
785 u8 *protocol, struct flowi4 *fl4) 792 u8 *protocol, struct flowi4 *fl4)
786{ 793{
787 bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM); 794 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
788 int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; 795 SKB_GSO_UDP_TUNNEL;
789 __be16 sport; 796 __be16 sport;
790 797
791 skb = iptunnel_handle_offloads(skb, csum, type); 798 skb = iptunnel_handle_offloads(skb, type);
792 799
793 if (IS_ERR(skb)) 800 if (IS_ERR(skb))
794 return PTR_ERR(skb); 801 return PTR_ERR(skb);
@@ -804,8 +811,8 @@ EXPORT_SYMBOL(fou_build_header);
804int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e, 811int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
805 u8 *protocol, struct flowi4 *fl4) 812 u8 *protocol, struct flowi4 *fl4)
806{ 813{
807 bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM); 814 int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
808 int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; 815 SKB_GSO_UDP_TUNNEL;
809 struct guehdr *guehdr; 816 struct guehdr *guehdr;
810 size_t hdrlen, optlen = 0; 817 size_t hdrlen, optlen = 0;
811 __be16 sport; 818 __be16 sport;
@@ -814,7 +821,6 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
814 821
815 if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) && 822 if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
816 skb->ip_summed == CHECKSUM_PARTIAL) { 823 skb->ip_summed == CHECKSUM_PARTIAL) {
817 csum = false;
818 optlen += GUE_PLEN_REMCSUM; 824 optlen += GUE_PLEN_REMCSUM;
819 type |= SKB_GSO_TUNNEL_REMCSUM; 825 type |= SKB_GSO_TUNNEL_REMCSUM;
820 need_priv = true; 826 need_priv = true;
@@ -822,7 +828,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
822 828
823 optlen += need_priv ? GUE_LEN_PRIV : 0; 829 optlen += need_priv ? GUE_LEN_PRIV : 0;
824 830
825 skb = iptunnel_handle_offloads(skb, csum, type); 831 skb = iptunnel_handle_offloads(skb, type);
826 832
827 if (IS_ERR(skb)) 833 if (IS_ERR(skb))
828 return PTR_ERR(skb); 834 return PTR_ERR(skb);
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 5a8ee3282550..c47539d04b88 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -18,15 +18,13 @@
18static struct sk_buff *gre_gso_segment(struct sk_buff *skb, 18static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
19 netdev_features_t features) 19 netdev_features_t features)
20{ 20{
21 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
21 struct sk_buff *segs = ERR_PTR(-EINVAL); 22 struct sk_buff *segs = ERR_PTR(-EINVAL);
22 netdev_features_t enc_features;
23 int ghl;
24 struct gre_base_hdr *greh;
25 u16 mac_offset = skb->mac_header; 23 u16 mac_offset = skb->mac_header;
26 int mac_len = skb->mac_len;
27 __be16 protocol = skb->protocol; 24 __be16 protocol = skb->protocol;
28 int tnl_hlen; 25 u16 mac_len = skb->mac_len;
29 bool csum; 26 int gre_offset, outer_hlen;
27 bool need_csum, ufo;
30 28
31 if (unlikely(skb_shinfo(skb)->gso_type & 29 if (unlikely(skb_shinfo(skb)->gso_type &
32 ~(SKB_GSO_TCPV4 | 30 ~(SKB_GSO_TCPV4 |
@@ -43,74 +41,75 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
43 if (!skb->encapsulation) 41 if (!skb->encapsulation)
44 goto out; 42 goto out;
45 43
46 if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) 44 if (unlikely(tnl_hlen < sizeof(struct gre_base_hdr)))
47 goto out; 45 goto out;
48 46
49 greh = (struct gre_base_hdr *)skb_transport_header(skb); 47 if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
50
51 ghl = skb_inner_mac_header(skb) - skb_transport_header(skb);
52 if (unlikely(ghl < sizeof(*greh)))
53 goto out; 48 goto out;
54 49
55 csum = !!(greh->flags & GRE_CSUM);
56 if (csum)
57 skb->encap_hdr_csum = 1;
58
59 /* setup inner skb. */ 50 /* setup inner skb. */
60 skb->protocol = greh->protocol;
61 skb->encapsulation = 0; 51 skb->encapsulation = 0;
62 52 SKB_GSO_CB(skb)->encap_level = 0;
63 if (unlikely(!pskb_may_pull(skb, ghl))) 53 __skb_pull(skb, tnl_hlen);
64 goto out;
65
66 __skb_pull(skb, ghl);
67 skb_reset_mac_header(skb); 54 skb_reset_mac_header(skb);
68 skb_set_network_header(skb, skb_inner_network_offset(skb)); 55 skb_set_network_header(skb, skb_inner_network_offset(skb));
69 skb->mac_len = skb_inner_network_offset(skb); 56 skb->mac_len = skb_inner_network_offset(skb);
57 skb->protocol = skb->inner_protocol;
58
59 need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
60 skb->encap_hdr_csum = need_csum;
61
62 ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
63
64 features &= skb->dev->hw_enc_features;
65
66 /* The only checksum offload we care about from here on out is the
67 * outer one so strip the existing checksum feature flags based
68 * on the fact that we will be computing our checksum in software.
69 */
70 if (ufo) {
71 features &= ~NETIF_F_CSUM_MASK;
72 if (!need_csum)
73 features |= NETIF_F_HW_CSUM;
74 }
70 75
71 /* segment inner packet. */ 76 /* segment inner packet. */
72 enc_features = skb->dev->hw_enc_features & features; 77 segs = skb_mac_gso_segment(skb, features);
73 segs = skb_mac_gso_segment(skb, enc_features);
74 if (IS_ERR_OR_NULL(segs)) { 78 if (IS_ERR_OR_NULL(segs)) {
75 skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); 79 skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
80 mac_len);
76 goto out; 81 goto out;
77 } 82 }
78 83
84 outer_hlen = skb_tnl_header_len(skb);
85 gre_offset = outer_hlen - tnl_hlen;
79 skb = segs; 86 skb = segs;
80 tnl_hlen = skb_tnl_header_len(skb);
81 do { 87 do {
82 __skb_push(skb, ghl); 88 struct gre_base_hdr *greh;
83 if (csum) { 89 __be32 *pcsum;
84 __be32 *pcsum;
85
86 if (skb_has_shared_frag(skb)) {
87 int err;
88
89 err = __skb_linearize(skb);
90 if (err) {
91 kfree_skb_list(segs);
92 segs = ERR_PTR(err);
93 goto out;
94 }
95 }
96
97 skb_reset_transport_header(skb);
98 90
99 greh = (struct gre_base_hdr *) 91 /* Set up inner headers if we are offloading inner checksum */
100 skb_transport_header(skb); 92 if (skb->ip_summed == CHECKSUM_PARTIAL) {
101 pcsum = (__be32 *)(greh + 1); 93 skb_reset_inner_headers(skb);
102 *pcsum = 0; 94 skb->encapsulation = 1;
103 *(__sum16 *)pcsum = gso_make_checksum(skb, 0);
104 } 95 }
105 __skb_push(skb, tnl_hlen - ghl);
106 96
107 skb_reset_inner_headers(skb); 97 skb->mac_len = mac_len;
108 skb->encapsulation = 1; 98 skb->protocol = protocol;
109 99
100 __skb_push(skb, outer_hlen);
110 skb_reset_mac_header(skb); 101 skb_reset_mac_header(skb);
111 skb_set_network_header(skb, mac_len); 102 skb_set_network_header(skb, mac_len);
112 skb->mac_len = mac_len; 103 skb_set_transport_header(skb, gre_offset);
113 skb->protocol = protocol; 104
105 if (!need_csum)
106 continue;
107
108 greh = (struct gre_base_hdr *)skb_transport_header(skb);
109 pcsum = (__be32 *)(greh + 1);
110
111 *pcsum = 0;
112 *(__sum16 *)pcsum = gso_make_checksum(skb, 0);
114 } while ((skb = skb->next)); 113 } while ((skb = skb->next));
115out: 114out:
116 return segs; 115 return segs;
@@ -128,6 +127,11 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
128 struct packet_offload *ptype; 127 struct packet_offload *ptype;
129 __be16 type; 128 __be16 type;
130 129
130 if (NAPI_GRO_CB(skb)->encap_mark)
131 goto out;
132
133 NAPI_GRO_CB(skb)->encap_mark = 1;
134
131 off = skb_gro_offset(skb); 135 off = skb_gro_offset(skb);
132 hlen = off + sizeof(*greh); 136 hlen = off + sizeof(*greh);
133 greh = skb_gro_header_fast(skb, off); 137 greh = skb_gro_header_fast(skb, off);
@@ -177,8 +181,6 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
177 null_compute_pseudo); 181 null_compute_pseudo);
178 } 182 }
179 183
180 flush = 0;
181
182 for (p = *head; p; p = p->next) { 184 for (p = *head; p; p = p->next) {
183 const struct gre_base_hdr *greh2; 185 const struct gre_base_hdr *greh2;
184 186
@@ -215,6 +217,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
215 skb_gro_postpull_rcsum(skb, greh, grehlen); 217 skb_gro_postpull_rcsum(skb, greh, grehlen);
216 218
217 pp = ptype->callbacks.gro_receive(head, skb); 219 pp = ptype->callbacks.gro_receive(head, skb);
220 flush = 0;
218 221
219out_unlock: 222out_unlock:
220 rcu_read_unlock(); 223 rcu_read_unlock();
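
gre_gro_receive() above now sets an encap_mark flag before descending into the inner protocol, so a single GRO pass only ever peels one encapsulation layer, and flush is cleared only once the inner gro_receive call is actually reached, leaving every earlier bail-out path with the flush bit still set. A rough user-space sketch of that one-shot guard (gro_ctx and its fields are illustrative, not the NAPI_GRO_CB layout):

#include <stdbool.h>
#include <stdio.h>

struct gro_ctx {
    bool encap_mark;   /* set once an encapsulation layer was parsed */
    bool flush;        /* starts true; cleared only on a full match  */
};

/* Parse at most one tunnel layer; nested encapsulations fall through. */
static int tunnel_gro_receive(struct gro_ctx *ctx)
{
    if (ctx->encap_mark)
        return -1;              /* already inside a tunnel: give up */
    ctx->encap_mark = true;

    /* ... header checks and inner-protocol dispatch would go here ... */

    ctx->flush = false;         /* only reached on the success path */
    return 0;
}

int main(void)
{
    struct gro_ctx ctx = { .flush = true };

    printf("first layer:  %d\n", tunnel_gro_receive(&ctx));   /* 0  */
    printf("nested layer: %d\n", tunnel_gro_receive(&ctx));   /* -1 */
    return 0;
}
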
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 36e26977c908..6333489771ed 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -931,7 +931,6 @@ static bool icmp_echo(struct sk_buff *skb)
931 */ 931 */
932static bool icmp_timestamp(struct sk_buff *skb) 932static bool icmp_timestamp(struct sk_buff *skb)
933{ 933{
934 struct timespec tv;
935 struct icmp_bxm icmp_param; 934 struct icmp_bxm icmp_param;
936 /* 935 /*
937 * Too short. 936 * Too short.
@@ -942,9 +941,7 @@ static bool icmp_timestamp(struct sk_buff *skb)
942 /* 941 /*
943 * Fill in the current time as ms since midnight UT: 942 * Fill in the current time as ms since midnight UT:
944 */ 943 */
945 getnstimeofday(&tv); 944 icmp_param.data.times[1] = inet_current_timestamp();
946 icmp_param.data.times[1] = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC +
947 tv.tv_nsec / NSEC_PER_MSEC);
948 icmp_param.data.times[2] = icmp_param.data.times[1]; 945 icmp_param.data.times[2] = icmp_param.data.times[1];
949 if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) 946 if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
950 BUG(); 947 BUG();
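
icmp_timestamp() above replaces the open-coded getnstimeofday() arithmetic with inet_current_timestamp(), which produces the RFC 792 timestamp format directly: milliseconds since midnight UT as a big-endian 32-bit value. A user-space sketch of the computation the helper is expected to perform (the function name below is illustrative):

#include <arpa/inet.h>   /* htonl(), ntohl() */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Milliseconds since midnight UT, network byte order (RFC 792 timestamp). */
static uint32_t current_icmp_timestamp(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_REALTIME, &ts);
    return htonl((uint32_t)((ts.tv_sec % 86400) * 1000 +
                            ts.tv_nsec / 1000000));
}

int main(void)
{
    printf("icmp timestamp (host order): %u ms\n",
           (unsigned)ntohl(current_icmp_timestamp()));
    return 0;
}
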
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 05e4cba14162..9b4ca87f70ba 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -107,12 +107,6 @@
107#include <linux/seq_file.h> 107#include <linux/seq_file.h>
108#endif 108#endif
109 109
110#define IP_MAX_MEMBERSHIPS 20
111#define IP_MAX_MSF 10
112
113/* IGMP reports for link-local multicast groups are enabled by default */
114int sysctl_igmp_llm_reports __read_mostly = 1;
115
116#ifdef CONFIG_IP_MULTICAST 110#ifdef CONFIG_IP_MULTICAST
117/* Parameter names and values are taken from igmp-v2-06 draft */ 111/* Parameter names and values are taken from igmp-v2-06 draft */
118 112
@@ -356,9 +350,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
356 skb_dst_set(skb, &rt->dst); 350 skb_dst_set(skb, &rt->dst);
357 skb->dev = dev; 351 skb->dev = dev;
358 352
359 skb->reserved_tailroom = skb_end_offset(skb) -
360 min(mtu, skb_end_offset(skb));
361 skb_reserve(skb, hlen); 353 skb_reserve(skb, hlen);
354 skb_tailroom_reserve(skb, mtu, tlen);
362 355
363 skb_reset_network_header(skb); 356 skb_reset_network_header(skb);
364 pip = ip_hdr(skb); 357 pip = ip_hdr(skb);
@@ -433,6 +426,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
433 int type, int gdeleted, int sdeleted) 426 int type, int gdeleted, int sdeleted)
434{ 427{
435 struct net_device *dev = pmc->interface->dev; 428 struct net_device *dev = pmc->interface->dev;
429 struct net *net = dev_net(dev);
436 struct igmpv3_report *pih; 430 struct igmpv3_report *pih;
437 struct igmpv3_grec *pgr = NULL; 431 struct igmpv3_grec *pgr = NULL;
438 struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; 432 struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
@@ -440,7 +434,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
440 434
441 if (pmc->multiaddr == IGMP_ALL_HOSTS) 435 if (pmc->multiaddr == IGMP_ALL_HOSTS)
442 return skb; 436 return skb;
443 if (ipv4_is_local_multicast(pmc->multiaddr) && !sysctl_igmp_llm_reports) 437 if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
444 return skb; 438 return skb;
445 439
446 isquery = type == IGMPV3_MODE_IS_INCLUDE || 440 isquery = type == IGMPV3_MODE_IS_INCLUDE ||
@@ -543,6 +537,7 @@ empty_source:
543static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) 537static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
544{ 538{
545 struct sk_buff *skb = NULL; 539 struct sk_buff *skb = NULL;
540 struct net *net = dev_net(in_dev->dev);
546 int type; 541 int type;
547 542
548 if (!pmc) { 543 if (!pmc) {
@@ -551,7 +546,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
551 if (pmc->multiaddr == IGMP_ALL_HOSTS) 546 if (pmc->multiaddr == IGMP_ALL_HOSTS)
552 continue; 547 continue;
553 if (ipv4_is_local_multicast(pmc->multiaddr) && 548 if (ipv4_is_local_multicast(pmc->multiaddr) &&
554 !sysctl_igmp_llm_reports) 549 !net->ipv4.sysctl_igmp_llm_reports)
555 continue; 550 continue;
556 spin_lock_bh(&pmc->lock); 551 spin_lock_bh(&pmc->lock);
557 if (pmc->sfcount[MCAST_EXCLUDE]) 552 if (pmc->sfcount[MCAST_EXCLUDE])
@@ -687,7 +682,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
687 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) 682 if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
688 return igmpv3_send_report(in_dev, pmc); 683 return igmpv3_send_report(in_dev, pmc);
689 684
690 if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports) 685 if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
691 return 0; 686 return 0;
692 687
693 if (type == IGMP_HOST_LEAVE_MESSAGE) 688 if (type == IGMP_HOST_LEAVE_MESSAGE)
@@ -766,9 +761,10 @@ static void igmp_ifc_timer_expire(unsigned long data)
766 761
767static void igmp_ifc_event(struct in_device *in_dev) 762static void igmp_ifc_event(struct in_device *in_dev)
768{ 763{
764 struct net *net = dev_net(in_dev->dev);
769 if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) 765 if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
770 return; 766 return;
771 in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv; 767 in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
772 igmp_ifc_start_timer(in_dev, 1); 768 igmp_ifc_start_timer(in_dev, 1);
773} 769}
774 770
@@ -858,12 +854,13 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
858static bool igmp_heard_report(struct in_device *in_dev, __be32 group) 854static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
859{ 855{
860 struct ip_mc_list *im; 856 struct ip_mc_list *im;
857 struct net *net = dev_net(in_dev->dev);
861 858
862 /* Timers are only set for non-local groups */ 859 /* Timers are only set for non-local groups */
863 860
864 if (group == IGMP_ALL_HOSTS) 861 if (group == IGMP_ALL_HOSTS)
865 return false; 862 return false;
866 if (ipv4_is_local_multicast(group) && !sysctl_igmp_llm_reports) 863 if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
867 return false; 864 return false;
868 865
869 rcu_read_lock(); 866 rcu_read_lock();
@@ -887,6 +884,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
887 __be32 group = ih->group; 884 __be32 group = ih->group;
888 int max_delay; 885 int max_delay;
889 int mark = 0; 886 int mark = 0;
887 struct net *net = dev_net(in_dev->dev);
890 888
891 889
892 if (len == 8) { 890 if (len == 8) {
@@ -972,7 +970,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
972 if (im->multiaddr == IGMP_ALL_HOSTS) 970 if (im->multiaddr == IGMP_ALL_HOSTS)
973 continue; 971 continue;
974 if (ipv4_is_local_multicast(im->multiaddr) && 972 if (ipv4_is_local_multicast(im->multiaddr) &&
975 !sysctl_igmp_llm_reports) 973 !net->ipv4.sysctl_igmp_llm_reports)
976 continue; 974 continue;
977 spin_lock_bh(&im->lock); 975 spin_lock_bh(&im->lock);
978 if (im->tm_running) 976 if (im->tm_running)
@@ -1088,6 +1086,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
1088static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) 1086static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
1089{ 1087{
1090 struct ip_mc_list *pmc; 1088 struct ip_mc_list *pmc;
1089 struct net *net = dev_net(in_dev->dev);
1091 1090
1092 /* this is an "ip_mc_list" for convenience; only the fields below 1091 /* this is an "ip_mc_list" for convenience; only the fields below
1093 * are actually used. In particular, the refcnt and users are not 1092 * are actually used. In particular, the refcnt and users are not
@@ -1102,7 +1101,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
1102 pmc->interface = im->interface; 1101 pmc->interface = im->interface;
1103 in_dev_hold(in_dev); 1102 in_dev_hold(in_dev);
1104 pmc->multiaddr = im->multiaddr; 1103 pmc->multiaddr = im->multiaddr;
1105 pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; 1104 pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
1106 pmc->sfmode = im->sfmode; 1105 pmc->sfmode = im->sfmode;
1107 if (pmc->sfmode == MCAST_INCLUDE) { 1106 if (pmc->sfmode == MCAST_INCLUDE) {
1108 struct ip_sf_list *psf; 1107 struct ip_sf_list *psf;
@@ -1187,6 +1186,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
1187{ 1186{
1188 struct in_device *in_dev = im->interface; 1187 struct in_device *in_dev = im->interface;
1189#ifdef CONFIG_IP_MULTICAST 1188#ifdef CONFIG_IP_MULTICAST
1189 struct net *net = dev_net(in_dev->dev);
1190 int reporter; 1190 int reporter;
1191#endif 1191#endif
1192 1192
@@ -1198,7 +1198,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
1198#ifdef CONFIG_IP_MULTICAST 1198#ifdef CONFIG_IP_MULTICAST
1199 if (im->multiaddr == IGMP_ALL_HOSTS) 1199 if (im->multiaddr == IGMP_ALL_HOSTS)
1200 return; 1200 return;
1201 if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports) 1201 if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
1202 return; 1202 return;
1203 1203
1204 reporter = im->reporter; 1204 reporter = im->reporter;
@@ -1223,6 +1223,9 @@ static void igmp_group_dropped(struct ip_mc_list *im)
1223static void igmp_group_added(struct ip_mc_list *im) 1223static void igmp_group_added(struct ip_mc_list *im)
1224{ 1224{
1225 struct in_device *in_dev = im->interface; 1225 struct in_device *in_dev = im->interface;
1226#ifdef CONFIG_IP_MULTICAST
1227 struct net *net = dev_net(in_dev->dev);
1228#endif
1226 1229
1227 if (im->loaded == 0) { 1230 if (im->loaded == 0) {
1228 im->loaded = 1; 1231 im->loaded = 1;
@@ -1232,7 +1235,7 @@ static void igmp_group_added(struct ip_mc_list *im)
1232#ifdef CONFIG_IP_MULTICAST 1235#ifdef CONFIG_IP_MULTICAST
1233 if (im->multiaddr == IGMP_ALL_HOSTS) 1236 if (im->multiaddr == IGMP_ALL_HOSTS)
1234 return; 1237 return;
1235 if (ipv4_is_local_multicast(im->multiaddr) && !sysctl_igmp_llm_reports) 1238 if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
1236 return; 1239 return;
1237 1240
1238 if (in_dev->dead) 1241 if (in_dev->dead)
@@ -1245,7 +1248,7 @@ static void igmp_group_added(struct ip_mc_list *im)
1245 } 1248 }
1246 /* else, v3 */ 1249 /* else, v3 */
1247 1250
1248 im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; 1251 im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
1249 igmp_ifc_event(in_dev); 1252 igmp_ifc_event(in_dev);
1250#endif 1253#endif
1251} 1254}
@@ -1314,6 +1317,9 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
1314void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) 1317void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1315{ 1318{
1316 struct ip_mc_list *im; 1319 struct ip_mc_list *im;
1320#ifdef CONFIG_IP_MULTICAST
1321 struct net *net = dev_net(in_dev->dev);
1322#endif
1317 1323
1318 ASSERT_RTNL(); 1324 ASSERT_RTNL();
1319 1325
@@ -1340,7 +1346,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1340 spin_lock_init(&im->lock); 1346 spin_lock_init(&im->lock);
1341#ifdef CONFIG_IP_MULTICAST 1347#ifdef CONFIG_IP_MULTICAST
1342 setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); 1348 setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
1343 im->unsolicit_count = sysctl_igmp_qrv; 1349 im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
1344#endif 1350#endif
1345 1351
1346 im->next_rcu = in_dev->mc_list; 1352 im->next_rcu = in_dev->mc_list;
@@ -1533,6 +1539,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
1533#ifdef CONFIG_IP_MULTICAST 1539#ifdef CONFIG_IP_MULTICAST
1534 struct ip_mc_list *im; 1540 struct ip_mc_list *im;
1535 int type; 1541 int type;
1542 struct net *net = dev_net(in_dev->dev);
1536 1543
1537 ASSERT_RTNL(); 1544 ASSERT_RTNL();
1538 1545
@@ -1540,7 +1547,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
1540 if (im->multiaddr == IGMP_ALL_HOSTS) 1547 if (im->multiaddr == IGMP_ALL_HOSTS)
1541 continue; 1548 continue;
1542 if (ipv4_is_local_multicast(im->multiaddr) && 1549 if (ipv4_is_local_multicast(im->multiaddr) &&
1543 !sysctl_igmp_llm_reports) 1550 !net->ipv4.sysctl_igmp_llm_reports)
1544 continue; 1551 continue;
1545 1552
1546 /* a failover is happening and switches 1553 /* a failover is happening and switches
@@ -1639,6 +1646,9 @@ void ip_mc_down(struct in_device *in_dev)
1639 1646
1640void ip_mc_init_dev(struct in_device *in_dev) 1647void ip_mc_init_dev(struct in_device *in_dev)
1641{ 1648{
1649#ifdef CONFIG_IP_MULTICAST
1650 struct net *net = dev_net(in_dev->dev);
1651#endif
1642 ASSERT_RTNL(); 1652 ASSERT_RTNL();
1643 1653
1644#ifdef CONFIG_IP_MULTICAST 1654#ifdef CONFIG_IP_MULTICAST
@@ -1646,7 +1656,7 @@ void ip_mc_init_dev(struct in_device *in_dev)
1646 (unsigned long)in_dev); 1656 (unsigned long)in_dev);
1647 setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 1657 setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire,
1648 (unsigned long)in_dev); 1658 (unsigned long)in_dev);
1649 in_dev->mr_qrv = sysctl_igmp_qrv; 1659 in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
1650#endif 1660#endif
1651 1661
1652 spin_lock_init(&in_dev->mc_tomb_lock); 1662 spin_lock_init(&in_dev->mc_tomb_lock);
@@ -1657,11 +1667,14 @@ void ip_mc_init_dev(struct in_device *in_dev)
1657void ip_mc_up(struct in_device *in_dev) 1667void ip_mc_up(struct in_device *in_dev)
1658{ 1668{
1659 struct ip_mc_list *pmc; 1669 struct ip_mc_list *pmc;
1670#ifdef CONFIG_IP_MULTICAST
1671 struct net *net = dev_net(in_dev->dev);
1672#endif
1660 1673
1661 ASSERT_RTNL(); 1674 ASSERT_RTNL();
1662 1675
1663#ifdef CONFIG_IP_MULTICAST 1676#ifdef CONFIG_IP_MULTICAST
1664 in_dev->mr_qrv = sysctl_igmp_qrv; 1677 in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
1665#endif 1678#endif
1666 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); 1679 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
1667 1680
@@ -1727,11 +1740,6 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1727/* 1740/*
1728 * Join a socket to a group 1741 * Join a socket to a group
1729 */ 1742 */
1730int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
1731int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
1732#ifdef CONFIG_IP_MULTICAST
1733int sysctl_igmp_qrv __read_mostly = IGMP_QUERY_ROBUSTNESS_VARIABLE;
1734#endif
1735 1743
1736static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, 1744static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
1737 __be32 *psfsrc) 1745 __be32 *psfsrc)
@@ -1756,6 +1764,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
1756 if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) { 1764 if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
1757#ifdef CONFIG_IP_MULTICAST 1765#ifdef CONFIG_IP_MULTICAST
1758 struct in_device *in_dev = pmc->interface; 1766 struct in_device *in_dev = pmc->interface;
1767 struct net *net = dev_net(in_dev->dev);
1759#endif 1768#endif
1760 1769
1761 /* no more filters for this source */ 1770 /* no more filters for this source */
@@ -1766,7 +1775,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
1766#ifdef CONFIG_IP_MULTICAST 1775#ifdef CONFIG_IP_MULTICAST
1767 if (psf->sf_oldin && 1776 if (psf->sf_oldin &&
1768 !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { 1777 !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
1769 psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; 1778 psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
1770 psf->sf_next = pmc->tomb; 1779 psf->sf_next = pmc->tomb;
1771 pmc->tomb = psf; 1780 pmc->tomb = psf;
1772 rv = 1; 1781 rv = 1;
@@ -1824,12 +1833,13 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
1824 pmc->sfcount[MCAST_INCLUDE]) { 1833 pmc->sfcount[MCAST_INCLUDE]) {
1825#ifdef CONFIG_IP_MULTICAST 1834#ifdef CONFIG_IP_MULTICAST
1826 struct ip_sf_list *psf; 1835 struct ip_sf_list *psf;
1836 struct net *net = dev_net(in_dev->dev);
1827#endif 1837#endif
1828 1838
1829 /* filter mode change */ 1839 /* filter mode change */
1830 pmc->sfmode = MCAST_INCLUDE; 1840 pmc->sfmode = MCAST_INCLUDE;
1831#ifdef CONFIG_IP_MULTICAST 1841#ifdef CONFIG_IP_MULTICAST
1832 pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; 1842 pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
1833 in_dev->mr_ifc_count = pmc->crcount; 1843 in_dev->mr_ifc_count = pmc->crcount;
1834 for (psf = pmc->sources; psf; psf = psf->sf_next) 1844 for (psf = pmc->sources; psf; psf = psf->sf_next)
1835 psf->sf_crcount = 0; 1845 psf->sf_crcount = 0;
@@ -1996,6 +2006,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
1996 } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) { 2006 } else if (isexclude != (pmc->sfcount[MCAST_EXCLUDE] != 0)) {
1997#ifdef CONFIG_IP_MULTICAST 2007#ifdef CONFIG_IP_MULTICAST
1998 struct ip_sf_list *psf; 2008 struct ip_sf_list *psf;
2009 struct net *net = dev_net(pmc->interface->dev);
1999 in_dev = pmc->interface; 2010 in_dev = pmc->interface;
2000#endif 2011#endif
2001 2012
@@ -2007,7 +2018,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
2007#ifdef CONFIG_IP_MULTICAST 2018#ifdef CONFIG_IP_MULTICAST
2008 /* else no filters; keep old mode for reports */ 2019 /* else no filters; keep old mode for reports */
2009 2020
2010 pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; 2021 pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
2011 in_dev->mr_ifc_count = pmc->crcount; 2022 in_dev->mr_ifc_count = pmc->crcount;
2012 for (psf = pmc->sources; psf; psf = psf->sf_next) 2023 for (psf = pmc->sources; psf; psf = psf->sf_next)
2013 psf->sf_crcount = 0; 2024 psf->sf_crcount = 0;
@@ -2074,7 +2085,7 @@ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
2074 count++; 2085 count++;
2075 } 2086 }
2076 err = -ENOBUFS; 2087 err = -ENOBUFS;
2077 if (count >= sysctl_igmp_max_memberships) 2088 if (count >= net->ipv4.sysctl_igmp_max_memberships)
2078 goto done; 2089 goto done;
2079 iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL); 2090 iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
2080 if (!iml) 2091 if (!iml)
@@ -2246,7 +2257,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
2246 } 2257 }
2247 /* else, add a new source to the filter */ 2258 /* else, add a new source to the filter */
2248 2259
2249 if (psl && psl->sl_count >= sysctl_igmp_max_msf) { 2260 if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) {
2250 err = -ENOBUFS; 2261 err = -ENOBUFS;
2251 goto done; 2262 goto done;
2252 } 2263 }
@@ -2919,6 +2930,12 @@ static int __net_init igmp_net_init(struct net *net)
2919 goto out_sock; 2930 goto out_sock;
2920 } 2931 }
2921 2932
2933 /* Sysctl initialization */
2934 net->ipv4.sysctl_igmp_max_memberships = 20;
2935 net->ipv4.sysctl_igmp_max_msf = 10;
2936 /* IGMP reports for link-local multicast groups are enabled by default */
2937 net->ipv4.sysctl_igmp_llm_reports = 1;
2938 net->ipv4.sysctl_igmp_qrv = 2;
2922 return 0; 2939 return 0;
2923 2940
2924out_sock: 2941out_sock:
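
The igmp.c hunks above turn the formerly global IGMP sysctls (max_memberships, max_msf, llm_reports, qrv) into per-network-namespace fields whose defaults are now assigned in igmp_net_init(). A compressed sketch of that globals-to-per-namespace pattern, reusing the default values visible in the diff (struct and function names are illustrative):

#include <stdio.h>

/* Per-namespace IGMP knobs, formerly global __read_mostly variables. */
struct ipv4_mc_config {
    int igmp_max_memberships;
    int igmp_max_msf;
    int igmp_llm_reports;
    int igmp_qrv;
};

/* Runs once per namespace, replacing the old static initialisers. */
static void mc_config_init(struct ipv4_mc_config *cfg)
{
    cfg->igmp_max_memberships = 20;
    cfg->igmp_max_msf = 10;
    cfg->igmp_llm_reports = 1;   /* link-local reports on by default */
    cfg->igmp_qrv = 2;           /* query robustness variable        */
}

int main(void)
{
    struct ipv4_mc_config ns_cfg;

    mc_config_init(&ns_cfg);
    printf("qrv=%d max_memberships=%d\n",
           ns_cfg.igmp_qrv, ns_cfg.igmp_max_memberships);
    return 0;
}
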
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 46b9c887bede..bc5196ea1bdf 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -24,6 +24,7 @@
24#include <net/tcp_states.h> 24#include <net/tcp_states.h>
25#include <net/xfrm.h> 25#include <net/xfrm.h>
26#include <net/tcp.h> 26#include <net/tcp.h>
27#include <net/sock_reuseport.h>
27 28
28#ifdef INET_CSK_DEBUG 29#ifdef INET_CSK_DEBUG
29const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; 30const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -67,7 +68,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
67 if ((!reuse || !sk2->sk_reuse || 68 if ((!reuse || !sk2->sk_reuse ||
68 sk2->sk_state == TCP_LISTEN) && 69 sk2->sk_state == TCP_LISTEN) &&
69 (!reuseport || !sk2->sk_reuseport || 70 (!reuseport || !sk2->sk_reuseport ||
70 (sk2->sk_state != TCP_TIME_WAIT && 71 rcu_access_pointer(sk->sk_reuseport_cb) ||
72 (sk2->sk_state != TCP_TIME_WAIT &&
71 !uid_eq(uid, sock_i_uid(sk2))))) { 73 !uid_eq(uid, sock_i_uid(sk2))))) {
72 74
73 if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || 75 if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
@@ -89,161 +91,154 @@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
89 91
90/* Obtain a reference to a local port for the given sock, 92/* Obtain a reference to a local port for the given sock,
91 * if snum is zero it means select any available local port. 93 * if snum is zero it means select any available local port.
94 * We try to allocate an odd port (and leave even ports for connect())
92 */ 95 */
93int inet_csk_get_port(struct sock *sk, unsigned short snum) 96int inet_csk_get_port(struct sock *sk, unsigned short snum)
94{ 97{
95 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 98 bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
99 struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
100 int ret = 1, attempts = 5, port = snum;
101 int smallest_size = -1, smallest_port;
96 struct inet_bind_hashbucket *head; 102 struct inet_bind_hashbucket *head;
97 struct inet_bind_bucket *tb;
98 int ret, attempts = 5;
99 struct net *net = sock_net(sk); 103 struct net *net = sock_net(sk);
100 int smallest_size = -1, smallest_rover; 104 int i, low, high, attempt_half;
105 struct inet_bind_bucket *tb;
101 kuid_t uid = sock_i_uid(sk); 106 kuid_t uid = sock_i_uid(sk);
102 int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; 107 u32 remaining, offset;
103 108
104 local_bh_disable(); 109 if (port) {
105 if (!snum) { 110have_port:
106 int remaining, rover, low, high; 111 head = &hinfo->bhash[inet_bhashfn(net, port,
112 hinfo->bhash_size)];
113 spin_lock_bh(&head->lock);
114 inet_bind_bucket_for_each(tb, &head->chain)
115 if (net_eq(ib_net(tb), net) && tb->port == port)
116 goto tb_found;
107 117
118 goto tb_not_found;
119 }
108again: 120again:
109 inet_get_local_port_range(net, &low, &high); 121 attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
110 if (attempt_half) { 122other_half_scan:
111 int half = low + ((high - low) >> 1); 123 inet_get_local_port_range(net, &low, &high);
112 124 high++; /* [32768, 60999] -> [32768, 61000[ */
113 if (attempt_half == 1) 125 if (high - low < 4)
114 high = half; 126 attempt_half = 0;
115 else 127 if (attempt_half) {
116 low = half; 128 int half = low + (((high - low) >> 2) << 1);
117 } 129
118 remaining = (high - low) + 1; 130 if (attempt_half == 1)
119 smallest_rover = rover = prandom_u32() % remaining + low; 131 high = half;
120 132 else
121 smallest_size = -1; 133 low = half;
122 do { 134 }
123 if (inet_is_local_reserved_port(net, rover)) 135 remaining = high - low;
124 goto next_nolock; 136 if (likely(remaining > 1))
125 head = &hashinfo->bhash[inet_bhashfn(net, rover, 137 remaining &= ~1U;
126 hashinfo->bhash_size)]; 138
127 spin_lock(&head->lock); 139 offset = prandom_u32() % remaining;
128 inet_bind_bucket_for_each(tb, &head->chain) 140 /* __inet_hash_connect() favors ports having @low parity
129 if (net_eq(ib_net(tb), net) && tb->port == rover) { 141 * We do the opposite to not pollute connect() users.
130 if (((tb->fastreuse > 0 && 142 */
131 sk->sk_reuse && 143 offset |= 1U;
132 sk->sk_state != TCP_LISTEN) || 144 smallest_size = -1;
133 (tb->fastreuseport > 0 && 145 smallest_port = low; /* avoid compiler warning */
134 sk->sk_reuseport && 146
135 uid_eq(tb->fastuid, uid))) && 147other_parity_scan:
136 (tb->num_owners < smallest_size || smallest_size == -1)) { 148 port = low + offset;
137 smallest_size = tb->num_owners; 149 for (i = 0; i < remaining; i += 2, port += 2) {
138 smallest_rover = rover; 150 if (unlikely(port >= high))
139 } 151 port -= remaining;
140 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { 152 if (inet_is_local_reserved_port(net, port))
141 snum = rover; 153 continue;
142 goto tb_found; 154 head = &hinfo->bhash[inet_bhashfn(net, port,
143 } 155 hinfo->bhash_size)];
144 goto next; 156 spin_lock_bh(&head->lock);
157 inet_bind_bucket_for_each(tb, &head->chain)
158 if (net_eq(ib_net(tb), net) && tb->port == port) {
159 if (((tb->fastreuse > 0 && reuse) ||
160 (tb->fastreuseport > 0 &&
161 sk->sk_reuseport &&
162 !rcu_access_pointer(sk->sk_reuseport_cb) &&
163 uid_eq(tb->fastuid, uid))) &&
164 (tb->num_owners < smallest_size || smallest_size == -1)) {
165 smallest_size = tb->num_owners;
166 smallest_port = port;
145 } 167 }
146 break; 168 if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false))
147 next: 169 goto tb_found;
148 spin_unlock(&head->lock); 170 goto next_port;
149 next_nolock:
150 if (++rover > high)
151 rover = low;
152 } while (--remaining > 0);
153
154 /* Exhausted local port range during search? It is not
155 * possible for us to be holding one of the bind hash
156 * locks if this test triggers, because if 'remaining'
157 * drops to zero, we broke out of the do/while loop at
158 * the top level, not from the 'break;' statement.
159 */
160 ret = 1;
161 if (remaining <= 0) {
162 if (smallest_size != -1) {
163 snum = smallest_rover;
164 goto have_snum;
165 }
166 if (attempt_half == 1) {
167 /* OK we now try the upper half of the range */
168 attempt_half = 2;
169 goto again;
170 } 171 }
171 goto fail; 172 goto tb_not_found;
172 } 173next_port:
173 /* OK, here is the one we will use. HEAD is 174 spin_unlock_bh(&head->lock);
174 * non-NULL and we hold it's mutex. 175 cond_resched();
175 */ 176 }
176 snum = rover; 177
177 } else { 178 if (smallest_size != -1) {
178have_snum: 179 port = smallest_port;
179 head = &hashinfo->bhash[inet_bhashfn(net, snum, 180 goto have_port;
180 hashinfo->bhash_size)]; 181 }
181 spin_lock(&head->lock); 182 offset--;
182 inet_bind_bucket_for_each(tb, &head->chain) 183 if (!(offset & 1))
183 if (net_eq(ib_net(tb), net) && tb->port == snum) 184 goto other_parity_scan;
184 goto tb_found; 185
186 if (attempt_half == 1) {
187 /* OK we now try the upper half of the range */
188 attempt_half = 2;
189 goto other_half_scan;
185 } 190 }
186 tb = NULL; 191 return ret;
187 goto tb_not_found; 192
193tb_not_found:
194 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
195 net, head, port);
196 if (!tb)
197 goto fail_unlock;
188tb_found: 198tb_found:
189 if (!hlist_empty(&tb->owners)) { 199 if (!hlist_empty(&tb->owners)) {
190 if (sk->sk_reuse == SK_FORCE_REUSE) 200 if (sk->sk_reuse == SK_FORCE_REUSE)
191 goto success; 201 goto success;
192 202
193 if (((tb->fastreuse > 0 && 203 if (((tb->fastreuse > 0 && reuse) ||
194 sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
195 (tb->fastreuseport > 0 && 204 (tb->fastreuseport > 0 &&
205 !rcu_access_pointer(sk->sk_reuseport_cb) &&
196 sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && 206 sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
197 smallest_size == -1) { 207 smallest_size == -1)
198 goto success; 208 goto success;
199 } else { 209 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
200 ret = 1; 210 if ((reuse ||
201 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { 211 (tb->fastreuseport > 0 &&
202 if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || 212 sk->sk_reuseport &&
203 (tb->fastreuseport > 0 && 213 !rcu_access_pointer(sk->sk_reuseport_cb) &&
204 sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && 214 uid_eq(tb->fastuid, uid))) &&
205 smallest_size != -1 && --attempts >= 0) { 215 smallest_size != -1 && --attempts >= 0) {
206 spin_unlock(&head->lock); 216 spin_unlock_bh(&head->lock);
207 goto again; 217 goto again;
208 }
209
210 goto fail_unlock;
211 } 218 }
219 goto fail_unlock;
212 } 220 }
213 } 221 if (!reuse)
214tb_not_found:
215 ret = 1;
216 if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
217 net, head, snum)) == NULL)
218 goto fail_unlock;
219 if (hlist_empty(&tb->owners)) {
220 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
221 tb->fastreuse = 1;
222 else
223 tb->fastreuse = 0; 222 tb->fastreuse = 0;
223 if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))
224 tb->fastreuseport = 0;
225 } else {
226 tb->fastreuse = reuse;
224 if (sk->sk_reuseport) { 227 if (sk->sk_reuseport) {
225 tb->fastreuseport = 1; 228 tb->fastreuseport = 1;
226 tb->fastuid = uid; 229 tb->fastuid = uid;
227 } else 230 } else {
228 tb->fastreuseport = 0;
229 } else {
230 if (tb->fastreuse &&
231 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
232 tb->fastreuse = 0;
233 if (tb->fastreuseport &&
234 (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)))
235 tb->fastreuseport = 0; 231 tb->fastreuseport = 0;
232 }
236 } 233 }
237success: 234success:
238 if (!inet_csk(sk)->icsk_bind_hash) 235 if (!inet_csk(sk)->icsk_bind_hash)
239 inet_bind_hash(sk, tb, snum); 236 inet_bind_hash(sk, tb, port);
240 WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); 237 WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
241 ret = 0; 238 ret = 0;
242 239
243fail_unlock: 240fail_unlock:
244 spin_unlock(&head->lock); 241 spin_unlock_bh(&head->lock);
245fail:
246 local_bh_enable();
247 return ret; 242 return ret;
248} 243}
249EXPORT_SYMBOL_GPL(inet_csk_get_port); 244EXPORT_SYMBOL_GPL(inet_csk_get_port);
@@ -482,10 +477,6 @@ EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
482#define AF_INET_FAMILY(fam) true 477#define AF_INET_FAMILY(fam) true
483#endif 478#endif
484 479
485/* Only thing we need from tcp.h */
486extern int sysctl_tcp_synack_retries;
487
488
489/* Decide when to expire the request and when to resend SYN-ACK */ 480/* Decide when to expire the request and when to resend SYN-ACK */
490static inline void syn_ack_recalc(struct request_sock *req, const int thresh, 481static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
491 const int max_retries, 482 const int max_retries,
@@ -557,6 +548,7 @@ static void reqsk_timer_handler(unsigned long data)
557{ 548{
558 struct request_sock *req = (struct request_sock *)data; 549 struct request_sock *req = (struct request_sock *)data;
559 struct sock *sk_listener = req->rsk_listener; 550 struct sock *sk_listener = req->rsk_listener;
551 struct net *net = sock_net(sk_listener);
560 struct inet_connection_sock *icsk = inet_csk(sk_listener); 552 struct inet_connection_sock *icsk = inet_csk(sk_listener);
561 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 553 struct request_sock_queue *queue = &icsk->icsk_accept_queue;
562 int qlen, expire = 0, resend = 0; 554 int qlen, expire = 0, resend = 0;
@@ -566,7 +558,7 @@ static void reqsk_timer_handler(unsigned long data)
566 if (sk_state_load(sk_listener) != TCP_LISTEN) 558 if (sk_state_load(sk_listener) != TCP_LISTEN)
567 goto drop; 559 goto drop;
568 560
569 max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 561 max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
570 thresh = max_retries; 562 thresh = max_retries;
571 /* Normally all the openreqs are young and become mature 563 /* Normally all the openreqs are young and become mature
572 * (i.e. converted to established socket) for first timeout. 564 * (i.e. converted to established socket) for first timeout.
@@ -737,6 +729,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
737{ 729{
738 struct inet_connection_sock *icsk = inet_csk(sk); 730 struct inet_connection_sock *icsk = inet_csk(sk);
739 struct inet_sock *inet = inet_sk(sk); 731 struct inet_sock *inet = inet_sk(sk);
732 int err = -EADDRINUSE;
740 733
741 reqsk_queue_alloc(&icsk->icsk_accept_queue); 734 reqsk_queue_alloc(&icsk->icsk_accept_queue);
742 735
@@ -754,13 +747,14 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
754 inet->inet_sport = htons(inet->inet_num); 747 inet->inet_sport = htons(inet->inet_num);
755 748
756 sk_dst_reset(sk); 749 sk_dst_reset(sk);
757 sk->sk_prot->hash(sk); 750 err = sk->sk_prot->hash(sk);
758 751
759 return 0; 752 if (likely(!err))
753 return 0;
760 } 754 }
761 755
762 sk->sk_state = TCP_CLOSE; 756 sk->sk_state = TCP_CLOSE;
763 return -EADDRINUSE; 757 return err;
764} 758}
765EXPORT_SYMBOL_GPL(inet_csk_listen_start); 759EXPORT_SYMBOL_GPL(inet_csk_listen_start);
766 760
@@ -789,14 +783,16 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
789 reqsk_put(req); 783 reqsk_put(req);
790} 784}
791 785
792void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, 786struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
793 struct sock *child) 787 struct request_sock *req,
788 struct sock *child)
794{ 789{
795 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 790 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
796 791
797 spin_lock(&queue->rskq_lock); 792 spin_lock(&queue->rskq_lock);
798 if (unlikely(sk->sk_state != TCP_LISTEN)) { 793 if (unlikely(sk->sk_state != TCP_LISTEN)) {
799 inet_child_forget(sk, req, child); 794 inet_child_forget(sk, req, child);
795 child = NULL;
800 } else { 796 } else {
801 req->sk = child; 797 req->sk = child;
802 req->dl_next = NULL; 798 req->dl_next = NULL;
@@ -808,6 +804,7 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
808 sk_acceptq_added(sk); 804 sk_acceptq_added(sk);
809 } 805 }
810 spin_unlock(&queue->rskq_lock); 806 spin_unlock(&queue->rskq_lock);
807 return child;
811} 808}
812EXPORT_SYMBOL(inet_csk_reqsk_queue_add); 809EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
813 810
@@ -817,11 +814,8 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
817 if (own_req) { 814 if (own_req) {
818 inet_csk_reqsk_queue_drop(sk, req); 815 inet_csk_reqsk_queue_drop(sk, req);
819 reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); 816 reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
820 inet_csk_reqsk_queue_add(sk, req, child); 817 if (inet_csk_reqsk_queue_add(sk, req, child))
821 /* Warning: caller must not call reqsk_put(req); 818 return child;
822 * child stole last reference on it.
823 */
824 return child;
825 } 819 }
826 /* Too bad, another child took ownership of the request, undo. */ 820 /* Too bad, another child took ownership of the request, undo. */
827 bh_unlock_sock(child); 821 bh_unlock_sock(child);
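
The inet_csk_get_port() rewrite above randomizes the bind() search and restricts it to one parity: offset |= 1 makes listeners prefer odd ports, while __inet_hash_connect() further down keeps even offsets (offset &= ~1), so with the usual even lower bound outgoing connects and listening sockets draw from disjoint halves of the port range. A small stand-alone sketch of that parity-partitioned scan (pick_port and its callback are illustrative, not kernel APIs):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Walk [low, high) from a random start, visiting only ports of one
 * parity: odd for listeners, even for outgoing connections. */
static int pick_port(int low, int high, int want_odd,
                     int (*in_use)(int port))
{
    uint32_t remaining = (uint32_t)(high - low) & ~1U;
    uint32_t offset = (uint32_t)rand() % remaining;
    int port, i;

    offset = want_odd ? (offset | 1U) : (offset & ~1U);
    port = low + (int)offset;
    for (i = 0; i < (int)remaining; i += 2, port += 2) {
        if (port >= high)
            port -= (int)remaining;
        if (!in_use(port))
            return port;
    }
    return -1;   /* range of this parity exhausted */
}

static int nothing_in_use(int port) { (void)port; return 0; }

int main(void)
{
    printf("listener port: %d\n", pick_port(32768, 61000, 1, nothing_in_use));
    printf("connect port:  %d\n", pick_port(32768, 61000, 0, nothing_in_use));
    return 0;
}
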
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8bb8e7ad8548..5fdb02f5598e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -357,17 +357,24 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
357 struct sock *sk; 357 struct sock *sk;
358 358
359 if (req->sdiag_family == AF_INET) 359 if (req->sdiag_family == AF_INET)
360 sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], 360 sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
361 req->id.idiag_dport, req->id.idiag_src[0], 361 req->id.idiag_dport, req->id.idiag_src[0],
362 req->id.idiag_sport, req->id.idiag_if); 362 req->id.idiag_sport, req->id.idiag_if);
363#if IS_ENABLED(CONFIG_IPV6) 363#if IS_ENABLED(CONFIG_IPV6)
364 else if (req->sdiag_family == AF_INET6) 364 else if (req->sdiag_family == AF_INET6) {
365 sk = inet6_lookup(net, hashinfo, 365 if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
366 (struct in6_addr *)req->id.idiag_dst, 366 ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
367 req->id.idiag_dport, 367 sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
368 (struct in6_addr *)req->id.idiag_src, 368 req->id.idiag_dport, req->id.idiag_src[3],
369 req->id.idiag_sport, 369 req->id.idiag_sport, req->id.idiag_if);
370 req->id.idiag_if); 370 else
371 sk = inet6_lookup(net, hashinfo, NULL, 0,
372 (struct in6_addr *)req->id.idiag_dst,
373 req->id.idiag_dport,
374 (struct in6_addr *)req->id.idiag_src,
375 req->id.idiag_sport,
376 req->id.idiag_if);
377 }
371#endif 378#endif
372 else 379 else
373 return ERR_PTR(-EINVAL); 380 return ERR_PTR(-EINVAL);
@@ -872,6 +879,7 @@ next_normal:
872 } 879 }
873 880
874 spin_unlock_bh(lock); 881 spin_unlock_bh(lock);
882 cond_resched();
875 } 883 }
876 884
877done: 885done:
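
inet_diag_find_one_icsk() above now recognizes IPv4-mapped IPv6 endpoints (::ffff:a.b.c.d) and routes them to the IPv4 socket lookup, taking the embedded address from the last 32-bit word of the 128-bit field. A stand-alone sketch of that mapped-address test (addr_is_v4mapped is an illustrative helper, not the kernel's ipv6_addr_v4mapped()):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* True for ::ffff:a.b.c.d addresses; the IPv4 part sits in words[3]. */
static int addr_is_v4mapped(const uint32_t words[4])
{
    return words[0] == 0 && words[1] == 0 && words[2] == htonl(0x0000ffff);
}

int main(void)
{
    struct in6_addr a6;
    uint32_t words[4];
    char buf[INET_ADDRSTRLEN];

    inet_pton(AF_INET6, "::ffff:192.0.2.7", &a6);
    memcpy(words, &a6, sizeof(words));

    if (addr_is_v4mapped(words)) {
        /* Reuse the IPv4 path with the embedded address. */
        inet_ntop(AF_INET, &words[3], buf, sizeof(buf));
        printf("v4-mapped, IPv4 lookup on %s\n", buf);
    }
    return 0;
}
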
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ccc5980797fc..bc68eced0105 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -20,10 +20,12 @@
20#include <linux/wait.h> 20#include <linux/wait.h>
21#include <linux/vmalloc.h> 21#include <linux/vmalloc.h>
22 22
23#include <net/addrconf.h>
23#include <net/inet_connection_sock.h> 24#include <net/inet_connection_sock.h>
24#include <net/inet_hashtables.h> 25#include <net/inet_hashtables.h>
25#include <net/secure_seq.h> 26#include <net/secure_seq.h>
26#include <net/ip.h> 27#include <net/ip.h>
28#include <net/sock_reuseport.h>
27 29
28static u32 inet_ehashfn(const struct net *net, const __be32 laddr, 30static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
29 const __u16 lport, const __be32 faddr, 31 const __u16 lport, const __be32 faddr,
@@ -205,6 +207,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
205 207
206struct sock *__inet_lookup_listener(struct net *net, 208struct sock *__inet_lookup_listener(struct net *net,
207 struct inet_hashinfo *hashinfo, 209 struct inet_hashinfo *hashinfo,
210 struct sk_buff *skb, int doff,
208 const __be32 saddr, __be16 sport, 211 const __be32 saddr, __be16 sport,
209 const __be32 daddr, const unsigned short hnum, 212 const __be32 daddr, const unsigned short hnum,
210 const int dif) 213 const int dif)
@@ -214,6 +217,7 @@ struct sock *__inet_lookup_listener(struct net *net,
214 unsigned int hash = inet_lhashfn(net, hnum); 217 unsigned int hash = inet_lhashfn(net, hnum);
215 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; 218 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
216 int score, hiscore, matches = 0, reuseport = 0; 219 int score, hiscore, matches = 0, reuseport = 0;
220 bool select_ok = true;
217 u32 phash = 0; 221 u32 phash = 0;
218 222
219 rcu_read_lock(); 223 rcu_read_lock();
@@ -229,6 +233,15 @@ begin:
229 if (reuseport) { 233 if (reuseport) {
230 phash = inet_ehashfn(net, daddr, hnum, 234 phash = inet_ehashfn(net, daddr, hnum,
231 saddr, sport); 235 saddr, sport);
236 if (select_ok) {
237 struct sock *sk2;
238 sk2 = reuseport_select_sock(sk, phash,
239 skb, doff);
240 if (sk2) {
241 result = sk2;
242 goto found;
243 }
244 }
232 matches = 1; 245 matches = 1;
233 } 246 }
234 } else if (score == hiscore && reuseport) { 247 } else if (score == hiscore && reuseport) {
@@ -246,11 +259,13 @@ begin:
246 if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) 259 if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
247 goto begin; 260 goto begin;
248 if (result) { 261 if (result) {
262found:
249 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) 263 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
250 result = NULL; 264 result = NULL;
251 else if (unlikely(compute_score(result, net, hnum, daddr, 265 else if (unlikely(compute_score(result, net, hnum, daddr,
252 dif) < hiscore)) { 266 dif) < hiscore)) {
253 sock_put(result); 267 sock_put(result);
268 select_ok = false;
254 goto begin; 269 goto begin;
255 } 270 }
256 } 271 }
@@ -449,32 +464,74 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
449} 464}
450EXPORT_SYMBOL_GPL(inet_ehash_nolisten); 465EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
451 466
452void __inet_hash(struct sock *sk, struct sock *osk) 467static int inet_reuseport_add_sock(struct sock *sk,
468 struct inet_listen_hashbucket *ilb,
469 int (*saddr_same)(const struct sock *sk1,
470 const struct sock *sk2,
471 bool match_wildcard))
472{
473 struct sock *sk2;
474 struct hlist_nulls_node *node;
475 kuid_t uid = sock_i_uid(sk);
476
477 sk_nulls_for_each_rcu(sk2, node, &ilb->head) {
478 if (sk2 != sk &&
479 sk2->sk_family == sk->sk_family &&
480 ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
481 sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
482 sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
483 saddr_same(sk, sk2, false))
484 return reuseport_add_sock(sk, sk2);
485 }
486
487 /* Initial allocation may have already happened via setsockopt */
488 if (!rcu_access_pointer(sk->sk_reuseport_cb))
489 return reuseport_alloc(sk);
490 return 0;
491}
492
493int __inet_hash(struct sock *sk, struct sock *osk,
494 int (*saddr_same)(const struct sock *sk1,
495 const struct sock *sk2,
496 bool match_wildcard))
453{ 497{
454 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 498 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
455 struct inet_listen_hashbucket *ilb; 499 struct inet_listen_hashbucket *ilb;
500 int err = 0;
456 501
457 if (sk->sk_state != TCP_LISTEN) { 502 if (sk->sk_state != TCP_LISTEN) {
458 inet_ehash_nolisten(sk, osk); 503 inet_ehash_nolisten(sk, osk);
459 return; 504 return 0;
460 } 505 }
461 WARN_ON(!sk_unhashed(sk)); 506 WARN_ON(!sk_unhashed(sk));
462 ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; 507 ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
463 508
464 spin_lock(&ilb->lock); 509 spin_lock(&ilb->lock);
510 if (sk->sk_reuseport) {
511 err = inet_reuseport_add_sock(sk, ilb, saddr_same);
512 if (err)
513 goto unlock;
514 }
465 __sk_nulls_add_node_rcu(sk, &ilb->head); 515 __sk_nulls_add_node_rcu(sk, &ilb->head);
466 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 516 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
517unlock:
467 spin_unlock(&ilb->lock); 518 spin_unlock(&ilb->lock);
519
520 return err;
468} 521}
469EXPORT_SYMBOL(__inet_hash); 522EXPORT_SYMBOL(__inet_hash);
470 523
471void inet_hash(struct sock *sk) 524int inet_hash(struct sock *sk)
472{ 525{
526 int err = 0;
527
473 if (sk->sk_state != TCP_CLOSE) { 528 if (sk->sk_state != TCP_CLOSE) {
474 local_bh_disable(); 529 local_bh_disable();
475 __inet_hash(sk, NULL); 530 err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal);
476 local_bh_enable(); 531 local_bh_enable();
477 } 532 }
533
534 return err;
478} 535}
479EXPORT_SYMBOL_GPL(inet_hash); 536EXPORT_SYMBOL_GPL(inet_hash);
480 537
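
The reuseport handling added above groups listening sockets that share the same owner, bound device and local address into one reuseport group at hash time, so incoming connections can be spread across them. As an illustrative aside (not part of this patch), a minimal userspace sketch of the kind of sockets inet_reuseport_add_sock() ends up grouping; the port number and backlog are arbitrary choices for the example:

/* Two listeners that the kernel places in one reuseport group.  Both must
 * set SO_REUSEPORT before bind() and be created by the same effective UID,
 * otherwise the second bind() fails with EADDRINUSE. */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>

static int reuseport_listener(uint16_t port)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        int one = 1;
        struct sockaddr_in addr;

        if (fd < 0)
                return -1;
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
                goto err;
        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_ANY);
        addr.sin_port = htons(port);
        if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
                goto err;
        if (listen(fd, 128) < 0)
                goto err;
        return fd;
err:
        close(fd);
        return -1;
}

int main(void)
{
        int a = reuseport_listener(8080);
        int b = reuseport_listener(8080);       /* joins the same group */

        if (a < 0 || b < 0) {
                perror("reuseport_listener");
                return 1;
        }
        printf("two listeners share port 8080; connections are load-balanced\n");
        close(a);
        close(b);
        return 0;
}
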
@@ -493,6 +550,8 @@ void inet_unhash(struct sock *sk)
493 lock = inet_ehash_lockp(hashinfo, sk->sk_hash); 550 lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
494 551
495 spin_lock_bh(lock); 552 spin_lock_bh(lock);
553 if (rcu_access_pointer(sk->sk_reuseport_cb))
554 reuseport_detach_sock(sk);
496 done = __sk_nulls_del_node_init_rcu(sk); 555 done = __sk_nulls_del_node_init_rcu(sk);
497 if (done) 556 if (done)
498 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 557 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
@@ -506,106 +565,106 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
506 struct sock *, __u16, struct inet_timewait_sock **)) 565 struct sock *, __u16, struct inet_timewait_sock **))
507{ 566{
508 struct inet_hashinfo *hinfo = death_row->hashinfo; 567 struct inet_hashinfo *hinfo = death_row->hashinfo;
509 const unsigned short snum = inet_sk(sk)->inet_num; 568 struct inet_timewait_sock *tw = NULL;
510 struct inet_bind_hashbucket *head; 569 struct inet_bind_hashbucket *head;
511 struct inet_bind_bucket *tb; 570 int port = inet_sk(sk)->inet_num;
512 int ret;
513 struct net *net = sock_net(sk); 571 struct net *net = sock_net(sk);
572 struct inet_bind_bucket *tb;
573 u32 remaining, offset;
574 int ret, i, low, high;
575 static u32 hint;
576
577 if (port) {
578 head = &hinfo->bhash[inet_bhashfn(net, port,
579 hinfo->bhash_size)];
580 tb = inet_csk(sk)->icsk_bind_hash;
581 spin_lock_bh(&head->lock);
582 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
583 inet_ehash_nolisten(sk, NULL);
584 spin_unlock_bh(&head->lock);
585 return 0;
586 }
587 spin_unlock(&head->lock);
588 /* No definite answer... Walk to established hash table */
589 ret = check_established(death_row, sk, port, NULL);
590 local_bh_enable();
591 return ret;
592 }
514 593
515 if (!snum) { 594 inet_get_local_port_range(net, &low, &high);
516 int i, remaining, low, high, port; 595 high++; /* [32768, 60999] -> [32768, 61000[ */
517 static u32 hint; 596 remaining = high - low;
518 u32 offset = hint + port_offset; 597 if (likely(remaining > 1))
519 struct inet_timewait_sock *tw = NULL; 598 remaining &= ~1U;
520 599
521 inet_get_local_port_range(net, &low, &high); 600 offset = (hint + port_offset) % remaining;
522 remaining = (high - low) + 1; 601 /* In first pass we try ports of @low parity.
602 * inet_csk_get_port() does the opposite choice.
603 */
604 offset &= ~1U;
605other_parity_scan:
606 port = low + offset;
607 for (i = 0; i < remaining; i += 2, port += 2) {
608 if (unlikely(port >= high))
609 port -= remaining;
610 if (inet_is_local_reserved_port(net, port))
611 continue;
612 head = &hinfo->bhash[inet_bhashfn(net, port,
613 hinfo->bhash_size)];
614 spin_lock_bh(&head->lock);
523 615
524 /* By starting with offset being an even number, 616 /* Does not bother with rcv_saddr checks, because
525 * we tend to leave about 50% of ports for other uses, 617 * the established check is already unique enough.
526 * like bind(0).
527 */ 618 */
528 offset &= ~1; 619 inet_bind_bucket_for_each(tb, &head->chain) {
529 620 if (net_eq(ib_net(tb), net) && tb->port == port) {
530 local_bh_disable(); 621 if (tb->fastreuse >= 0 ||
531 for (i = 0; i < remaining; i++) { 622 tb->fastreuseport >= 0)
532 port = low + (i + offset) % remaining;
533 if (inet_is_local_reserved_port(net, port))
534 continue;
535 head = &hinfo->bhash[inet_bhashfn(net, port,
536 hinfo->bhash_size)];
537 spin_lock(&head->lock);
538
539 /* Does not bother with rcv_saddr checks,
540 * because the established check is already
541 * unique enough.
542 */
543 inet_bind_bucket_for_each(tb, &head->chain) {
544 if (net_eq(ib_net(tb), net) &&
545 tb->port == port) {
546 if (tb->fastreuse >= 0 ||
547 tb->fastreuseport >= 0)
548 goto next_port;
549 WARN_ON(hlist_empty(&tb->owners));
550 if (!check_established(death_row, sk,
551 port, &tw))
552 goto ok;
553 goto next_port; 623 goto next_port;
554 } 624 WARN_ON(hlist_empty(&tb->owners));
625 if (!check_established(death_row, sk,
626 port, &tw))
627 goto ok;
628 goto next_port;
555 } 629 }
556
557 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
558 net, head, port);
559 if (!tb) {
560 spin_unlock(&head->lock);
561 break;
562 }
563 tb->fastreuse = -1;
564 tb->fastreuseport = -1;
565 goto ok;
566
567 next_port:
568 spin_unlock(&head->lock);
569 } 630 }
570 local_bh_enable();
571
572 return -EADDRNOTAVAIL;
573 631
574ok: 632 tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
575 hint += (i + 2) & ~1; 633 net, head, port);
576 634 if (!tb) {
577 /* Head lock still held and bh's disabled */ 635 spin_unlock_bh(&head->lock);
578 inet_bind_hash(sk, tb, port); 636 return -ENOMEM;
579 if (sk_unhashed(sk)) {
580 inet_sk(sk)->inet_sport = htons(port);
581 inet_ehash_nolisten(sk, (struct sock *)tw);
582 } 637 }
583 if (tw) 638 tb->fastreuse = -1;
584 inet_twsk_bind_unhash(tw, hinfo); 639 tb->fastreuseport = -1;
585 spin_unlock(&head->lock); 640 goto ok;
641next_port:
642 spin_unlock_bh(&head->lock);
643 cond_resched();
644 }
586 645
587 if (tw) 646 offset++;
588 inet_twsk_deschedule_put(tw); 647 if ((offset & 1) && remaining > 1)
648 goto other_parity_scan;
589 649
590 ret = 0; 650 return -EADDRNOTAVAIL;
591 goto out;
592 }
593 651
594 head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; 652ok:
595 tb = inet_csk(sk)->icsk_bind_hash; 653 hint += i + 2;
596 spin_lock_bh(&head->lock); 654
597 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { 655 /* Head lock still held and bh's disabled */
598 inet_ehash_nolisten(sk, NULL); 656 inet_bind_hash(sk, tb, port);
599 spin_unlock_bh(&head->lock); 657 if (sk_unhashed(sk)) {
600 return 0; 658 inet_sk(sk)->inet_sport = htons(port);
601 } else { 659 inet_ehash_nolisten(sk, (struct sock *)tw);
602 spin_unlock(&head->lock);
603 /* No definite answer... Walk to established hash table */
604 ret = check_established(death_row, sk, snum, NULL);
605out:
606 local_bh_enable();
607 return ret;
608 } 660 }
661 if (tw)
662 inet_twsk_bind_unhash(tw, hinfo);
663 spin_unlock(&head->lock);
664 if (tw)
665 inet_twsk_deschedule_put(tw);
666 local_bh_enable();
667 return 0;
609} 668}
610 669
611/* 670/*
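
The rewritten __inet_hash_connect() above walks candidate source ports in two passes split by parity: connect() starts from an even offset and only falls back to the odd ports, leaving roughly half the range to bind(0) via inet_csk_get_port(), which prefers the opposite parity. A minimal standalone sketch of that walk follows; is_reserved() and try_port() are illustrative stand-ins for the reserved-port check and the bind/established lookup, not kernel interfaces, and the range matches the default ip_local_port_range:

#include <stdbool.h>
#include <stdio.h>

static bool is_reserved(unsigned int port)
{
        return false;                   /* would consult ip_local_reserved_ports */
}

static bool try_port(unsigned int port)
{
        return port == 40001;           /* pretend only this port is usable */
}

static int pick_port(unsigned int low, unsigned int high, unsigned int offset)
{
        unsigned int remaining, i, port;

        high++;                         /* [low, high] -> [low, high) */
        remaining = high - low;
        if (remaining > 1)
                remaining &= ~1U;       /* even count, so both parities are covered */
        offset %= remaining;
        offset &= ~1U;                  /* first pass: ports of @low parity */

other_parity_scan:
        port = low + offset;
        for (i = 0; i < remaining; i += 2, port += 2) {
                if (port >= high)
                        port -= remaining;
                if (is_reserved(port))
                        continue;
                if (try_port(port))
                        return (int)port;
        }
        offset++;
        if ((offset & 1) && remaining > 1)
                goto other_parity_scan; /* second pass: the other parity */
        return -1;                      /* -EADDRNOTAVAIL in the kernel */
}

int main(void)
{
        printf("picked port %d\n", pick_port(32768, 60999, 12345));
        return 0;
}
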
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
deleted file mode 100644
index f17ea49b28fb..000000000000
--- a/net/ipv4/inet_lro.c
+++ /dev/null
@@ -1,374 +0,0 @@
1/*
2 * linux/net/ipv4/inet_lro.c
3 *
4 * Large Receive Offload (ipv4 / tcp)
5 *
6 * (C) Copyright IBM Corp. 2007
7 *
8 * Authors:
9 * Jan-Bernd Themann <themann@de.ibm.com>
10 * Christoph Raisch <raisch@de.ibm.com>
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2, or (at your option)
16 * any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 */
27
28
29#include <linux/module.h>
30#include <linux/if_vlan.h>
31#include <linux/inet_lro.h>
32#include <net/checksum.h>
33
34MODULE_LICENSE("GPL");
35MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
36MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
37
38#define TCP_HDR_LEN(tcph) (tcph->doff << 2)
39#define IP_HDR_LEN(iph) (iph->ihl << 2)
40#define TCP_PAYLOAD_LENGTH(iph, tcph) \
41 (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))
42
43#define IPH_LEN_WO_OPTIONS 5
44#define TCPH_LEN_WO_OPTIONS 5
45#define TCPH_LEN_W_TIMESTAMP 8
46
47#define LRO_MAX_PG_HLEN 64
48
49#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; }
50
51/*
52 * Basic tcp checks whether packet is suitable for LRO
53 */
54
55static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
56 int len, const struct net_lro_desc *lro_desc)
57{
58 /* check ip header: don't aggregate padded frames */
59 if (ntohs(iph->tot_len) != len)
60 return -1;
61
62 if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
63 return -1;
64
65 if (iph->ihl != IPH_LEN_WO_OPTIONS)
66 return -1;
67
68 if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
69 tcph->rst || tcph->syn || tcph->fin)
70 return -1;
71
72 if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
73 return -1;
74
75 if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
76 tcph->doff != TCPH_LEN_W_TIMESTAMP)
77 return -1;
78
79 /* check tcp options (only timestamp allowed) */
80 if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
81 __be32 *topt = (__be32 *)(tcph + 1);
82
83 if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
84 | (TCPOPT_TIMESTAMP << 8)
85 | TCPOLEN_TIMESTAMP))
86 return -1;
87
88 /* timestamp should be in right order */
89 topt++;
90 if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
91 ntohl(*topt)))
92 return -1;
93
94 /* timestamp reply should not be zero */
95 topt++;
96 if (*topt == 0)
97 return -1;
98 }
99
100 return 0;
101}
102
103static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
104{
105 struct iphdr *iph = lro_desc->iph;
106 struct tcphdr *tcph = lro_desc->tcph;
107 __be32 *p;
108 __wsum tcp_hdr_csum;
109
110 tcph->ack_seq = lro_desc->tcp_ack;
111 tcph->window = lro_desc->tcp_window;
112
113 if (lro_desc->tcp_saw_tstamp) {
114 p = (__be32 *)(tcph + 1);
115 *(p+2) = lro_desc->tcp_rcv_tsecr;
116 }
117
118 csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
119 iph->tot_len = htons(lro_desc->ip_tot_len);
120
121 tcph->check = 0;
122 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
123 lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
124 tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
125 lro_desc->ip_tot_len -
126 IP_HDR_LEN(iph), IPPROTO_TCP,
127 lro_desc->data_csum);
128}
129
130static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
131{
132 __wsum tcp_csum;
133 __wsum tcp_hdr_csum;
134 __wsum tcp_ps_hdr_csum;
135
136 tcp_csum = ~csum_unfold(tcph->check);
137 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
138
139 tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
140 len + TCP_HDR_LEN(tcph),
141 IPPROTO_TCP, 0);
142
143 return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
144 tcp_ps_hdr_csum);
145}
146
147static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
148 struct iphdr *iph, struct tcphdr *tcph)
149{
150 int nr_frags;
151 __be32 *ptr;
152 u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
153
154 nr_frags = skb_shinfo(skb)->nr_frags;
155 lro_desc->parent = skb;
156 lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
157 lro_desc->iph = iph;
158 lro_desc->tcph = tcph;
159 lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
160 lro_desc->tcp_ack = tcph->ack_seq;
161 lro_desc->tcp_window = tcph->window;
162
163 lro_desc->pkt_aggr_cnt = 1;
164 lro_desc->ip_tot_len = ntohs(iph->tot_len);
165
166 if (tcph->doff == 8) {
167 ptr = (__be32 *)(tcph+1);
168 lro_desc->tcp_saw_tstamp = 1;
169 lro_desc->tcp_rcv_tsval = *(ptr+1);
170 lro_desc->tcp_rcv_tsecr = *(ptr+2);
171 }
172
173 lro_desc->mss = tcp_data_len;
174 lro_desc->active = 1;
175
176 lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
177 tcp_data_len);
178}
179
180static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
181{
182 memset(lro_desc, 0, sizeof(struct net_lro_desc));
183}
184
185static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
186 struct tcphdr *tcph, int tcp_data_len)
187{
188 struct sk_buff *parent = lro_desc->parent;
189 __be32 *topt;
190
191 lro_desc->pkt_aggr_cnt++;
192 lro_desc->ip_tot_len += tcp_data_len;
193 lro_desc->tcp_next_seq += tcp_data_len;
194 lro_desc->tcp_window = tcph->window;
195 lro_desc->tcp_ack = tcph->ack_seq;
196
197 /* don't update tcp_rcv_tsval, would not work with PAWS */
198 if (lro_desc->tcp_saw_tstamp) {
199 topt = (__be32 *) (tcph + 1);
200 lro_desc->tcp_rcv_tsecr = *(topt + 2);
201 }
202
203 lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
204 lro_tcp_data_csum(iph, tcph,
205 tcp_data_len),
206 parent->len);
207
208 parent->len += tcp_data_len;
209 parent->data_len += tcp_data_len;
210 if (tcp_data_len > lro_desc->mss)
211 lro_desc->mss = tcp_data_len;
212}
213
214static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
215 struct iphdr *iph, struct tcphdr *tcph)
216{
217 struct sk_buff *parent = lro_desc->parent;
218 int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
219
220 lro_add_common(lro_desc, iph, tcph, tcp_data_len);
221
222 skb_pull(skb, (skb->len - tcp_data_len));
223 parent->truesize += skb->truesize;
224
225 if (lro_desc->last_skb)
226 lro_desc->last_skb->next = skb;
227 else
228 skb_shinfo(parent)->frag_list = skb;
229
230 lro_desc->last_skb = skb;
231}
232
233
234static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
235 struct iphdr *iph,
236 struct tcphdr *tcph)
237{
238 if ((lro_desc->iph->saddr != iph->saddr) ||
239 (lro_desc->iph->daddr != iph->daddr) ||
240 (lro_desc->tcph->source != tcph->source) ||
241 (lro_desc->tcph->dest != tcph->dest))
242 return -1;
243 return 0;
244}
245
246static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
247 struct net_lro_desc *lro_arr,
248 struct iphdr *iph,
249 struct tcphdr *tcph)
250{
251 struct net_lro_desc *lro_desc = NULL;
252 struct net_lro_desc *tmp;
253 int max_desc = lro_mgr->max_desc;
254 int i;
255
256 for (i = 0; i < max_desc; i++) {
257 tmp = &lro_arr[i];
258 if (tmp->active)
259 if (!lro_check_tcp_conn(tmp, iph, tcph)) {
260 lro_desc = tmp;
261 goto out;
262 }
263 }
264
265 for (i = 0; i < max_desc; i++) {
266 if (!lro_arr[i].active) {
267 lro_desc = &lro_arr[i];
268 goto out;
269 }
270 }
271
272 LRO_INC_STATS(lro_mgr, no_desc);
273out:
274 return lro_desc;
275}
276
277static void lro_flush(struct net_lro_mgr *lro_mgr,
278 struct net_lro_desc *lro_desc)
279{
280 if (lro_desc->pkt_aggr_cnt > 1)
281 lro_update_tcp_ip_header(lro_desc);
282
283 skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
284
285 if (lro_mgr->features & LRO_F_NAPI)
286 netif_receive_skb(lro_desc->parent);
287 else
288 netif_rx(lro_desc->parent);
289
290 LRO_INC_STATS(lro_mgr, flushed);
291 lro_clear_desc(lro_desc);
292}
293
294static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
295 void *priv)
296{
297 struct net_lro_desc *lro_desc;
298 struct iphdr *iph;
299 struct tcphdr *tcph;
300 u64 flags;
301 int vlan_hdr_len = 0;
302
303 if (!lro_mgr->get_skb_header ||
304 lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
305 &flags, priv))
306 goto out;
307
308 if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
309 goto out;
310
311 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
312 if (!lro_desc)
313 goto out;
314
315 if ((skb->protocol == htons(ETH_P_8021Q)) &&
316 !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
317 vlan_hdr_len = VLAN_HLEN;
318
319 if (!lro_desc->active) { /* start new lro session */
320 if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
321 goto out;
322
323 skb->ip_summed = lro_mgr->ip_summed_aggr;
324 lro_init_desc(lro_desc, skb, iph, tcph);
325 LRO_INC_STATS(lro_mgr, aggregated);
326 return 0;
327 }
328
329 if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
330 goto out2;
331
332 if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
333 goto out2;
334
335 lro_add_packet(lro_desc, skb, iph, tcph);
336 LRO_INC_STATS(lro_mgr, aggregated);
337
338 if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
339 lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
340 lro_flush(lro_mgr, lro_desc);
341
342 return 0;
343
344out2: /* send aggregated SKBs to stack */
345 lro_flush(lro_mgr, lro_desc);
346
347out:
348 return 1;
349}
350
351void lro_receive_skb(struct net_lro_mgr *lro_mgr,
352 struct sk_buff *skb,
353 void *priv)
354{
355 if (__lro_proc_skb(lro_mgr, skb, priv)) {
356 if (lro_mgr->features & LRO_F_NAPI)
357 netif_receive_skb(skb);
358 else
359 netif_rx(skb);
360 }
361}
362EXPORT_SYMBOL(lro_receive_skb);
363
364void lro_flush_all(struct net_lro_mgr *lro_mgr)
365{
366 int i;
367 struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
368
369 for (i = 0; i < lro_mgr->max_desc; i++) {
370 if (lro_desc[i].active)
371 lro_flush(lro_mgr, &lro_desc[i]);
372 }
373}
374EXPORT_SYMBOL(lro_flush_all);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index da0d7ce85844..af18f1e4889e 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -71,7 +71,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
71 if (unlikely(opt->optlen)) 71 if (unlikely(opt->optlen))
72 ip_forward_options(skb); 72 ip_forward_options(skb);
73 73
74 skb_sender_cpu_clear(skb);
75 return dst_output(net, sk, skb); 74 return dst_output(net, sk, skb);
76} 75}
77 76
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3f00810b7288..efbd47d1a531 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -54,8 +54,6 @@
54 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c 54 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
55 * as well. Or notify me, at least. --ANK 55 * as well. Or notify me, at least. --ANK
56 */ 56 */
57
58static int sysctl_ipfrag_max_dist __read_mostly = 64;
59static const char ip_frag_cache_name[] = "ip4-frags"; 57static const char ip_frag_cache_name[] = "ip4-frags";
60 58
61struct ipfrag_skb_cb 59struct ipfrag_skb_cb
@@ -150,7 +148,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
150 qp->daddr = arg->iph->daddr; 148 qp->daddr = arg->iph->daddr;
151 qp->vif = arg->vif; 149 qp->vif = arg->vif;
152 qp->user = arg->user; 150 qp->user = arg->user;
153 qp->peer = sysctl_ipfrag_max_dist ? 151 qp->peer = q->net->max_dist ?
154 inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) : 152 inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
155 NULL; 153 NULL;
156} 154}
@@ -275,7 +273,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
275static int ip_frag_too_far(struct ipq *qp) 273static int ip_frag_too_far(struct ipq *qp)
276{ 274{
277 struct inet_peer *peer = qp->peer; 275 struct inet_peer *peer = qp->peer;
278 unsigned int max = sysctl_ipfrag_max_dist; 276 unsigned int max = qp->q.net->max_dist;
279 unsigned int start, end; 277 unsigned int start, end;
280 278
281 int rc; 279 int rc;
@@ -661,6 +659,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
661 struct ipq *qp; 659 struct ipq *qp;
662 660
663 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); 661 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
662 skb_orphan(skb);
664 663
665 /* Lookup (or create) queue header */ 664 /* Lookup (or create) queue header */
666 qp = ip_find(net, ip_hdr(skb), user, vif); 665 qp = ip_find(net, ip_hdr(skb), user, vif);
@@ -748,6 +747,14 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
748 .mode = 0644, 747 .mode = 0644,
749 .proc_handler = proc_dointvec_jiffies, 748 .proc_handler = proc_dointvec_jiffies,
750 }, 749 },
750 {
751 .procname = "ipfrag_max_dist",
752 .data = &init_net.ipv4.frags.max_dist,
753 .maxlen = sizeof(int),
754 .mode = 0644,
755 .proc_handler = proc_dointvec_minmax,
756 .extra1 = &zero
757 },
751 { } 758 { }
752}; 759};
753 760
@@ -761,14 +768,6 @@ static struct ctl_table ip4_frags_ctl_table[] = {
761 .mode = 0644, 768 .mode = 0644,
762 .proc_handler = proc_dointvec_jiffies, 769 .proc_handler = proc_dointvec_jiffies,
763 }, 770 },
764 {
765 .procname = "ipfrag_max_dist",
766 .data = &sysctl_ipfrag_max_dist,
767 .maxlen = sizeof(int),
768 .mode = 0644,
769 .proc_handler = proc_dointvec_minmax,
770 .extra1 = &zero
771 },
772 { } 771 { }
773}; 772};
774 773
@@ -789,10 +788,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
789 table[1].data = &net->ipv4.frags.low_thresh; 788 table[1].data = &net->ipv4.frags.low_thresh;
790 table[1].extra2 = &net->ipv4.frags.high_thresh; 789 table[1].extra2 = &net->ipv4.frags.high_thresh;
791 table[2].data = &net->ipv4.frags.timeout; 790 table[2].data = &net->ipv4.frags.timeout;
792 791 table[3].data = &net->ipv4.frags.max_dist;
793 /* Don't export sysctls to unprivileged users */
794 if (net->user_ns != &init_user_ns)
795 table[0].procname = NULL;
796 } 792 }
797 793
798 hdr = register_net_sysctl(net, "net/ipv4", table); 794 hdr = register_net_sysctl(net, "net/ipv4", table);
@@ -864,6 +860,8 @@ static int __net_init ipv4_frags_init_net(struct net *net)
864 */ 860 */
865 net->ipv4.frags.timeout = IP_FRAG_TIME; 861 net->ipv4.frags.timeout = IP_FRAG_TIME;
866 862
863 net->ipv4.frags.max_dist = 64;
864
867 res = inet_frags_init_net(&net->ipv4.frags); 865 res = inet_frags_init_net(&net->ipv4.frags);
868 if (res) 866 if (res)
869 return res; 867 return res;
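
With the hunks above, ipfrag_max_dist stops being a global and lives in each namespace's net->ipv4.frags (default 64), and its sysctl entry moves into the per-namespace table. A small sketch that reads the value through the usual procfs path; running it inside different network namespaces should show independent settings:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/net/ipv4/ipfrag_max_dist", "r");
        int max_dist = -1;

        if (!f) {
                perror("fopen");
                return 1;
        }
        if (fscanf(f, "%d", &max_dist) != 1) {
                fclose(f);
                return 1;
        }
        fclose(f);
        printf("ipfrag_max_dist = %d\n", max_dist);     /* default is 64 */
        return 0;
}
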
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7c51c4e1661f..31936d387cfd 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -238,7 +238,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
238 return -EINVAL; 238 return -EINVAL;
239 } 239 }
240 } 240 }
241 return iptunnel_pull_header(skb, hdr_len, tpi->proto); 241 return iptunnel_pull_header(skb, hdr_len, tpi->proto, false);
242} 242}
243 243
244static void ipgre_err(struct sk_buff *skb, u32 info, 244static void ipgre_err(struct sk_buff *skb, u32 info,
@@ -440,6 +440,17 @@ drop:
440 return 0; 440 return 0;
441} 441}
442 442
443static __sum16 gre_checksum(struct sk_buff *skb)
444{
445 __wsum csum;
446
447 if (skb->ip_summed == CHECKSUM_PARTIAL)
448 csum = lco_csum(skb);
449 else
450 csum = skb_checksum(skb, 0, skb->len, 0);
451 return csum_fold(csum);
452}
453
443static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags, 454static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
444 __be16 proto, __be32 key, __be32 seq) 455 __be16 proto, __be32 key, __be32 seq)
445{ 456{
@@ -467,8 +478,7 @@ static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
467 !(skb_shinfo(skb)->gso_type & 478 !(skb_shinfo(skb)->gso_type &
468 (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { 479 (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
469 *ptr = 0; 480 *ptr = 0;
470 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, 481 *(__sum16 *)ptr = gre_checksum(skb);
471 skb->len, 0));
472 } 482 }
473 } 483 }
474} 484}
@@ -493,8 +503,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
493static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, 503static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
494 bool csum) 504 bool csum)
495{ 505{
496 return iptunnel_handle_offloads(skb, csum, 506 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
497 csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
498} 507}
499 508
500static struct rtable *gre_get_rt(struct sk_buff *skb, 509static struct rtable *gre_get_rt(struct sk_buff *skb,
@@ -518,11 +527,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
518{ 527{
519 struct ip_tunnel_info *tun_info; 528 struct ip_tunnel_info *tun_info;
520 const struct ip_tunnel_key *key; 529 const struct ip_tunnel_key *key;
530 struct rtable *rt = NULL;
521 struct flowi4 fl; 531 struct flowi4 fl;
522 struct rtable *rt;
523 int min_headroom; 532 int min_headroom;
524 int tunnel_hlen; 533 int tunnel_hlen;
525 __be16 df, flags; 534 __be16 df, flags;
535 bool use_cache;
526 int err; 536 int err;
527 537
528 tun_info = skb_tunnel_info(skb); 538 tun_info = skb_tunnel_info(skb);
@@ -531,9 +541,17 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
531 goto err_free_skb; 541 goto err_free_skb;
532 542
533 key = &tun_info->key; 543 key = &tun_info->key;
534 rt = gre_get_rt(skb, dev, &fl, key); 544 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
535 if (IS_ERR(rt)) 545 if (use_cache)
536 goto err_free_skb; 546 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
547 if (!rt) {
548 rt = gre_get_rt(skb, dev, &fl, key);
549 if (IS_ERR(rt))
550 goto err_free_skb;
551 if (use_cache)
552 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
553 fl.saddr);
554 }
537 555
538 tunnel_hlen = ip_gre_calc_hlen(key->tun_flags); 556 tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
539 557
@@ -1054,8 +1072,9 @@ static const struct net_device_ops gre_tap_netdev_ops = {
1054static void ipgre_tap_setup(struct net_device *dev) 1072static void ipgre_tap_setup(struct net_device *dev)
1055{ 1073{
1056 ether_setup(dev); 1074 ether_setup(dev);
1057 dev->netdev_ops = &gre_tap_netdev_ops; 1075 dev->netdev_ops = &gre_tap_netdev_ops;
1058 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1076 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1077 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1059 ip_tunnel_setup(dev, gre_tap_net_id); 1078 ip_tunnel_setup(dev, gre_tap_net_id);
1060} 1079}
1061 1080
@@ -1240,6 +1259,14 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1240 err = ipgre_newlink(net, dev, tb, NULL); 1259 err = ipgre_newlink(net, dev, tb, NULL);
1241 if (err < 0) 1260 if (err < 0)
1242 goto out; 1261 goto out;
1262
1263 /* openvswitch users expect packet sizes to be unrestricted,
1264 * so set the largest MTU we can.
1265 */
1266 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1267 if (err)
1268 goto out;
1269
1243 return dev; 1270 return dev;
1244out: 1271out:
1245 free_netdev(dev); 1272 free_netdev(dev);
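
gre_checksum() above finishes the GRE checksum either from the local-checksum-offload value (lco_csum(), when the inner checksum is still CHECKSUM_PARTIAL) or from a full skb_checksum(), and then folds the 32-bit accumulator down to the final 16-bit ones'-complement field. A simplified userspace sketch of that accumulate-and-fold step over a flat buffer, not the kernel's skb-based, optimized implementation:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static uint16_t csum_fold(uint32_t sum)
{
        sum = (sum & 0xffff) + (sum >> 16);     /* fold carries twice */
        sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
}

static uint32_t csum_partial(const uint8_t *data, size_t len, uint32_t sum)
{
        size_t i;

        for (i = 0; i + 1 < len; i += 2)
                sum += (uint32_t)data[i] << 8 | data[i + 1];
        if (len & 1)
                sum += (uint32_t)data[len - 1] << 8;    /* pad the odd byte */
        return sum;
}

int main(void)
{
        uint8_t payload[] = { 0x45, 0x00, 0x00, 0x1c, 0xde, 0xad, 0xbe, 0xef };

        printf("checksum = 0x%04x\n",
               csum_fold(csum_partial(payload, sizeof(payload), 0)));
        return 0;
}
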
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index b1209b63381f..e3d782746d9d 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -308,15 +308,15 @@ drop:
308 return true; 308 return true;
309} 309}
310 310
311int sysctl_ip_early_demux __read_mostly = 1;
312EXPORT_SYMBOL(sysctl_ip_early_demux);
313
314static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 311static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
315{ 312{
316 const struct iphdr *iph = ip_hdr(skb); 313 const struct iphdr *iph = ip_hdr(skb);
317 struct rtable *rt; 314 struct rtable *rt;
318 315
319 if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) { 316 if (net->ipv4.sysctl_ip_early_demux &&
317 !skb_dst(skb) &&
318 !skb->sk &&
319 !ip_is_fragment(iph)) {
320 const struct net_protocol *ipprot; 320 const struct net_protocol *ipprot;
321 int protocol = iph->protocol; 321 int protocol = iph->protocol;
322 322
@@ -359,8 +359,31 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
359 rt = skb_rtable(skb); 359 rt = skb_rtable(skb);
360 if (rt->rt_type == RTN_MULTICAST) { 360 if (rt->rt_type == RTN_MULTICAST) {
361 IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len); 361 IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len);
362 } else if (rt->rt_type == RTN_BROADCAST) 362 } else if (rt->rt_type == RTN_BROADCAST) {
363 IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len); 363 IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len);
364 } else if (skb->pkt_type == PACKET_BROADCAST ||
365 skb->pkt_type == PACKET_MULTICAST) {
366 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
367
368 /* RFC 1122 3.3.6:
369 *
370 * When a host sends a datagram to a link-layer broadcast
371 * address, the IP destination address MUST be a legal IP
372 * broadcast or IP multicast address.
373 *
374 * A host SHOULD silently discard a datagram that is received
375 * via a link-layer broadcast (see Section 2.4) but does not
376 * specify an IP multicast or broadcast destination address.
377 *
378 * This doesn't explicitly say L2 *broadcast*, but broadcast is
379 * in a way a form of multicast and the most common use case for
380 * this is 802.11 protecting against cross-station spoofing (the
381 * so-called "hole-196" attack) so do it for both.
382 */
383 if (in_dev &&
384 IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
385 goto drop;
386 }
364 387
365 return dst_input(skb); 388 return dst_input(skb);
366 389
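
The new branch in ip_rcv_finish() applies the RFC 1122 3.3.6 rule quoted in the comment: a datagram that arrived as a link-layer broadcast or multicast should also carry an IP broadcast or multicast destination, and may otherwise be dropped (gated here by a per-device conf option). A compact sketch of the destination-address side of that test, using the simple 224/4-or-limited-broadcast definition rather than the interface's configured broadcast address, which the kernel also matches:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

static int daddr_is_mcast_or_bcast(uint32_t daddr_be)
{
        uint32_t d = ntohl(daddr_be);

        return (d >> 28) == 0xe ||      /* 224.0.0.0/4 multicast */
               d == 0xffffffff;         /* limited broadcast */
}

int main(void)
{
        struct in_addr a;

        inet_pton(AF_INET, "239.1.2.3", &a);
        printf("239.1.2.3 -> %d\n", daddr_is_mcast_or_bcast(a.s_addr));
        inet_pton(AF_INET, "192.0.2.1", &a);
        /* unicast destination: a frame like this arriving as L2 broadcast
         * is what the new check may drop */
        printf("192.0.2.1 -> %d\n", daddr_is_mcast_or_bcast(a.s_addr));
        return 0;
}
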
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index bd246792360b..4d158ff1def1 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -58,10 +58,9 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
58 if (opt->ts_needaddr) 58 if (opt->ts_needaddr)
59 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt); 59 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt);
60 if (opt->ts_needtime) { 60 if (opt->ts_needtime) {
61 struct timespec tv;
62 __be32 midtime; 61 __be32 midtime;
63 getnstimeofday(&tv); 62
64 midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC); 63 midtime = inet_current_timestamp();
65 memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4); 64 memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
66 } 65 }
67 return; 66 return;
@@ -415,11 +414,10 @@ int ip_options_compile(struct net *net,
415 break; 414 break;
416 } 415 }
417 if (timeptr) { 416 if (timeptr) {
418 struct timespec tv; 417 __be32 midtime;
419 u32 midtime; 418
420 getnstimeofday(&tv); 419 midtime = inet_current_timestamp();
421 midtime = (tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC; 420 memcpy(timeptr, &midtime, 4);
422 put_unaligned_be32(midtime, timeptr);
423 opt->is_changed = 1; 421 opt->is_changed = 1;
424 } 422 }
425 } else if ((optptr[3]&0xF) != IPOPT_TS_PRESPEC) { 423 } else if ((optptr[3]&0xF) != IPOPT_TS_PRESPEC) {
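
Both ip_options hunks replace the open-coded getnstimeofday() arithmetic with inet_current_timestamp(), which, as the removed lines show, is the IP timestamp-option value: milliseconds since midnight UT, in network byte order. A small userspace sketch of the same computation, assuming an ordinary clock_gettime(CLOCK_REALTIME):

#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <arpa/inet.h>

static uint32_t ip_ts_ms_of_day(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_REALTIME, &ts);
        /* seconds-of-day * 1000 + milliseconds, then to network byte order */
        return htonl((uint32_t)((ts.tv_sec % 86400) * 1000 +
                                ts.tv_nsec / 1000000));
}

int main(void)
{
        uint32_t ts = ip_ts_ms_of_day();

        printf("timestamp option value: %u ms since midnight UT\n", ntohl(ts));
        return 0;
}
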
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 64878efa045c..124bf0a66328 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -79,9 +79,6 @@
79#include <linux/netlink.h> 79#include <linux/netlink.h>
80#include <linux/tcp.h> 80#include <linux/tcp.h>
81 81
82int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
83EXPORT_SYMBOL(sysctl_ip_default_ttl);
84
85static int 82static int
86ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, 83ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
87 unsigned int mtu, 84 unsigned int mtu,
@@ -1236,13 +1233,16 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
1236 if (!skb) 1233 if (!skb)
1237 return -EINVAL; 1234 return -EINVAL;
1238 1235
1239 cork->length += size;
1240 if ((size + skb->len > mtu) && 1236 if ((size + skb->len > mtu) &&
1241 (sk->sk_protocol == IPPROTO_UDP) && 1237 (sk->sk_protocol == IPPROTO_UDP) &&
1242 (rt->dst.dev->features & NETIF_F_UFO)) { 1238 (rt->dst.dev->features & NETIF_F_UFO)) {
1239 if (skb->ip_summed != CHECKSUM_PARTIAL)
1240 return -EOPNOTSUPP;
1241
1243 skb_shinfo(skb)->gso_size = mtu - fragheaderlen; 1242 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1244 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1243 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1245 } 1244 }
1245 cork->length += size;
1246 1246
1247 while (size > 0) { 1247 while (size > 0) {
1248 if (skb_is_gso(skb)) { 1248 if (skb_is_gso(skb)) {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5f73a7c03e27..035ad645a8d9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc,
249 switch (cmsg->cmsg_type) { 249 switch (cmsg->cmsg_type) {
250 case IP_RETOPTS: 250 case IP_RETOPTS:
251 err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); 251 err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
252
253 /* Our caller is responsible for freeing ipc->opt */
252 err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), 254 err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
253 err < 40 ? err : 40); 255 err < 40 ? err : 40);
254 if (err) 256 if (err)
@@ -571,6 +573,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
571 int optname, char __user *optval, unsigned int optlen) 573 int optname, char __user *optval, unsigned int optlen)
572{ 574{
573 struct inet_sock *inet = inet_sk(sk); 575 struct inet_sock *inet = inet_sk(sk);
576 struct net *net = sock_net(sk);
574 int val = 0, err; 577 int val = 0, err;
575 bool needs_rtnl = setsockopt_needs_rtnl(optname); 578 bool needs_rtnl = setsockopt_needs_rtnl(optname);
576 579
@@ -910,7 +913,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
910 } 913 }
911 /* numsrc >= (1G-4) overflow in 32 bits */ 914 /* numsrc >= (1G-4) overflow in 32 bits */
912 if (msf->imsf_numsrc >= 0x3ffffffcU || 915 if (msf->imsf_numsrc >= 0x3ffffffcU ||
913 msf->imsf_numsrc > sysctl_igmp_max_msf) { 916 msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
914 kfree(msf); 917 kfree(msf);
915 err = -ENOBUFS; 918 err = -ENOBUFS;
916 break; 919 break;
@@ -1065,7 +1068,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
1065 1068
1066 /* numsrc >= (4G-140)/128 overflow in 32 bits */ 1069 /* numsrc >= (4G-140)/128 overflow in 32 bits */
1067 if (gsf->gf_numsrc >= 0x1ffffff || 1070 if (gsf->gf_numsrc >= 0x1ffffff ||
1068 gsf->gf_numsrc > sysctl_igmp_max_msf) { 1071 gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
1069 err = -ENOBUFS; 1072 err = -ENOBUFS;
1070 goto mc_msf_out; 1073 goto mc_msf_out;
1071 } 1074 }
@@ -1340,10 +1343,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1340 val = inet->tos; 1343 val = inet->tos;
1341 break; 1344 break;
1342 case IP_TTL: 1345 case IP_TTL:
1346 {
1347 struct net *net = sock_net(sk);
1343 val = (inet->uc_ttl == -1 ? 1348 val = (inet->uc_ttl == -1 ?
1344 sysctl_ip_default_ttl : 1349 net->ipv4.sysctl_ip_default_ttl :
1345 inet->uc_ttl); 1350 inet->uc_ttl);
1346 break; 1351 break;
1352 }
1347 case IP_HDRINCL: 1353 case IP_HDRINCL:
1348 val = inet->hdrincl; 1354 val = inet->hdrincl;
1349 break; 1355 break;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index c7bd72e9b544..6aad0192443d 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -68,61 +68,6 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
68 IP_TNL_HASH_BITS); 68 IP_TNL_HASH_BITS);
69} 69}
70 70
71static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72 struct dst_entry *dst, __be32 saddr)
73{
74 struct dst_entry *old_dst;
75
76 dst_clone(dst);
77 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
78 dst_release(old_dst);
79 idst->saddr = saddr;
80}
81
82static noinline void tunnel_dst_set(struct ip_tunnel *t,
83 struct dst_entry *dst, __be32 saddr)
84{
85 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
86}
87
88static void tunnel_dst_reset(struct ip_tunnel *t)
89{
90 tunnel_dst_set(t, NULL, 0);
91}
92
93void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
94{
95 int i;
96
97 for_each_possible_cpu(i)
98 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
99}
100EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
101
102static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
103 u32 cookie, __be32 *saddr)
104{
105 struct ip_tunnel_dst *idst;
106 struct dst_entry *dst;
107
108 rcu_read_lock();
109 idst = raw_cpu_ptr(t->dst_cache);
110 dst = rcu_dereference(idst->dst);
111 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
112 dst = NULL;
113 if (dst) {
114 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
115 *saddr = idst->saddr;
116 } else {
117 tunnel_dst_reset(t);
118 dst_release(dst);
119 dst = NULL;
120 }
121 }
122 rcu_read_unlock();
123 return (struct rtable *)dst;
124}
125
126static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, 71static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
127 __be16 flags, __be32 key) 72 __be16 flags, __be32 key)
128{ 73{
@@ -381,7 +326,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
381 326
382 if (!IS_ERR(rt)) { 327 if (!IS_ERR(rt)) {
383 tdev = rt->dst.dev; 328 tdev = rt->dst.dev;
384 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); 329 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
330 fl4.saddr);
385 ip_rt_put(rt); 331 ip_rt_put(rt);
386 } 332 }
387 if (dev->type != ARPHRD_ETHER) 333 if (dev->type != ARPHRD_ETHER)
@@ -661,6 +607,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
661 inner_iph = (const struct iphdr *)skb_inner_network_header(skb); 607 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
662 connected = (tunnel->parms.iph.daddr != 0); 608 connected = (tunnel->parms.iph.daddr != 0);
663 609
610 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
611
664 dst = tnl_params->daddr; 612 dst = tnl_params->daddr;
665 if (dst == 0) { 613 if (dst == 0) {
666 /* NBMA tunnel */ 614 /* NBMA tunnel */
@@ -729,7 +677,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
729 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) 677 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
730 goto tx_error; 678 goto tx_error;
731 679
732 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL; 680 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
681 NULL;
733 682
734 if (!rt) { 683 if (!rt) {
735 rt = ip_route_output_key(tunnel->net, &fl4); 684 rt = ip_route_output_key(tunnel->net, &fl4);
@@ -739,7 +688,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
739 goto tx_error; 688 goto tx_error;
740 } 689 }
741 if (connected) 690 if (connected)
742 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); 691 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
692 fl4.saddr);
743 } 693 }
744 694
745 if (rt->dst.dev == dev) { 695 if (rt->dst.dev == dev) {
@@ -758,7 +708,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
758 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 708 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
759 tunnel->err_count--; 709 tunnel->err_count--;
760 710
761 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
762 dst_link_failure(skb); 711 dst_link_failure(skb);
763 } else 712 } else
764 tunnel->err_count = 0; 713 tunnel->err_count = 0;
@@ -836,7 +785,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
836 if (set_mtu) 785 if (set_mtu)
837 dev->mtu = mtu; 786 dev->mtu = mtu;
838 } 787 }
839 ip_tunnel_dst_reset_all(t); 788 dst_cache_reset(&t->dst_cache);
840 netdev_state_change(dev); 789 netdev_state_change(dev);
841} 790}
842 791
@@ -943,17 +892,31 @@ done:
943} 892}
944EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); 893EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
945 894
946int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 895int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
947{ 896{
948 struct ip_tunnel *tunnel = netdev_priv(dev); 897 struct ip_tunnel *tunnel = netdev_priv(dev);
949 int t_hlen = tunnel->hlen + sizeof(struct iphdr); 898 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
899 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
950 900
951 if (new_mtu < 68 || 901 if (new_mtu < 68)
952 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
953 return -EINVAL; 902 return -EINVAL;
903
904 if (new_mtu > max_mtu) {
905 if (strict)
906 return -EINVAL;
907
908 new_mtu = max_mtu;
909 }
910
954 dev->mtu = new_mtu; 911 dev->mtu = new_mtu;
955 return 0; 912 return 0;
956} 913}
914EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
915
916int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
917{
918 return __ip_tunnel_change_mtu(dev, new_mtu, true);
919}
957EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); 920EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
958 921
959static void ip_tunnel_dev_free(struct net_device *dev) 922static void ip_tunnel_dev_free(struct net_device *dev)
@@ -961,7 +924,7 @@ static void ip_tunnel_dev_free(struct net_device *dev)
961 struct ip_tunnel *tunnel = netdev_priv(dev); 924 struct ip_tunnel *tunnel = netdev_priv(dev);
962 925
963 gro_cells_destroy(&tunnel->gro_cells); 926 gro_cells_destroy(&tunnel->gro_cells);
964 free_percpu(tunnel->dst_cache); 927 dst_cache_destroy(&tunnel->dst_cache);
965 free_percpu(dev->tstats); 928 free_percpu(dev->tstats);
966 free_netdev(dev); 929 free_netdev(dev);
967} 930}
@@ -1155,15 +1118,15 @@ int ip_tunnel_init(struct net_device *dev)
1155 if (!dev->tstats) 1118 if (!dev->tstats)
1156 return -ENOMEM; 1119 return -ENOMEM;
1157 1120
1158 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); 1121 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1159 if (!tunnel->dst_cache) { 1122 if (err) {
1160 free_percpu(dev->tstats); 1123 free_percpu(dev->tstats);
1161 return -ENOMEM; 1124 return err;
1162 } 1125 }
1163 1126
1164 err = gro_cells_init(&tunnel->gro_cells, dev); 1127 err = gro_cells_init(&tunnel->gro_cells, dev);
1165 if (err) { 1128 if (err) {
1166 free_percpu(tunnel->dst_cache); 1129 dst_cache_destroy(&tunnel->dst_cache);
1167 free_percpu(dev->tstats); 1130 free_percpu(dev->tstats);
1168 return err; 1131 return err;
1169 } 1132 }
@@ -1193,7 +1156,7 @@ void ip_tunnel_uninit(struct net_device *dev)
1193 if (itn->fb_tunnel_dev != dev) 1156 if (itn->fb_tunnel_dev != dev)
1194 ip_tunnel_del(itn, netdev_priv(dev)); 1157 ip_tunnel_del(itn, netdev_priv(dev));
1195 1158
1196 ip_tunnel_dst_reset_all(tunnel); 1159 dst_cache_reset(&tunnel->dst_cache);
1197} 1160}
1198EXPORT_SYMBOL_GPL(ip_tunnel_uninit); 1161EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1199 1162
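
__ip_tunnel_change_mtu() above introduces a strict flag: an over-large request either fails (strict, the old ndo_change_mtu behaviour preserved by ip_tunnel_change_mtu()) or is clamped to the tunnel's maximum, which is what gretap_fb_dev_create() relies on when it asks for IP_MAX_MTU. A tiny sketch of that policy, with max_mtu as a placeholder for the kernel's 0xFFF8-minus-headroom computation:

#include <errno.h>
#include <stdio.h>

static int change_mtu(int *dev_mtu, int new_mtu, int max_mtu, int strict)
{
        if (new_mtu < 68)               /* IPv4 minimum MTU */
                return -EINVAL;
        if (new_mtu > max_mtu) {
                if (strict)
                        return -EINVAL; /* old behaviour: reject */
                new_mtu = max_mtu;      /* new behaviour: clamp for internal callers */
        }
        *dev_mtu = new_mtu;
        return 0;
}

int main(void)
{
        int mtu = 1500;

        change_mtu(&mtu, 65535, 1476, 0);       /* clamped to 1476 */
        printf("mtu = %d\n", mtu);
        change_mtu(&mtu, 65535, 1476, 1);       /* rejected, mtu unchanged */
        printf("mtu = %d\n", mtu);
        return 0;
}
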
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 859d415c0b2d..02dd990af542 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -86,7 +86,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
86} 86}
87EXPORT_SYMBOL_GPL(iptunnel_xmit); 87EXPORT_SYMBOL_GPL(iptunnel_xmit);
88 88
89int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) 89int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
90 bool xnet)
90{ 91{
91 if (unlikely(!pskb_may_pull(skb, hdr_len))) 92 if (unlikely(!pskb_may_pull(skb, hdr_len)))
92 return -ENOMEM; 93 return -ENOMEM;
@@ -109,14 +110,12 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
109 skb->protocol = inner_proto; 110 skb->protocol = inner_proto;
110 } 111 }
111 112
112 nf_reset(skb);
113 secpath_reset(skb);
114 skb_clear_hash_if_not_l4(skb); 113 skb_clear_hash_if_not_l4(skb);
115 skb_dst_drop(skb);
116 skb->vlan_tci = 0; 114 skb->vlan_tci = 0;
117 skb_set_queue_mapping(skb, 0); 115 skb_set_queue_mapping(skb, 0);
118 skb->pkt_type = PACKET_HOST; 116 skb_scrub_packet(skb, xnet);
119 return 0; 117
118 return iptunnel_pull_offloads(skb);
120} 119}
121EXPORT_SYMBOL_GPL(iptunnel_pull_header); 120EXPORT_SYMBOL_GPL(iptunnel_pull_header);
122 121
@@ -148,7 +147,6 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
148EXPORT_SYMBOL_GPL(iptunnel_metadata_reply); 147EXPORT_SYMBOL_GPL(iptunnel_metadata_reply);
149 148
150struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, 149struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
151 bool csum_help,
152 int gso_type_mask) 150 int gso_type_mask)
153{ 151{
154 int err; 152 int err;
@@ -166,20 +164,15 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
166 return skb; 164 return skb;
167 } 165 }
168 166
169 /* If packet is not gso and we are resolving any partial checksum, 167 if (skb->ip_summed != CHECKSUM_PARTIAL) {
170 * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
171 * on the outer header without confusing devices that implement
172 * NETIF_F_IP_CSUM with encapsulation.
173 */
174 if (csum_help)
175 skb->encapsulation = 0;
176
177 if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
178 err = skb_checksum_help(skb);
179 if (unlikely(err))
180 goto error;
181 } else if (skb->ip_summed != CHECKSUM_PARTIAL)
182 skb->ip_summed = CHECKSUM_NONE; 168 skb->ip_summed = CHECKSUM_NONE;
169 /* We clear encapsulation here to prevent badly-written
170 * drivers potentially deciding to offload an inner checksum
171 * if we set CHECKSUM_PARTIAL on the outer header.
172 * This should go away when the drivers are all fixed.
173 */
174 skb->encapsulation = 0;
175 }
183 176
184 return skb; 177 return skb;
185error: 178error:
@@ -406,6 +399,12 @@ static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
406 399
407void __init ip_tunnel_core_init(void) 400void __init ip_tunnel_core_init(void)
408{ 401{
402 /* If you land here, make sure whether increasing ip_tunnel_info's
403 * options_len is a reasonable choice with its usage in front ends
404 * (f.e., it's part of flow keys, etc).
405 */
406 BUILD_BUG_ON(IP_TUNNEL_OPTS_MAX != 255);
407
409 lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP); 408 lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);
410 lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6); 409 lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6);
411} 410}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 67f7c9de0b16..2ed9dd2b5f2f 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -143,7 +143,11 @@ static char dhcp_client_identifier[253] __initdata;
143 143
144/* Persistent data: */ 144/* Persistent data: */
145 145
146#ifdef IPCONFIG_DYNAMIC
146static int ic_proto_used; /* Protocol used, if any */ 147static int ic_proto_used; /* Protocol used, if any */
148#else
149#define ic_proto_used 0
150#endif
147static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */ 151static __be32 ic_nameservers[CONF_NAMESERVERS_MAX]; /* DNS Server IP addresses */
148static u8 ic_domain[64]; /* DNS (not NIS) domain name */ 152static u8 ic_domain[64]; /* DNS (not NIS) domain name */
149 153
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 4044da61e747..ec51d02166de 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -195,7 +195,7 @@ static int ipip_rcv(struct sk_buff *skb)
195 if (tunnel) { 195 if (tunnel) {
196 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 196 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
197 goto drop; 197 goto drop;
198 if (iptunnel_pull_header(skb, 0, tpi.proto)) 198 if (iptunnel_pull_header(skb, 0, tpi.proto, false))
199 goto drop; 199 goto drop;
200 return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); 200 return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
201 } 201 }
@@ -219,7 +219,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
219 if (unlikely(skb->protocol != htons(ETH_P_IP))) 219 if (unlikely(skb->protocol != htons(ETH_P_IP)))
220 goto tx_error; 220 goto tx_error;
221 221
222 skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); 222 skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
223 if (IS_ERR(skb)) 223 if (IS_ERR(skb))
224 goto out; 224 goto out;
225 225
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b488cac9c5ca..bf081927e06b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1780,9 +1780,29 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1780 return ret; 1780 return ret;
1781} 1781}
1782 1782
1783struct xt_table *arpt_register_table(struct net *net, 1783static void __arpt_unregister_table(struct xt_table *table)
1784 const struct xt_table *table, 1784{
1785 const struct arpt_replace *repl) 1785 struct xt_table_info *private;
1786 void *loc_cpu_entry;
1787 struct module *table_owner = table->me;
1788 struct arpt_entry *iter;
1789
1790 private = xt_unregister_table(table);
1791
1792 /* Decrease module usage counts and free resources */
1793 loc_cpu_entry = private->entries;
1794 xt_entry_foreach(iter, loc_cpu_entry, private->size)
1795 cleanup_entry(iter);
1796 if (private->number > private->initial_entries)
1797 module_put(table_owner);
1798 xt_free_table_info(private);
1799}
1800
1801int arpt_register_table(struct net *net,
1802 const struct xt_table *table,
1803 const struct arpt_replace *repl,
1804 const struct nf_hook_ops *ops,
1805 struct xt_table **res)
1786{ 1806{
1787 int ret; 1807 int ret;
1788 struct xt_table_info *newinfo; 1808 struct xt_table_info *newinfo;
@@ -1791,10 +1811,8 @@ struct xt_table *arpt_register_table(struct net *net,
1791 struct xt_table *new_table; 1811 struct xt_table *new_table;
1792 1812
1793 newinfo = xt_alloc_table_info(repl->size); 1813 newinfo = xt_alloc_table_info(repl->size);
1794 if (!newinfo) { 1814 if (!newinfo)
1795 ret = -ENOMEM; 1815 return -ENOMEM;
1796 goto out;
1797 }
1798 1816
1799 loc_cpu_entry = newinfo->entries; 1817 loc_cpu_entry = newinfo->entries;
1800 memcpy(loc_cpu_entry, repl->entries, repl->size); 1818 memcpy(loc_cpu_entry, repl->entries, repl->size);
@@ -1809,30 +1827,28 @@ struct xt_table *arpt_register_table(struct net *net,
1809 ret = PTR_ERR(new_table); 1827 ret = PTR_ERR(new_table);
1810 goto out_free; 1828 goto out_free;
1811 } 1829 }
1812 return new_table; 1830
1831 /* set res now, will see skbs right after nf_register_net_hooks */
1832 WRITE_ONCE(*res, new_table);
1833
1834 ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
1835 if (ret != 0) {
1836 __arpt_unregister_table(new_table);
1837 *res = NULL;
1838 }
1839
1840 return ret;
1813 1841
1814out_free: 1842out_free:
1815 xt_free_table_info(newinfo); 1843 xt_free_table_info(newinfo);
1816out: 1844 return ret;
1817 return ERR_PTR(ret);
1818} 1845}
1819 1846
1820void arpt_unregister_table(struct xt_table *table) 1847void arpt_unregister_table(struct net *net, struct xt_table *table,
1848 const struct nf_hook_ops *ops)
1821{ 1849{
1822 struct xt_table_info *private; 1850 nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
1823 void *loc_cpu_entry; 1851 __arpt_unregister_table(table);
1824 struct module *table_owner = table->me;
1825 struct arpt_entry *iter;
1826
1827 private = xt_unregister_table(table);
1828
1829 /* Decrease module usage counts and free resources */
1830 loc_cpu_entry = private->entries;
1831 xt_entry_foreach(iter, loc_cpu_entry, private->size)
1832 cleanup_entry(iter);
1833 if (private->number > private->initial_entries)
1834 module_put(table_owner);
1835 xt_free_table_info(private);
1836} 1852}
1837 1853
1838/* The built-in targets: standard (NULL) and error. */ 1854/* The built-in targets: standard (NULL) and error. */
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 1897ee160920..dd8c80dc32a2 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -17,12 +17,15 @@ MODULE_DESCRIPTION("arptables filter table");
17#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ 17#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
18 (1 << NF_ARP_FORWARD)) 18 (1 << NF_ARP_FORWARD))
19 19
20static int __net_init arptable_filter_table_init(struct net *net);
21
20static const struct xt_table packet_filter = { 22static const struct xt_table packet_filter = {
21 .name = "filter", 23 .name = "filter",
22 .valid_hooks = FILTER_VALID_HOOKS, 24 .valid_hooks = FILTER_VALID_HOOKS,
23 .me = THIS_MODULE, 25 .me = THIS_MODULE,
24 .af = NFPROTO_ARP, 26 .af = NFPROTO_ARP,
25 .priority = NF_IP_PRI_FILTER, 27 .priority = NF_IP_PRI_FILTER,
28 .table_init = arptable_filter_table_init,
26}; 29};
27 30
28/* The work comes in here from netfilter.c */ 31/* The work comes in here from netfilter.c */
@@ -35,26 +38,32 @@ arptable_filter_hook(void *priv, struct sk_buff *skb,
35 38
36static struct nf_hook_ops *arpfilter_ops __read_mostly; 39static struct nf_hook_ops *arpfilter_ops __read_mostly;
37 40
38static int __net_init arptable_filter_net_init(struct net *net) 41static int __net_init arptable_filter_table_init(struct net *net)
39{ 42{
40 struct arpt_replace *repl; 43 struct arpt_replace *repl;
41 44 int err;
45
46 if (net->ipv4.arptable_filter)
47 return 0;
48
42 repl = arpt_alloc_initial_table(&packet_filter); 49 repl = arpt_alloc_initial_table(&packet_filter);
43 if (repl == NULL) 50 if (repl == NULL)
44 return -ENOMEM; 51 return -ENOMEM;
45 net->ipv4.arptable_filter = 52 err = arpt_register_table(net, &packet_filter, repl, arpfilter_ops,
46 arpt_register_table(net, &packet_filter, repl); 53 &net->ipv4.arptable_filter);
47 kfree(repl); 54 kfree(repl);
48 return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter); 55 return err;
49} 56}
50 57
51static void __net_exit arptable_filter_net_exit(struct net *net) 58static void __net_exit arptable_filter_net_exit(struct net *net)
52{ 59{
53 arpt_unregister_table(net->ipv4.arptable_filter); 60 if (!net->ipv4.arptable_filter)
61 return;
62 arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops);
63 net->ipv4.arptable_filter = NULL;
54} 64}
55 65
56static struct pernet_operations arptable_filter_net_ops = { 66static struct pernet_operations arptable_filter_net_ops = {
57 .init = arptable_filter_net_init,
58 .exit = arptable_filter_net_exit, 67 .exit = arptable_filter_net_exit,
59}; 68};
60 69
@@ -62,26 +71,23 @@ static int __init arptable_filter_init(void)
62{ 71{
63 int ret; 72 int ret;
64 73
74 arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
75 if (IS_ERR(arpfilter_ops))
76 return PTR_ERR(arpfilter_ops);
77
65 ret = register_pernet_subsys(&arptable_filter_net_ops); 78 ret = register_pernet_subsys(&arptable_filter_net_ops);
66 if (ret < 0) 79 if (ret < 0) {
80 kfree(arpfilter_ops);
67 return ret; 81 return ret;
68
69 arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook);
70 if (IS_ERR(arpfilter_ops)) {
71 ret = PTR_ERR(arpfilter_ops);
72 goto cleanup_table;
73 } 82 }
74 return ret;
75 83
76cleanup_table:
77 unregister_pernet_subsys(&arptable_filter_net_ops);
78 return ret; 84 return ret;
79} 85}
80 86
81static void __exit arptable_filter_fini(void) 87static void __exit arptable_filter_fini(void)
82{ 88{
83 xt_hook_unlink(&packet_filter, arpfilter_ops);
84 unregister_pernet_subsys(&arptable_filter_net_ops); 89 unregister_pernet_subsys(&arptable_filter_net_ops);
90 kfree(arpfilter_ops);
85} 91}
86 92
87module_init(arptable_filter_init); 93module_init(arptable_filter_init);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b99affad6ba1..e53f8d6f326d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2062,9 +2062,27 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2062 return ret; 2062 return ret;
2063} 2063}
2064 2064
2065struct xt_table *ipt_register_table(struct net *net, 2065static void __ipt_unregister_table(struct net *net, struct xt_table *table)
2066 const struct xt_table *table, 2066{
2067 const struct ipt_replace *repl) 2067 struct xt_table_info *private;
2068 void *loc_cpu_entry;
2069 struct module *table_owner = table->me;
2070 struct ipt_entry *iter;
2071
2072 private = xt_unregister_table(table);
2073
2074 /* Decrease module usage counts and free resources */
2075 loc_cpu_entry = private->entries;
2076 xt_entry_foreach(iter, loc_cpu_entry, private->size)
2077 cleanup_entry(iter, net);
2078 if (private->number > private->initial_entries)
2079 module_put(table_owner);
2080 xt_free_table_info(private);
2081}
2082
2083int ipt_register_table(struct net *net, const struct xt_table *table,
2084 const struct ipt_replace *repl,
2085 const struct nf_hook_ops *ops, struct xt_table **res)
2068{ 2086{
2069 int ret; 2087 int ret;
2070 struct xt_table_info *newinfo; 2088 struct xt_table_info *newinfo;
@@ -2073,10 +2091,8 @@ struct xt_table *ipt_register_table(struct net *net,
2073 struct xt_table *new_table; 2091 struct xt_table *new_table;
2074 2092
2075 newinfo = xt_alloc_table_info(repl->size); 2093 newinfo = xt_alloc_table_info(repl->size);
2076 if (!newinfo) { 2094 if (!newinfo)
2077 ret = -ENOMEM; 2095 return -ENOMEM;
2078 goto out;
2079 }
2080 2096
2081 loc_cpu_entry = newinfo->entries; 2097 loc_cpu_entry = newinfo->entries;
2082 memcpy(loc_cpu_entry, repl->entries, repl->size); 2098 memcpy(loc_cpu_entry, repl->entries, repl->size);
@@ -2091,30 +2107,27 @@ struct xt_table *ipt_register_table(struct net *net,
2091 goto out_free; 2107 goto out_free;
2092 } 2108 }
2093 2109
2094 return new_table; 2110 /* set res now, will see skbs right after nf_register_net_hooks */
2111 WRITE_ONCE(*res, new_table);
2112
2113 ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
2114 if (ret != 0) {
2115 __ipt_unregister_table(net, new_table);
2116 *res = NULL;
2117 }
2118
2119 return ret;
2095 2120
2096out_free: 2121out_free:
2097 xt_free_table_info(newinfo); 2122 xt_free_table_info(newinfo);
2098out: 2123 return ret;
2099 return ERR_PTR(ret);
2100} 2124}
2101 2125
2102void ipt_unregister_table(struct net *net, struct xt_table *table) 2126void ipt_unregister_table(struct net *net, struct xt_table *table,
2127 const struct nf_hook_ops *ops)
2103{ 2128{
2104 struct xt_table_info *private; 2129 nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
2105 void *loc_cpu_entry; 2130 __ipt_unregister_table(net, table);
2106 struct module *table_owner = table->me;
2107 struct ipt_entry *iter;
2108
2109 private = xt_unregister_table(table);
2110
2111 /* Decrease module usage counts and free resources */
2112 loc_cpu_entry = private->entries;
2113 xt_entry_foreach(iter, loc_cpu_entry, private->size)
2114 cleanup_entry(iter, net);
2115 if (private->number > private->initial_entries)
2116 module_put(table_owner);
2117 xt_free_table_info(private);
2118} 2131}
2119 2132
2120/* Returns 1 if the type and code is matched by the range, 0 otherwise */ 2133/* Returns 1 if the type and code is matched by the range, 0 otherwise */
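
With the new signature, ipt_register_table() stores the table in the caller-supplied *res with WRITE_ONCE() before calling nf_register_net_hooks(), because packets can traverse the hooks as soon as registration completes; on hook-registration failure it tears the table down again via __ipt_unregister_table() and clears *res. A small userspace sketch of the same publish-before-enable / rollback-on-failure ordering; the names are placeholders and a plain pointer store stands in for WRITE_ONCE():

#include <stdio.h>
#include <stdlib.h>

struct table { const char *name; };

static struct table *table_alloc(const char *name)
{
	struct table *t = malloc(sizeof(*t));

	if (t)
		t->name = name;
	return t;
}

static int hooks_register(void)
{
	return 0;	/* flip to -1 to exercise the rollback path */
}

static int register_table(const char *name, struct table **res)
{
	struct table *t = table_alloc(name);
	int ret;

	if (!t)
		return -1;

	*res = t;		/* publish first: consumers may look at *res
				 * as soon as the hooks below go live */
	ret = hooks_register();
	if (ret != 0) {
		*res = NULL;	/* rollback: unpublish, then free */
		free(t);
	}
	return ret;
}

int main(void)
{
	struct table *filter = NULL;

	if (register_table("filter", &filter) == 0)
		printf("table %s registered\n", filter->name);
	free(filter);
	return 0;
}
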
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 5fdc556514ba..7b8fbb352877 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -21,6 +21,7 @@ static struct iphdr *
21synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) 21synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
22{ 22{
23 struct iphdr *iph; 23 struct iphdr *iph;
24 struct net *net = sock_net(skb->sk);
24 25
25 skb_reset_network_header(skb); 26 skb_reset_network_header(skb);
26 iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); 27 iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
@@ -29,7 +30,7 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
29 iph->tos = 0; 30 iph->tos = 0;
30 iph->id = 0; 31 iph->id = 0;
31 iph->frag_off = htons(IP_DF); 32 iph->frag_off = htons(IP_DF);
32 iph->ttl = sysctl_ip_default_ttl; 33 iph->ttl = net->ipv4.sysctl_ip_default_ttl;
33 iph->protocol = IPPROTO_TCP; 34 iph->protocol = IPPROTO_TCP;
34 iph->check = 0; 35 iph->check = 0;
35 iph->saddr = saddr; 36 iph->saddr = saddr;
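
The SYNPROXY hunk only changes where the default TTL comes from (the per-namespace net->ipv4.sysctl_ip_default_ttl instead of the former global), but since it is building a fresh IPv4 header it is worth recalling what the field it zeroes, iph->check, holds once filled in later: the RFC 791 header checksum, the 16-bit one's complement of the one's-complement sum over the 20-byte header. A standalone sketch of that arithmetic; the header bytes below are arbitrary example values:

#include <stdint.h>
#include <stdio.h>

/* RFC 791 header checksum: one's-complement sum of 16-bit words,
 * computed with the checksum field itself set to zero.
 */
static uint16_t ip_hdr_csum(const void *hdr, size_t len)
{
	const uint8_t *p = hdr;
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)			/* odd trailing byte (never for an IPv4 header) */
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)		/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* Minimal 20-byte IPv4 header: version/IHL, TOS, total length, ID,
	 * flags/frag offset (DF), TTL=64, protocol=6 (TCP), checksum=0,
	 * then source and destination addresses.
	 */
	uint8_t iph[20] = {
		0x45, 0x00, 0x00, 0x28, 0x00, 0x00, 0x40, 0x00,
		0x40, 0x06, 0x00, 0x00, 0xc0, 0xa8, 0x00, 0x01,
		0xc0, 0xa8, 0x00, 0x02,
	};

	printf("header checksum: 0x%04x\n", ip_hdr_csum(iph, sizeof(iph)));
	return 0;
}
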
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 397ef2dd133e..7667f223d7f8 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -23,6 +23,7 @@ MODULE_DESCRIPTION("iptables filter table");
23#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ 23#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
24 (1 << NF_INET_FORWARD) | \ 24 (1 << NF_INET_FORWARD) | \
25 (1 << NF_INET_LOCAL_OUT)) 25 (1 << NF_INET_LOCAL_OUT))
26static int __net_init iptable_filter_table_init(struct net *net);
26 27
27static const struct xt_table packet_filter = { 28static const struct xt_table packet_filter = {
28 .name = "filter", 29 .name = "filter",
@@ -30,6 +31,7 @@ static const struct xt_table packet_filter = {
30 .me = THIS_MODULE, 31 .me = THIS_MODULE,
31 .af = NFPROTO_IPV4, 32 .af = NFPROTO_IPV4,
32 .priority = NF_IP_PRI_FILTER, 33 .priority = NF_IP_PRI_FILTER,
34 .table_init = iptable_filter_table_init,
33}; 35};
34 36
35static unsigned int 37static unsigned int
@@ -48,12 +50,16 @@ iptable_filter_hook(void *priv, struct sk_buff *skb,
48static struct nf_hook_ops *filter_ops __read_mostly; 50static struct nf_hook_ops *filter_ops __read_mostly;
49 51
50/* Default to forward because I got too much mail already. */ 52/* Default to forward because I got too much mail already. */
51static bool forward = true; 53static bool forward __read_mostly = true;
52module_param(forward, bool, 0000); 54module_param(forward, bool, 0000);
53 55
54static int __net_init iptable_filter_net_init(struct net *net) 56static int __net_init iptable_filter_table_init(struct net *net)
55{ 57{
56 struct ipt_replace *repl; 58 struct ipt_replace *repl;
59 int err;
60
61 if (net->ipv4.iptable_filter)
62 return 0;
57 63
58 repl = ipt_alloc_initial_table(&packet_filter); 64 repl = ipt_alloc_initial_table(&packet_filter);
59 if (repl == NULL) 65 if (repl == NULL)
@@ -62,15 +68,26 @@ static int __net_init iptable_filter_net_init(struct net *net)
62 ((struct ipt_standard *)repl->entries)[1].target.verdict = 68 ((struct ipt_standard *)repl->entries)[1].target.verdict =
63 forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; 69 forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
64 70
65 net->ipv4.iptable_filter = 71 err = ipt_register_table(net, &packet_filter, repl, filter_ops,
66 ipt_register_table(net, &packet_filter, repl); 72 &net->ipv4.iptable_filter);
67 kfree(repl); 73 kfree(repl);
68 return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter); 74 return err;
75}
76
77static int __net_init iptable_filter_net_init(struct net *net)
78{
79 if (net == &init_net || !forward)
80 return iptable_filter_table_init(net);
81
82 return 0;
69} 83}
70 84
71static void __net_exit iptable_filter_net_exit(struct net *net) 85static void __net_exit iptable_filter_net_exit(struct net *net)
72{ 86{
73 ipt_unregister_table(net, net->ipv4.iptable_filter); 87 if (!net->ipv4.iptable_filter)
88 return;
89 ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops);
90 net->ipv4.iptable_filter = NULL;
74} 91}
75 92
76static struct pernet_operations iptable_filter_net_ops = { 93static struct pernet_operations iptable_filter_net_ops = {
@@ -82,24 +99,21 @@ static int __init iptable_filter_init(void)
82{ 99{
83 int ret; 100 int ret;
84 101
102 filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
103 if (IS_ERR(filter_ops))
104 return PTR_ERR(filter_ops);
105
85 ret = register_pernet_subsys(&iptable_filter_net_ops); 106 ret = register_pernet_subsys(&iptable_filter_net_ops);
86 if (ret < 0) 107 if (ret < 0)
87 return ret; 108 kfree(filter_ops);
88
89 /* Register hooks */
90 filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
91 if (IS_ERR(filter_ops)) {
92 ret = PTR_ERR(filter_ops);
93 unregister_pernet_subsys(&iptable_filter_net_ops);
94 }
95 109
96 return ret; 110 return ret;
97} 111}
98 112
99static void __exit iptable_filter_fini(void) 113static void __exit iptable_filter_fini(void)
100{ 114{
101 xt_hook_unlink(&packet_filter, filter_ops);
102 unregister_pernet_subsys(&iptable_filter_net_ops); 115 unregister_pernet_subsys(&iptable_filter_net_ops);
116 kfree(filter_ops);
103} 117}
104 118
105module_init(iptable_filter_init); 119module_init(iptable_filter_init);
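
The per-netns filter table is now lazy: iptable_filter_table_init() bails out if net->ipv4.iptable_filter already exists, the pernet init only creates the table up front for the initial namespace (or when the forward policy demands it), and the exit path both tolerates a table that was never created and clears the pointer so a later init can run again. A compact userspace sketch of that idempotent-init / guarded-teardown shape, with a plain struct standing in for struct net:

#include <stdio.h>
#include <stdlib.h>

struct table { int rules; };

/* Stand-in for the per-namespace state (struct net -> ipv4.iptable_filter). */
struct ns {
	struct table *filter;
};

static int table_init(struct ns *ns)
{
	if (ns->filter)			/* already set up: idempotent */
		return 0;

	ns->filter = calloc(1, sizeof(*ns->filter));
	return ns->filter ? 0 : -1;
}

static void table_exit(struct ns *ns)
{
	if (!ns->filter)		/* never initialised for this ns */
		return;
	free(ns->filter);
	ns->filter = NULL;		/* allow a later re-init */
}

int main(void)
{
	struct ns ns = { 0 };

	table_init(&ns);	/* first call creates the table */
	table_init(&ns);	/* second call is a no-op */
	table_exit(&ns);
	table_exit(&ns);	/* safe even though nothing is left */
	return 0;
}
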
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index ba5d392a13c4..57fc97cdac70 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -28,12 +28,15 @@ MODULE_DESCRIPTION("iptables mangle table");
28 (1 << NF_INET_LOCAL_OUT) | \ 28 (1 << NF_INET_LOCAL_OUT) | \
29 (1 << NF_INET_POST_ROUTING)) 29 (1 << NF_INET_POST_ROUTING))
30 30
31static int __net_init iptable_mangle_table_init(struct net *net);
32
31static const struct xt_table packet_mangler = { 33static const struct xt_table packet_mangler = {
32 .name = "mangle", 34 .name = "mangle",
33 .valid_hooks = MANGLE_VALID_HOOKS, 35 .valid_hooks = MANGLE_VALID_HOOKS,
34 .me = THIS_MODULE, 36 .me = THIS_MODULE,
35 .af = NFPROTO_IPV4, 37 .af = NFPROTO_IPV4,
36 .priority = NF_IP_PRI_MANGLE, 38 .priority = NF_IP_PRI_MANGLE,
39 .table_init = iptable_mangle_table_init,
37}; 40};
38 41
39static unsigned int 42static unsigned int
@@ -92,27 +95,32 @@ iptable_mangle_hook(void *priv,
92} 95}
93 96
94static struct nf_hook_ops *mangle_ops __read_mostly; 97static struct nf_hook_ops *mangle_ops __read_mostly;
95 98static int __net_init iptable_mangle_table_init(struct net *net)
96static int __net_init iptable_mangle_net_init(struct net *net)
97{ 99{
98 struct ipt_replace *repl; 100 struct ipt_replace *repl;
101 int ret;
102
103 if (net->ipv4.iptable_mangle)
104 return 0;
99 105
100 repl = ipt_alloc_initial_table(&packet_mangler); 106 repl = ipt_alloc_initial_table(&packet_mangler);
101 if (repl == NULL) 107 if (repl == NULL)
102 return -ENOMEM; 108 return -ENOMEM;
103 net->ipv4.iptable_mangle = 109 ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops,
104 ipt_register_table(net, &packet_mangler, repl); 110 &net->ipv4.iptable_mangle);
105 kfree(repl); 111 kfree(repl);
106 return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle); 112 return ret;
107} 113}
108 114
109static void __net_exit iptable_mangle_net_exit(struct net *net) 115static void __net_exit iptable_mangle_net_exit(struct net *net)
110{ 116{
111 ipt_unregister_table(net, net->ipv4.iptable_mangle); 117 if (!net->ipv4.iptable_mangle)
118 return;
119 ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops);
120 net->ipv4.iptable_mangle = NULL;
112} 121}
113 122
114static struct pernet_operations iptable_mangle_net_ops = { 123static struct pernet_operations iptable_mangle_net_ops = {
115 .init = iptable_mangle_net_init,
116 .exit = iptable_mangle_net_exit, 124 .exit = iptable_mangle_net_exit,
117}; 125};
118 126
@@ -120,15 +128,22 @@ static int __init iptable_mangle_init(void)
120{ 128{
121 int ret; 129 int ret;
122 130
131 mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook);
132 if (IS_ERR(mangle_ops)) {
133 ret = PTR_ERR(mangle_ops);
134 return ret;
135 }
136
123 ret = register_pernet_subsys(&iptable_mangle_net_ops); 137 ret = register_pernet_subsys(&iptable_mangle_net_ops);
124 if (ret < 0) 138 if (ret < 0) {
139 kfree(mangle_ops);
125 return ret; 140 return ret;
141 }
126 142
127 /* Register hooks */ 143 ret = iptable_mangle_table_init(&init_net);
128 mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); 144 if (ret) {
129 if (IS_ERR(mangle_ops)) {
130 ret = PTR_ERR(mangle_ops);
131 unregister_pernet_subsys(&iptable_mangle_net_ops); 145 unregister_pernet_subsys(&iptable_mangle_net_ops);
146 kfree(mangle_ops);
132 } 147 }
133 148
134 return ret; 149 return ret;
@@ -136,8 +151,8 @@ static int __init iptable_mangle_init(void)
136 151
137static void __exit iptable_mangle_fini(void) 152static void __exit iptable_mangle_fini(void)
138{ 153{
139 xt_hook_unlink(&packet_mangler, mangle_ops);
140 unregister_pernet_subsys(&iptable_mangle_net_ops); 154 unregister_pernet_subsys(&iptable_mangle_net_ops);
155 kfree(mangle_ops);
141} 156}
142 157
143module_init(iptable_mangle_init); 158module_init(iptable_mangle_init);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ae2cd2752046..138a24bc76ad 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -18,6 +18,8 @@
18#include <net/netfilter/nf_nat_core.h> 18#include <net/netfilter/nf_nat_core.h>
19#include <net/netfilter/nf_nat_l3proto.h> 19#include <net/netfilter/nf_nat_l3proto.h>
20 20
21static int __net_init iptable_nat_table_init(struct net *net);
22
21static const struct xt_table nf_nat_ipv4_table = { 23static const struct xt_table nf_nat_ipv4_table = {
22 .name = "nat", 24 .name = "nat",
23 .valid_hooks = (1 << NF_INET_PRE_ROUTING) | 25 .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -26,6 +28,7 @@ static const struct xt_table nf_nat_ipv4_table = {
26 (1 << NF_INET_LOCAL_IN), 28 (1 << NF_INET_LOCAL_IN),
27 .me = THIS_MODULE, 29 .me = THIS_MODULE,
28 .af = NFPROTO_IPV4, 30 .af = NFPROTO_IPV4,
31 .table_init = iptable_nat_table_init,
29}; 32};
30 33
31static unsigned int iptable_nat_do_chain(void *priv, 34static unsigned int iptable_nat_do_chain(void *priv,
@@ -95,50 +98,50 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
95 }, 98 },
96}; 99};
97 100
98static int __net_init iptable_nat_net_init(struct net *net) 101static int __net_init iptable_nat_table_init(struct net *net)
99{ 102{
100 struct ipt_replace *repl; 103 struct ipt_replace *repl;
104 int ret;
105
106 if (net->ipv4.nat_table)
107 return 0;
101 108
102 repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); 109 repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
103 if (repl == NULL) 110 if (repl == NULL)
104 return -ENOMEM; 111 return -ENOMEM;
105 net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); 112 ret = ipt_register_table(net, &nf_nat_ipv4_table, repl,
113 nf_nat_ipv4_ops, &net->ipv4.nat_table);
106 kfree(repl); 114 kfree(repl);
107 return PTR_ERR_OR_ZERO(net->ipv4.nat_table); 115 return ret;
108} 116}
109 117
110static void __net_exit iptable_nat_net_exit(struct net *net) 118static void __net_exit iptable_nat_net_exit(struct net *net)
111{ 119{
112 ipt_unregister_table(net, net->ipv4.nat_table); 120 if (!net->ipv4.nat_table)
121 return;
122 ipt_unregister_table(net, net->ipv4.nat_table, nf_nat_ipv4_ops);
123 net->ipv4.nat_table = NULL;
113} 124}
114 125
115static struct pernet_operations iptable_nat_net_ops = { 126static struct pernet_operations iptable_nat_net_ops = {
116 .init = iptable_nat_net_init,
117 .exit = iptable_nat_net_exit, 127 .exit = iptable_nat_net_exit,
118}; 128};
119 129
120static int __init iptable_nat_init(void) 130static int __init iptable_nat_init(void)
121{ 131{
122 int err; 132 int ret = register_pernet_subsys(&iptable_nat_net_ops);
123 133
124 err = register_pernet_subsys(&iptable_nat_net_ops); 134 if (ret)
125 if (err < 0) 135 return ret;
126 goto err1;
127 136
128 err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); 137 ret = iptable_nat_table_init(&init_net);
129 if (err < 0) 138 if (ret)
130 goto err2; 139 unregister_pernet_subsys(&iptable_nat_net_ops);
131 return 0; 140 return ret;
132
133err2:
134 unregister_pernet_subsys(&iptable_nat_net_ops);
135err1:
136 return err;
137} 141}
138 142
139static void __exit iptable_nat_exit(void) 143static void __exit iptable_nat_exit(void)
140{ 144{
141 nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
142 unregister_pernet_subsys(&iptable_nat_net_ops); 145 unregister_pernet_subsys(&iptable_nat_net_ops);
143} 146}
144 147
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 1ba02811acb0..2642ecd2645c 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -10,12 +10,15 @@
10 10
11#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) 11#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
12 12
13static int __net_init iptable_raw_table_init(struct net *net);
14
13static const struct xt_table packet_raw = { 15static const struct xt_table packet_raw = {
14 .name = "raw", 16 .name = "raw",
15 .valid_hooks = RAW_VALID_HOOKS, 17 .valid_hooks = RAW_VALID_HOOKS,
16 .me = THIS_MODULE, 18 .me = THIS_MODULE,
17 .af = NFPROTO_IPV4, 19 .af = NFPROTO_IPV4,
18 .priority = NF_IP_PRI_RAW, 20 .priority = NF_IP_PRI_RAW,
21 .table_init = iptable_raw_table_init,
19}; 22};
20 23
21/* The work comes in here from netfilter.c. */ 24/* The work comes in here from netfilter.c. */
@@ -34,26 +37,32 @@ iptable_raw_hook(void *priv, struct sk_buff *skb,
34 37
35static struct nf_hook_ops *rawtable_ops __read_mostly; 38static struct nf_hook_ops *rawtable_ops __read_mostly;
36 39
37static int __net_init iptable_raw_net_init(struct net *net) 40static int __net_init iptable_raw_table_init(struct net *net)
38{ 41{
39 struct ipt_replace *repl; 42 struct ipt_replace *repl;
43 int ret;
44
45 if (net->ipv4.iptable_raw)
46 return 0;
40 47
41 repl = ipt_alloc_initial_table(&packet_raw); 48 repl = ipt_alloc_initial_table(&packet_raw);
42 if (repl == NULL) 49 if (repl == NULL)
43 return -ENOMEM; 50 return -ENOMEM;
44 net->ipv4.iptable_raw = 51 ret = ipt_register_table(net, &packet_raw, repl, rawtable_ops,
45 ipt_register_table(net, &packet_raw, repl); 52 &net->ipv4.iptable_raw);
46 kfree(repl); 53 kfree(repl);
47 return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw); 54 return ret;
48} 55}
49 56
50static void __net_exit iptable_raw_net_exit(struct net *net) 57static void __net_exit iptable_raw_net_exit(struct net *net)
51{ 58{
52 ipt_unregister_table(net, net->ipv4.iptable_raw); 59 if (!net->ipv4.iptable_raw)
60 return;
61 ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops);
62 net->ipv4.iptable_raw = NULL;
53} 63}
54 64
55static struct pernet_operations iptable_raw_net_ops = { 65static struct pernet_operations iptable_raw_net_ops = {
56 .init = iptable_raw_net_init,
57 .exit = iptable_raw_net_exit, 66 .exit = iptable_raw_net_exit,
58}; 67};
59 68
@@ -61,15 +70,20 @@ static int __init iptable_raw_init(void)
61{ 70{
62 int ret; 71 int ret;
63 72
73 rawtable_ops = xt_hook_ops_alloc(&packet_raw, iptable_raw_hook);
74 if (IS_ERR(rawtable_ops))
75 return PTR_ERR(rawtable_ops);
76
64 ret = register_pernet_subsys(&iptable_raw_net_ops); 77 ret = register_pernet_subsys(&iptable_raw_net_ops);
65 if (ret < 0) 78 if (ret < 0) {
79 kfree(rawtable_ops);
66 return ret; 80 return ret;
81 }
67 82
68 /* Register hooks */ 83 ret = iptable_raw_table_init(&init_net);
69 rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); 84 if (ret) {
70 if (IS_ERR(rawtable_ops)) {
71 ret = PTR_ERR(rawtable_ops);
72 unregister_pernet_subsys(&iptable_raw_net_ops); 85 unregister_pernet_subsys(&iptable_raw_net_ops);
86 kfree(rawtable_ops);
73 } 87 }
74 88
75 return ret; 89 return ret;
@@ -77,8 +91,8 @@ static int __init iptable_raw_init(void)
77 91
78static void __exit iptable_raw_fini(void) 92static void __exit iptable_raw_fini(void)
79{ 93{
80 xt_hook_unlink(&packet_raw, rawtable_ops);
81 unregister_pernet_subsys(&iptable_raw_net_ops); 94 unregister_pernet_subsys(&iptable_raw_net_ops);
95 kfree(rawtable_ops);
82} 96}
83 97
84module_init(iptable_raw_init); 98module_init(iptable_raw_init);
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index c2e23d5e9cd4..ff226596e4b5 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -28,12 +28,15 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules");
28 (1 << NF_INET_FORWARD) | \ 28 (1 << NF_INET_FORWARD) | \
29 (1 << NF_INET_LOCAL_OUT) 29 (1 << NF_INET_LOCAL_OUT)
30 30
31static int __net_init iptable_security_table_init(struct net *net);
32
31static const struct xt_table security_table = { 33static const struct xt_table security_table = {
32 .name = "security", 34 .name = "security",
33 .valid_hooks = SECURITY_VALID_HOOKS, 35 .valid_hooks = SECURITY_VALID_HOOKS,
34 .me = THIS_MODULE, 36 .me = THIS_MODULE,
35 .af = NFPROTO_IPV4, 37 .af = NFPROTO_IPV4,
36 .priority = NF_IP_PRI_SECURITY, 38 .priority = NF_IP_PRI_SECURITY,
39 .table_init = iptable_security_table_init,
37}; 40};
38 41
39static unsigned int 42static unsigned int
@@ -51,26 +54,33 @@ iptable_security_hook(void *priv, struct sk_buff *skb,
51 54
52static struct nf_hook_ops *sectbl_ops __read_mostly; 55static struct nf_hook_ops *sectbl_ops __read_mostly;
53 56
54static int __net_init iptable_security_net_init(struct net *net) 57static int __net_init iptable_security_table_init(struct net *net)
55{ 58{
56 struct ipt_replace *repl; 59 struct ipt_replace *repl;
60 int ret;
61
62 if (net->ipv4.iptable_security)
63 return 0;
57 64
58 repl = ipt_alloc_initial_table(&security_table); 65 repl = ipt_alloc_initial_table(&security_table);
59 if (repl == NULL) 66 if (repl == NULL)
60 return -ENOMEM; 67 return -ENOMEM;
61 net->ipv4.iptable_security = 68 ret = ipt_register_table(net, &security_table, repl, sectbl_ops,
62 ipt_register_table(net, &security_table, repl); 69 &net->ipv4.iptable_security);
63 kfree(repl); 70 kfree(repl);
64 return PTR_ERR_OR_ZERO(net->ipv4.iptable_security); 71 return ret;
65} 72}
66 73
67static void __net_exit iptable_security_net_exit(struct net *net) 74static void __net_exit iptable_security_net_exit(struct net *net)
68{ 75{
69 ipt_unregister_table(net, net->ipv4.iptable_security); 76 if (!net->ipv4.iptable_security)
77 return;
78
79 ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops);
80 net->ipv4.iptable_security = NULL;
70} 81}
71 82
72static struct pernet_operations iptable_security_net_ops = { 83static struct pernet_operations iptable_security_net_ops = {
73 .init = iptable_security_net_init,
74 .exit = iptable_security_net_exit, 84 .exit = iptable_security_net_exit,
75}; 85};
76 86
@@ -78,27 +88,29 @@ static int __init iptable_security_init(void)
78{ 88{
79 int ret; 89 int ret;
80 90
91 sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook);
92 if (IS_ERR(sectbl_ops))
93 return PTR_ERR(sectbl_ops);
94
81 ret = register_pernet_subsys(&iptable_security_net_ops); 95 ret = register_pernet_subsys(&iptable_security_net_ops);
82 if (ret < 0) 96 if (ret < 0) {
97 kfree(sectbl_ops);
83 return ret; 98 return ret;
84
85 sectbl_ops = xt_hook_link(&security_table, iptable_security_hook);
86 if (IS_ERR(sectbl_ops)) {
87 ret = PTR_ERR(sectbl_ops);
88 goto cleanup_table;
89 } 99 }
90 100
91 return ret; 101 ret = iptable_security_table_init(&init_net);
102 if (ret) {
103 unregister_pernet_subsys(&iptable_security_net_ops);
104 kfree(sectbl_ops);
105 }
92 106
93cleanup_table:
94 unregister_pernet_subsys(&iptable_security_net_ops);
95 return ret; 107 return ret;
96} 108}
97 109
98static void __exit iptable_security_fini(void) 110static void __exit iptable_security_fini(void)
99{ 111{
100 xt_hook_unlink(&security_table, sectbl_ops);
101 unregister_pernet_subsys(&iptable_security_net_ops); 112 unregister_pernet_subsys(&iptable_security_net_ops);
113 kfree(sectbl_ops);
102} 114}
103 115
104module_init(iptable_security_init); 116module_init(iptable_security_init);
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 6fb869f646bf..d88da36b383c 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -27,16 +27,12 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
27{ 27{
28 int err; 28 int err;
29 29
30 skb_orphan(skb);
31
32 local_bh_disable(); 30 local_bh_disable();
33 err = ip_defrag(net, skb, user); 31 err = ip_defrag(net, skb, user);
34 local_bh_enable(); 32 local_bh_enable();
35 33
36 if (!err) { 34 if (!err)
37 ip_send_check(ip_hdr(skb));
38 skb->ignore_df = 1; 35 skb->ignore_df = 1;
39 }
40 36
41 return err; 37 return err;
42} 38}
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 61c7cc22ea68..f8aad03d674b 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -127,29 +127,15 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
127 u8 proto, void *data, __sum16 *check, 127 u8 proto, void *data, __sum16 *check,
128 int datalen, int oldlen) 128 int datalen, int oldlen)
129{ 129{
130 const struct iphdr *iph = ip_hdr(skb);
131 struct rtable *rt = skb_rtable(skb);
132
133 if (skb->ip_summed != CHECKSUM_PARTIAL) { 130 if (skb->ip_summed != CHECKSUM_PARTIAL) {
134 if (!(rt->rt_flags & RTCF_LOCAL) && 131 const struct iphdr *iph = ip_hdr(skb);
135 (!skb->dev || skb->dev->features & 132
136 (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { 133 skb->ip_summed = CHECKSUM_PARTIAL;
137 skb->ip_summed = CHECKSUM_PARTIAL; 134 skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
138 skb->csum_start = skb_headroom(skb) + 135 ip_hdrlen(skb);
139 skb_network_offset(skb) + 136 skb->csum_offset = (void *)check - data;
140 ip_hdrlen(skb); 137 *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
141 skb->csum_offset = (void *)check - data; 138 proto, 0);
142 *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
143 datalen, proto, 0);
144 } else {
145 *check = 0;
146 *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
147 datalen, proto,
148 csum_partial(data, datalen,
149 0));
150 if (proto == IPPROTO_UDP && !*check)
151 *check = CSUM_MANGLED_0;
152 }
153 } else 139 } else
154 inet_proto_csum_replace2(check, skb, 140 inet_proto_csum_replace2(check, skb,
155 htons(oldlen), htons(datalen), true); 141 htons(oldlen), htons(datalen), true);
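
The rewritten nf_nat_ipv4_csum_recalc() always flips a non-partial skb to CHECKSUM_PARTIAL and writes ~csum_tcpudp_magic(saddr, daddr, datalen, proto, 0) into the transport check field, i.e. it precomputes the TCP/UDP pseudo-header portion (addresses, protocol, transport length) and leaves the payload to be folded in later. A standalone sketch of that pseudo-header arithmetic, with csum_tcpudp_like() as an invented stand-in and made-up addresses:

#include <stdint.h>
#include <stdio.h>

/* One's-complement accumulate of a 32-bit value into a running sum. */
static uint32_t csum_add32(uint32_t sum, uint32_t v)
{
	sum += v & 0xffff;
	sum += v >> 16;
	return sum;
}

/* Fold the pseudo-header sum to 16 bits and complement it - the rough
 * shape of csum_tcpudp_magic(saddr, daddr, len, proto, 0).
 */
static uint16_t csum_tcpudp_like(uint32_t saddr, uint32_t daddr,
				 uint16_t len, uint8_t proto)
{
	uint32_t sum = 0;

	sum = csum_add32(sum, saddr);
	sum = csum_add32(sum, daddr);
	sum = csum_add32(sum, ((uint32_t)proto << 16) | len);
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* 192.168.0.1 -> 192.168.0.2, 40 bytes of TCP (protocol 6). */
	uint16_t magic = csum_tcpudp_like(0xc0a80001, 0xc0a80002, 40, 6);

	/* The hunk above stores the bitwise complement of this value in the
	 * TCP/UDP check field as the CHECKSUM_PARTIAL seed; the payload is
	 * folded in when the checksum is completed later.
	 */
	printf("csum_tcpudp_like: 0x%04x, seed stored: 0x%04x\n",
	       magic, (uint16_t)~magic);
	return 0;
}
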
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index c6eb42100e9a..ea91058b5f6f 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -108,10 +108,18 @@ static int masq_inet_event(struct notifier_block *this,
108 unsigned long event, 108 unsigned long event,
109 void *ptr) 109 void *ptr)
110{ 110{
111 struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; 111 struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
112 struct netdev_notifier_info info; 112 struct netdev_notifier_info info;
113 113
114 netdev_notifier_info_init(&info, dev); 114 /* The masq_dev_notifier will catch the case of the device going
115 * down. So if the inetdev is dead and being destroyed we have
116 * no work to do. Otherwise this is an individual address removal
117 * and we have to perform the flush.
118 */
119 if (idev->dead)
120 return NOTIFY_DONE;
121
122 netdev_notifier_info_init(&info, idev->dev);
115 return masq_device_event(this, event, &info); 123 return masq_device_event(this, event, &info);
116} 124}
117 125
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index b72ffc58e255..51ced81b616c 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -25,7 +25,12 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
25 25
26 memset(&range, 0, sizeof(range)); 26 memset(&range, 0, sizeof(range));
27 range.flags = priv->flags; 27 range.flags = priv->flags;
28 28 if (priv->sreg_proto_min) {
29 range.min_proto.all =
30 *(__be16 *)&regs->data[priv->sreg_proto_min];
31 range.max_proto.all =
32 *(__be16 *)&regs->data[priv->sreg_proto_max];
33 }
29 regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook, 34 regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook,
30 &range, pkt->out); 35 &range, pkt->out);
31} 36}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c117b21b937d..cf9700b1a106 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -145,10 +145,12 @@ fail:
145} 145}
146EXPORT_SYMBOL_GPL(ping_get_port); 146EXPORT_SYMBOL_GPL(ping_get_port);
147 147
148void ping_hash(struct sock *sk) 148int ping_hash(struct sock *sk)
149{ 149{
150 pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); 150 pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
151 BUG(); /* "Please do not press this button again." */ 151 BUG(); /* "Please do not press this button again." */
152
153 return 0;
152} 154}
153 155
154void ping_unhash(struct sock *sk) 156void ping_unhash(struct sock *sk)
@@ -746,8 +748,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
746 748
747 if (msg->msg_controllen) { 749 if (msg->msg_controllen) {
748 err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); 750 err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
749 if (err) 751 if (unlikely(err)) {
752 kfree(ipc.opt);
750 return err; 753 return err;
754 }
751 if (ipc.opt) 755 if (ipc.opt)
752 free = 1; 756 free = 1;
753 } 757 }
@@ -1138,13 +1142,6 @@ static int ping_v4_seq_show(struct seq_file *seq, void *v)
1138 return 0; 1142 return 0;
1139} 1143}
1140 1144
1141static const struct seq_operations ping_v4_seq_ops = {
1142 .show = ping_v4_seq_show,
1143 .start = ping_v4_seq_start,
1144 .next = ping_seq_next,
1145 .stop = ping_seq_stop,
1146};
1147
1148static int ping_seq_open(struct inode *inode, struct file *file) 1145static int ping_seq_open(struct inode *inode, struct file *file)
1149{ 1146{
1150 struct ping_seq_afinfo *afinfo = PDE_DATA(inode); 1147 struct ping_seq_afinfo *afinfo = PDE_DATA(inode);
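
The ping_v4_sendmsg() hunk above is an error-path fix: ip_cmsg_send() may already have allocated ipc.opt by the time it fails, so returning without kfree(ipc.opt) would leak it (the same change appears in raw_sendmsg() further down). A minimal userspace sketch of the ownership rule - a parser that can allocate into the caller's state before reporting failure, and a caller that frees in every exit path; the names are invented:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct cmsg_state {
	char *opt;	/* allocated by the parser, owned by the caller */
};

/* May allocate state->opt and still fail afterwards, like ip_cmsg_send(). */
static int parse_cmsg(struct cmsg_state *state, int make_it_fail)
{
	state->opt = strdup("example-options");
	if (!state->opt)
		return -1;
	if (make_it_fail)
		return -1;	/* failure after the allocation */
	return 0;
}

static int sendmsg_like(int make_it_fail)
{
	struct cmsg_state state = { 0 };
	int err = parse_cmsg(&state, make_it_fail);

	if (err) {
		free(state.opt);	/* the fix: release what the parser left behind */
		return err;
	}

	printf("using options: %s\n", state.opt);
	free(state.opt);
	return 0;
}

int main(void)
{
	sendmsg_like(0);
	sendmsg_like(1);	/* error path, no leak */
	return 0;
}
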
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3abd9d7a3adf..9f665b63a927 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -390,7 +390,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
390 390
391 seq_printf(seq, "\nIp: %d %d", 391 seq_printf(seq, "\nIp: %d %d",
392 IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, 392 IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
393 sysctl_ip_default_ttl); 393 net->ipv4.sysctl_ip_default_ttl);
394 394
395 BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); 395 BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
396 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) 396 for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bc35f1842512..8d22de74080c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -93,7 +93,7 @@ static struct raw_hashinfo raw_v4_hashinfo = {
93 .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), 93 .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
94}; 94};
95 95
96void raw_hash_sk(struct sock *sk) 96int raw_hash_sk(struct sock *sk)
97{ 97{
98 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; 98 struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
99 struct hlist_head *head; 99 struct hlist_head *head;
@@ -104,6 +104,8 @@ void raw_hash_sk(struct sock *sk)
104 sk_add_node(sk, head); 104 sk_add_node(sk, head);
105 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 105 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
106 write_unlock_bh(&h->lock); 106 write_unlock_bh(&h->lock);
107
108 return 0;
107} 109}
108EXPORT_SYMBOL_GPL(raw_hash_sk); 110EXPORT_SYMBOL_GPL(raw_hash_sk);
109 111
@@ -547,8 +549,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
547 549
548 if (msg->msg_controllen) { 550 if (msg->msg_controllen) {
549 err = ip_cmsg_send(net, msg, &ipc, false); 551 err = ip_cmsg_send(net, msg, &ipc, false);
550 if (err) 552 if (unlikely(err)) {
553 kfree(ipc.opt);
551 goto out; 554 goto out;
555 }
552 if (ipc.opt) 556 if (ipc.opt)
553 free = 1; 557 free = 1;
554 } 558 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 85f184e429c6..02c62299d717 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
129static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; 129static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
130static int ip_rt_min_advmss __read_mostly = 256; 130static int ip_rt_min_advmss __read_mostly = 256;
131 131
132static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
132/* 133/*
133 * Interface to generic destination cache. 134 * Interface to generic destination cache.
134 */ 135 */
@@ -755,7 +756,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
755 struct fib_nh *nh = &FIB_RES_NH(res); 756 struct fib_nh *nh = &FIB_RES_NH(res);
756 757
757 update_or_create_fnhe(nh, fl4->daddr, new_gw, 758 update_or_create_fnhe(nh, fl4->daddr, new_gw,
758 0, 0); 759 0, jiffies + ip_rt_gc_timeout);
759 } 760 }
760 if (kill_route) 761 if (kill_route)
761 rt->dst.obsolete = DST_OBSOLETE_KILL; 762 rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1556,6 +1557,36 @@ static void ip_handle_martian_source(struct net_device *dev,
1556#endif 1557#endif
1557} 1558}
1558 1559
1560static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
1561{
1562 struct fnhe_hash_bucket *hash;
1563 struct fib_nh_exception *fnhe, __rcu **fnhe_p;
1564 u32 hval = fnhe_hashfun(daddr);
1565
1566 spin_lock_bh(&fnhe_lock);
1567
1568 hash = rcu_dereference_protected(nh->nh_exceptions,
1569 lockdep_is_held(&fnhe_lock));
1570 hash += hval;
1571
1572 fnhe_p = &hash->chain;
1573 fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
1574 while (fnhe) {
1575 if (fnhe->fnhe_daddr == daddr) {
1576 rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
1577 fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
1578 fnhe_flush_routes(fnhe);
1579 kfree_rcu(fnhe, rcu);
1580 break;
1581 }
1582 fnhe_p = &fnhe->fnhe_next;
1583 fnhe = rcu_dereference_protected(fnhe->fnhe_next,
1584 lockdep_is_held(&fnhe_lock));
1585 }
1586
1587 spin_unlock_bh(&fnhe_lock);
1588}
1589
1559/* called in rcu_read_lock() section */ 1590/* called in rcu_read_lock() section */
1560static int __mkroute_input(struct sk_buff *skb, 1591static int __mkroute_input(struct sk_buff *skb,
1561 const struct fib_result *res, 1592 const struct fib_result *res,
@@ -1609,11 +1640,20 @@ static int __mkroute_input(struct sk_buff *skb,
1609 1640
1610 fnhe = find_exception(&FIB_RES_NH(*res), daddr); 1641 fnhe = find_exception(&FIB_RES_NH(*res), daddr);
1611 if (do_cache) { 1642 if (do_cache) {
1612 if (fnhe) 1643 if (fnhe) {
1613 rth = rcu_dereference(fnhe->fnhe_rth_input); 1644 rth = rcu_dereference(fnhe->fnhe_rth_input);
1614 else 1645 if (rth && rth->dst.expires &&
1615 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); 1646 time_after(jiffies, rth->dst.expires)) {
1647 ip_del_fnhe(&FIB_RES_NH(*res), daddr);
1648 fnhe = NULL;
1649 } else {
1650 goto rt_cache;
1651 }
1652 }
1653
1654 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1616 1655
1656rt_cache:
1617 if (rt_cache_valid(rth)) { 1657 if (rt_cache_valid(rth)) {
1618 skb_dst_set_noref(skb, &rth->dst); 1658 skb_dst_set_noref(skb, &rth->dst);
1619 goto out; 1659 goto out;
@@ -2014,19 +2054,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
2014 struct fib_nh *nh = &FIB_RES_NH(*res); 2054 struct fib_nh *nh = &FIB_RES_NH(*res);
2015 2055
2016 fnhe = find_exception(nh, fl4->daddr); 2056 fnhe = find_exception(nh, fl4->daddr);
2017 if (fnhe) 2057 if (fnhe) {
2018 prth = &fnhe->fnhe_rth_output; 2058 prth = &fnhe->fnhe_rth_output;
2019 else { 2059 rth = rcu_dereference(*prth);
2020 if (unlikely(fl4->flowi4_flags & 2060 if (rth && rth->dst.expires &&
2021 FLOWI_FLAG_KNOWN_NH && 2061 time_after(jiffies, rth->dst.expires)) {
2022 !(nh->nh_gw && 2062 ip_del_fnhe(nh, fl4->daddr);
2023 nh->nh_scope == RT_SCOPE_LINK))) { 2063 fnhe = NULL;
2024 do_cache = false; 2064 } else {
2025 goto add; 2065 goto rt_cache;
2026 } 2066 }
2027 prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
2028 } 2067 }
2068
2069 if (unlikely(fl4->flowi4_flags &
2070 FLOWI_FLAG_KNOWN_NH &&
2071 !(nh->nh_gw &&
2072 nh->nh_scope == RT_SCOPE_LINK))) {
2073 do_cache = false;
2074 goto add;
2075 }
2076 prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
2029 rth = rcu_dereference(*prth); 2077 rth = rcu_dereference(*prth);
2078
2079rt_cache:
2030 if (rt_cache_valid(rth)) { 2080 if (rt_cache_valid(rth)) {
2031 dst_hold(&rth->dst); 2081 dst_hold(&rth->dst);
2032 return rth; 2082 return rth;
@@ -2569,7 +2619,6 @@ void ip_rt_multicast_event(struct in_device *in_dev)
2569} 2619}
2570 2620
2571#ifdef CONFIG_SYSCTL 2621#ifdef CONFIG_SYSCTL
2572static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
2573static int ip_rt_gc_interval __read_mostly = 60 * HZ; 2622static int ip_rt_gc_interval __read_mostly = 60 * HZ;
2574static int ip_rt_gc_min_interval __read_mostly = HZ / 2; 2623static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
2575static int ip_rt_gc_elasticity __read_mostly = 8; 2624static int ip_rt_gc_elasticity __read_mostly = 8;
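
The route.c changes teach both the input and output lookups to notice that a cached next-hop exception has expired (rth->dst.expires in the past) and to drop it with the new ip_del_fnhe() before falling back to the per-CPU cache; redirects now create the exception with an expiry of jiffies + ip_rt_gc_timeout instead of never expiring. The expiry test uses the kernel's wraparound-safe time_after(); a small userspace sketch of that comparison, with invented names and a plain counter standing in for jiffies:

#include <stdio.h>
#include <stdint.h>

/* Wraparound-safe "a is after b" for free-running unsigned counters:
 * the signed difference goes negative once b has passed a, even if the
 * counter wrapped in between (the same idea as the kernel's time_after()).
 */
#define time_after(a, b)	((int32_t)((b) - (a)) < 0)

struct cache_entry {
	uint32_t expires;	/* absolute tick at which the entry goes stale */
	int valid;
};

static int entry_usable(const struct cache_entry *e, uint32_t now)
{
	if (!e->valid)
		return 0;
	if (e->expires && time_after(now, e->expires))
		return 0;	/* expired: caller should delete and rebuild */
	return 1;
}

int main(void)
{
	struct cache_entry e = { .expires = 0xfffffff0u + 100, .valid = 1 };

	/* "now" values chosen around a 32-bit wrap to show the comparison
	 * still behaves: 0xfffffff0 is before the expiry, 0x200 is after it.
	 */
	printf("near wrap, not expired: %d\n", entry_usable(&e, 0xfffffff0u));
	printf("past wrap, expired:     %d\n", entry_usable(&e, 0x200u));
	return 0;
}
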
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 643a86c49020..4c04f09338e3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -19,8 +19,6 @@
19#include <net/tcp.h> 19#include <net/tcp.h>
20#include <net/route.h> 20#include <net/route.h>
21 21
22extern int sysctl_tcp_syncookies;
23
24static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly; 22static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
25 23
26#define COOKIEBITS 24 /* Upper bits store count */ 24#define COOKIEBITS 24 /* Upper bits store count */
@@ -50,8 +48,7 @@ static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
50#define TSBITS 6 48#define TSBITS 6
51#define TSMASK (((__u32)1 << TSBITS) - 1) 49#define TSMASK (((__u32)1 << TSBITS) - 1)
52 50
53static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], 51static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv4_cookie_scratch);
54 ipv4_cookie_scratch);
55 52
56static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, 53static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
57 u32 count, int c) 54 u32 count, int c)
@@ -307,7 +304,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
307 __u8 rcv_wscale; 304 __u8 rcv_wscale;
308 struct flowi4 fl4; 305 struct flowi4 fl4;
309 306
310 if (!sysctl_tcp_syncookies || !th->ack || th->rst) 307 if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
311 goto out; 308 goto out;
312 309
313 if (tcp_synq_no_recent_overflow(sk)) 310 if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4d367b4139a3..1e1fe6086dd9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -283,31 +283,6 @@ static struct ctl_table ipv4_table[] = {
283 .proc_handler = proc_dointvec 283 .proc_handler = proc_dointvec
284 }, 284 },
285 { 285 {
286 .procname = "ip_default_ttl",
287 .data = &sysctl_ip_default_ttl,
288 .maxlen = sizeof(int),
289 .mode = 0644,
290 .proc_handler = proc_dointvec_minmax,
291 .extra1 = &ip_ttl_min,
292 .extra2 = &ip_ttl_max,
293 },
294 {
295 .procname = "tcp_syn_retries",
296 .data = &sysctl_tcp_syn_retries,
297 .maxlen = sizeof(int),
298 .mode = 0644,
299 .proc_handler = proc_dointvec_minmax,
300 .extra1 = &tcp_syn_retries_min,
301 .extra2 = &tcp_syn_retries_max
302 },
303 {
304 .procname = "tcp_synack_retries",
305 .data = &sysctl_tcp_synack_retries,
306 .maxlen = sizeof(int),
307 .mode = 0644,
308 .proc_handler = proc_dointvec
309 },
310 {
311 .procname = "tcp_max_orphans", 286 .procname = "tcp_max_orphans",
312 .data = &sysctl_tcp_max_orphans, 287 .data = &sysctl_tcp_max_orphans,
313 .maxlen = sizeof(int), 288 .maxlen = sizeof(int),
@@ -322,51 +297,6 @@ static struct ctl_table ipv4_table[] = {
322 .proc_handler = proc_dointvec 297 .proc_handler = proc_dointvec
323 }, 298 },
324 { 299 {
325 .procname = "ip_early_demux",
326 .data = &sysctl_ip_early_demux,
327 .maxlen = sizeof(int),
328 .mode = 0644,
329 .proc_handler = proc_dointvec
330 },
331 {
332 .procname = "ip_dynaddr",
333 .data = &sysctl_ip_dynaddr,
334 .maxlen = sizeof(int),
335 .mode = 0644,
336 .proc_handler = proc_dointvec
337 },
338 {
339 .procname = "tcp_retries1",
340 .data = &sysctl_tcp_retries1,
341 .maxlen = sizeof(int),
342 .mode = 0644,
343 .proc_handler = proc_dointvec_minmax,
344 .extra2 = &tcp_retr1_max
345 },
346 {
347 .procname = "tcp_retries2",
348 .data = &sysctl_tcp_retries2,
349 .maxlen = sizeof(int),
350 .mode = 0644,
351 .proc_handler = proc_dointvec
352 },
353 {
354 .procname = "tcp_fin_timeout",
355 .data = &sysctl_tcp_fin_timeout,
356 .maxlen = sizeof(int),
357 .mode = 0644,
358 .proc_handler = proc_dointvec_jiffies,
359 },
360#ifdef CONFIG_SYN_COOKIES
361 {
362 .procname = "tcp_syncookies",
363 .data = &sysctl_tcp_syncookies,
364 .maxlen = sizeof(int),
365 .mode = 0644,
366 .proc_handler = proc_dointvec
367 },
368#endif
369 {
370 .procname = "tcp_fastopen", 300 .procname = "tcp_fastopen",
371 .data = &sysctl_tcp_fastopen, 301 .data = &sysctl_tcp_fastopen,
372 .maxlen = sizeof(int), 302 .maxlen = sizeof(int),
@@ -415,30 +345,6 @@ static struct ctl_table ipv4_table[] = {
415 .proc_handler = proc_dointvec 345 .proc_handler = proc_dointvec
416 }, 346 },
417 { 347 {
418 .procname = "igmp_max_memberships",
419 .data = &sysctl_igmp_max_memberships,
420 .maxlen = sizeof(int),
421 .mode = 0644,
422 .proc_handler = proc_dointvec
423 },
424 {
425 .procname = "igmp_max_msf",
426 .data = &sysctl_igmp_max_msf,
427 .maxlen = sizeof(int),
428 .mode = 0644,
429 .proc_handler = proc_dointvec
430 },
431#ifdef CONFIG_IP_MULTICAST
432 {
433 .procname = "igmp_qrv",
434 .data = &sysctl_igmp_qrv,
435 .maxlen = sizeof(int),
436 .mode = 0644,
437 .proc_handler = proc_dointvec_minmax,
438 .extra1 = &one
439 },
440#endif
441 {
442 .procname = "inet_peer_threshold", 348 .procname = "inet_peer_threshold",
443 .data = &inet_peer_threshold, 349 .data = &inet_peer_threshold,
444 .maxlen = sizeof(int), 350 .maxlen = sizeof(int),
@@ -460,13 +366,6 @@ static struct ctl_table ipv4_table[] = {
460 .proc_handler = proc_dointvec_jiffies, 366 .proc_handler = proc_dointvec_jiffies,
461 }, 367 },
462 { 368 {
463 .procname = "tcp_orphan_retries",
464 .data = &sysctl_tcp_orphan_retries,
465 .maxlen = sizeof(int),
466 .mode = 0644,
467 .proc_handler = proc_dointvec
468 },
469 {
470 .procname = "tcp_fack", 369 .procname = "tcp_fack",
471 .data = &sysctl_tcp_fack, 370 .data = &sysctl_tcp_fack,
472 .maxlen = sizeof(int), 371 .maxlen = sizeof(int),
@@ -481,13 +380,6 @@ static struct ctl_table ipv4_table[] = {
481 .proc_handler = proc_dointvec, 380 .proc_handler = proc_dointvec,
482 }, 381 },
483 { 382 {
484 .procname = "tcp_reordering",
485 .data = &sysctl_tcp_reordering,
486 .maxlen = sizeof(int),
487 .mode = 0644,
488 .proc_handler = proc_dointvec
489 },
490 {
491 .procname = "tcp_max_reordering", 383 .procname = "tcp_max_reordering",
492 .data = &sysctl_tcp_max_reordering, 384 .data = &sysctl_tcp_max_reordering,
493 .maxlen = sizeof(int), 385 .maxlen = sizeof(int),
@@ -517,13 +409,6 @@ static struct ctl_table ipv4_table[] = {
517 .extra1 = &one, 409 .extra1 = &one,
518 }, 410 },
519 { 411 {
520 .procname = "tcp_notsent_lowat",
521 .data = &sysctl_tcp_notsent_lowat,
522 .maxlen = sizeof(sysctl_tcp_notsent_lowat),
523 .mode = 0644,
524 .proc_handler = proc_dointvec,
525 },
526 {
527 .procname = "tcp_rmem", 412 .procname = "tcp_rmem",
528 .data = &sysctl_tcp_rmem, 413 .data = &sysctl_tcp_rmem,
529 .maxlen = sizeof(sysctl_tcp_rmem), 414 .maxlen = sizeof(sysctl_tcp_rmem),
@@ -845,6 +730,29 @@ static struct ctl_table ipv4_net_table[] = {
845 .proc_handler = proc_dointvec 730 .proc_handler = proc_dointvec
846 }, 731 },
847 { 732 {
733 .procname = "ip_dynaddr",
734 .data = &init_net.ipv4.sysctl_ip_dynaddr,
735 .maxlen = sizeof(int),
736 .mode = 0644,
737 .proc_handler = proc_dointvec
738 },
739 {
740 .procname = "ip_early_demux",
741 .data = &init_net.ipv4.sysctl_ip_early_demux,
742 .maxlen = sizeof(int),
743 .mode = 0644,
744 .proc_handler = proc_dointvec
745 },
746 {
747 .procname = "ip_default_ttl",
748 .data = &init_net.ipv4.sysctl_ip_default_ttl,
749 .maxlen = sizeof(int),
750 .mode = 0644,
751 .proc_handler = proc_dointvec_minmax,
752 .extra1 = &ip_ttl_min,
753 .extra2 = &ip_ttl_max,
754 },
755 {
848 .procname = "ip_local_port_range", 756 .procname = "ip_local_port_range",
849 .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), 757 .maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
850 .data = &init_net.ipv4.ip_local_ports.range, 758 .data = &init_net.ipv4.ip_local_ports.range,
@@ -934,12 +842,36 @@ static struct ctl_table ipv4_net_table[] = {
934 }, 842 },
935 { 843 {
936 .procname = "igmp_link_local_mcast_reports", 844 .procname = "igmp_link_local_mcast_reports",
937 .data = &sysctl_igmp_llm_reports, 845 .data = &init_net.ipv4.sysctl_igmp_llm_reports,
846 .maxlen = sizeof(int),
847 .mode = 0644,
848 .proc_handler = proc_dointvec
849 },
850 {
851 .procname = "igmp_max_memberships",
852 .data = &init_net.ipv4.sysctl_igmp_max_memberships,
938 .maxlen = sizeof(int), 853 .maxlen = sizeof(int),
939 .mode = 0644, 854 .mode = 0644,
940 .proc_handler = proc_dointvec 855 .proc_handler = proc_dointvec
941 }, 856 },
942 { 857 {
858 .procname = "igmp_max_msf",
859 .data = &init_net.ipv4.sysctl_igmp_max_msf,
860 .maxlen = sizeof(int),
861 .mode = 0644,
862 .proc_handler = proc_dointvec
863 },
864#ifdef CONFIG_IP_MULTICAST
865 {
866 .procname = "igmp_qrv",
867 .data = &init_net.ipv4.sysctl_igmp_qrv,
868 .maxlen = sizeof(int),
869 .mode = 0644,
870 .proc_handler = proc_dointvec_minmax,
871 .extra1 = &one
872 },
873#endif
874 {
943 .procname = "tcp_keepalive_time", 875 .procname = "tcp_keepalive_time",
944 .data = &init_net.ipv4.sysctl_tcp_keepalive_time, 876 .data = &init_net.ipv4.sysctl_tcp_keepalive_time,
945 .maxlen = sizeof(int), 877 .maxlen = sizeof(int),
@@ -960,6 +892,74 @@ static struct ctl_table ipv4_net_table[] = {
960 .mode = 0644, 892 .mode = 0644,
961 .proc_handler = proc_dointvec_jiffies, 893 .proc_handler = proc_dointvec_jiffies,
962 }, 894 },
895 {
896 .procname = "tcp_syn_retries",
897 .data = &init_net.ipv4.sysctl_tcp_syn_retries,
898 .maxlen = sizeof(int),
899 .mode = 0644,
900 .proc_handler = proc_dointvec_minmax,
901 .extra1 = &tcp_syn_retries_min,
902 .extra2 = &tcp_syn_retries_max
903 },
904 {
905 .procname = "tcp_synack_retries",
906 .data = &init_net.ipv4.sysctl_tcp_synack_retries,
907 .maxlen = sizeof(int),
908 .mode = 0644,
909 .proc_handler = proc_dointvec
910 },
911#ifdef CONFIG_SYN_COOKIES
912 {
913 .procname = "tcp_syncookies",
914 .data = &init_net.ipv4.sysctl_tcp_syncookies,
915 .maxlen = sizeof(int),
916 .mode = 0644,
917 .proc_handler = proc_dointvec
918 },
919#endif
920 {
921 .procname = "tcp_reordering",
922 .data = &init_net.ipv4.sysctl_tcp_reordering,
923 .maxlen = sizeof(int),
924 .mode = 0644,
925 .proc_handler = proc_dointvec
926 },
927 {
928 .procname = "tcp_retries1",
929 .data = &init_net.ipv4.sysctl_tcp_retries1,
930 .maxlen = sizeof(int),
931 .mode = 0644,
932 .proc_handler = proc_dointvec_minmax,
933 .extra2 = &tcp_retr1_max
934 },
935 {
936 .procname = "tcp_retries2",
937 .data = &init_net.ipv4.sysctl_tcp_retries2,
938 .maxlen = sizeof(int),
939 .mode = 0644,
940 .proc_handler = proc_dointvec
941 },
942 {
943 .procname = "tcp_orphan_retries",
944 .data = &init_net.ipv4.sysctl_tcp_orphan_retries,
945 .maxlen = sizeof(int),
946 .mode = 0644,
947 .proc_handler = proc_dointvec
948 },
949 {
950 .procname = "tcp_fin_timeout",
951 .data = &init_net.ipv4.sysctl_tcp_fin_timeout,
952 .maxlen = sizeof(int),
953 .mode = 0644,
954 .proc_handler = proc_dointvec_jiffies,
955 },
956 {
957 .procname = "tcp_notsent_lowat",
958 .data = &init_net.ipv4.sysctl_tcp_notsent_lowat,
959 .maxlen = sizeof(unsigned int),
960 .mode = 0644,
961 .proc_handler = proc_dointvec,
962 },
963 { } 963 { }
964}; 964};
965 965
@@ -988,6 +988,10 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
988 if (!net->ipv4.sysctl_local_reserved_ports) 988 if (!net->ipv4.sysctl_local_reserved_ports)
989 goto err_ports; 989 goto err_ports;
990 990
991 net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
992 net->ipv4.sysctl_ip_dynaddr = 0;
993 net->ipv4.sysctl_ip_early_demux = 1;
994
991 return 0; 995 return 0;
992 996
993err_ports: 997err_ports:
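
The sysctl hunks above move a batch of knobs (ip_default_ttl, ip_dynaddr, ip_early_demux, the igmp_* limits, the tcp_* retry/timeout settings, tcp_syncookies, tcp_reordering, tcp_notsent_lowat) from the global ipv4_table into ipv4_net_table, with each .data pointer aimed at the corresponding init_net.ipv4 field and the defaults seeded in ipv4_sysctl_init_net(). For namespaces other than init_net the usual technique - not visible in this hunk, so treat it as an assumption about the surrounding code - is to duplicate the table per namespace and shift every .data pointer by the offset between the new struct net and init_net. A userspace sketch of that pointer-rebasing idea with invented types; it relies on both instances having identical layout:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Per-"namespace" tunables; the template table points into the default
 * instance and gets rebased for every new instance.
 */
struct ns_ipv4 {
	int ip_default_ttl;
	int ip_dynaddr;
};

struct ctl_entry {
	const char *name;
	void *data;
};

static struct ns_ipv4 init_ns = { .ip_default_ttl = 64, .ip_dynaddr = 0 };

static struct ctl_entry template[] = {
	{ "ip_default_ttl", &init_ns.ip_default_ttl },
	{ "ip_dynaddr",     &init_ns.ip_dynaddr },
};

/* Duplicate the template and rebase each data pointer so it refers to the
 * fields of the new instance instead of the default one.
 */
static struct ctl_entry *clone_table_for(struct ns_ipv4 *ns)
{
	struct ctl_entry *t = malloc(sizeof(template));
	size_t i;

	if (!t)
		return NULL;
	memcpy(t, template, sizeof(template));
	for (i = 0; i < sizeof(template) / sizeof(template[0]); i++)
		t[i].data = (char *)t[i].data +
			    ((char *)ns - (char *)&init_ns);
	return t;
}

int main(void)
{
	struct ns_ipv4 other = { .ip_default_ttl = 64, .ip_dynaddr = 1 };
	struct ctl_entry *t = clone_table_for(&other);

	if (t)
		printf("%s for the new ns: %d\n", t[1].name, *(int *)t[1].data);
	free(t);
	return 0;
}
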
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fd17eec93525..08b8b960a8ed 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -247,6 +247,7 @@
247 247
248#define pr_fmt(fmt) "TCP: " fmt 248#define pr_fmt(fmt) "TCP: " fmt
249 249
250#include <crypto/hash.h>
250#include <linux/kernel.h> 251#include <linux/kernel.h>
251#include <linux/module.h> 252#include <linux/module.h>
252#include <linux/types.h> 253#include <linux/types.h>
@@ -266,7 +267,6 @@
266#include <linux/swap.h> 267#include <linux/swap.h>
267#include <linux/cache.h> 268#include <linux/cache.h>
268#include <linux/err.h> 269#include <linux/err.h>
269#include <linux/crypto.h>
270#include <linux/time.h> 270#include <linux/time.h>
271#include <linux/slab.h> 271#include <linux/slab.h>
272 272
@@ -279,10 +279,9 @@
279 279
280#include <asm/uaccess.h> 280#include <asm/uaccess.h>
281#include <asm/ioctls.h> 281#include <asm/ioctls.h>
282#include <asm/unaligned.h>
282#include <net/busy_poll.h> 283#include <net/busy_poll.h>
283 284
284int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
285
286int sysctl_tcp_min_tso_segs __read_mostly = 2; 285int sysctl_tcp_min_tso_segs __read_mostly = 2;
287 286
288int sysctl_tcp_autocorking __read_mostly = 1; 287int sysctl_tcp_autocorking __read_mostly = 1;
@@ -405,7 +404,7 @@ void tcp_init_sock(struct sock *sk)
405 tp->mss_cache = TCP_MSS_DEFAULT; 404 tp->mss_cache = TCP_MSS_DEFAULT;
406 u64_stats_init(&tp->syncp); 405 u64_stats_init(&tp->syncp);
407 406
408 tp->reordering = sysctl_tcp_reordering; 407 tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
409 tcp_enable_early_retrans(tp); 408 tcp_enable_early_retrans(tp);
410 tcp_assign_congestion_control(sk); 409 tcp_assign_congestion_control(sk);
411 410
@@ -557,20 +556,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
557 return -EINVAL; 556 return -EINVAL;
558 557
559 slow = lock_sock_fast(sk); 558 slow = lock_sock_fast(sk);
560 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) 559 answ = tcp_inq(sk);
561 answ = 0;
562 else if (sock_flag(sk, SOCK_URGINLINE) ||
563 !tp->urg_data ||
564 before(tp->urg_seq, tp->copied_seq) ||
565 !before(tp->urg_seq, tp->rcv_nxt)) {
566
567 answ = tp->rcv_nxt - tp->copied_seq;
568
569 /* Subtract 1, if FIN was received */
570 if (answ && sock_flag(sk, SOCK_DONE))
571 answ--;
572 } else
573 answ = tp->urg_seq - tp->copied_seq;
574 unlock_sock_fast(sk, slow); 560 unlock_sock_fast(sk, slow);
575 break; 561 break;
576 case SIOCATMARK: 562 case SIOCATMARK:
@@ -939,7 +925,7 @@ new_segment:
939 925
940 i = skb_shinfo(skb)->nr_frags; 926 i = skb_shinfo(skb)->nr_frags;
941 can_coalesce = skb_can_coalesce(skb, i, page, offset); 927 can_coalesce = skb_can_coalesce(skb, i, page, offset);
942 if (!can_coalesce && i >= MAX_SKB_FRAGS) { 928 if (!can_coalesce && i >= sysctl_max_skb_frags) {
943 tcp_mark_push(tp, skb); 929 tcp_mark_push(tp, skb);
944 goto new_segment; 930 goto new_segment;
945 } 931 }
@@ -1212,7 +1198,7 @@ new_segment:
1212 1198
1213 if (!skb_can_coalesce(skb, i, pfrag->page, 1199 if (!skb_can_coalesce(skb, i, pfrag->page,
1214 pfrag->offset)) { 1200 pfrag->offset)) {
1215 if (i == MAX_SKB_FRAGS || !sg) { 1201 if (i == sysctl_max_skb_frags || !sg) {
1216 tcp_mark_push(tp, skb); 1202 tcp_mark_push(tp, skb);
1217 goto new_segment; 1203 goto new_segment;
1218 } 1204 }
@@ -1465,8 +1451,10 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1465 1451
1466 while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { 1452 while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
1467 offset = seq - TCP_SKB_CB(skb)->seq; 1453 offset = seq - TCP_SKB_CB(skb)->seq;
1468 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) 1454 if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
1455 pr_err_once("%s: found a SYN, please report !\n", __func__);
1469 offset--; 1456 offset--;
1457 }
1470 if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) { 1458 if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
1471 *off = offset; 1459 *off = offset;
1472 return skb; 1460 return skb;
@@ -1656,8 +1644,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
1656 break; 1644 break;
1657 1645
1658 offset = *seq - TCP_SKB_CB(skb)->seq; 1646 offset = *seq - TCP_SKB_CB(skb)->seq;
1659 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) 1647 if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
1648 pr_err_once("%s: found a SYN, please report !\n", __func__);
1660 offset--; 1649 offset--;
1650 }
1661 if (offset < skb->len) 1651 if (offset < skb->len)
1662 goto found_ok_skb; 1652 goto found_ok_skb;
1663 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) 1653 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -2325,6 +2315,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2325{ 2315{
2326 struct tcp_sock *tp = tcp_sk(sk); 2316 struct tcp_sock *tp = tcp_sk(sk);
2327 struct inet_connection_sock *icsk = inet_csk(sk); 2317 struct inet_connection_sock *icsk = inet_csk(sk);
2318 struct net *net = sock_net(sk);
2328 int val; 2319 int val;
2329 int err = 0; 2320 int err = 0;
2330 2321
@@ -2521,7 +2512,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2521 case TCP_LINGER2: 2512 case TCP_LINGER2:
2522 if (val < 0) 2513 if (val < 0)
2523 tp->linger2 = -1; 2514 tp->linger2 = -1;
2524 else if (val > sysctl_tcp_fin_timeout / HZ) 2515 else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ)
2525 tp->linger2 = 0; 2516 tp->linger2 = 0;
2526 else 2517 else
2527 tp->linger2 = val * HZ; 2518 tp->linger2 = val * HZ;
@@ -2638,6 +2629,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2638 const struct inet_connection_sock *icsk = inet_csk(sk); 2629 const struct inet_connection_sock *icsk = inet_csk(sk);
2639 u32 now = tcp_time_stamp; 2630 u32 now = tcp_time_stamp;
2640 unsigned int start; 2631 unsigned int start;
2632 int notsent_bytes;
2633 u64 rate64;
2641 u32 rate; 2634 u32 rate;
2642 2635
2643 memset(info, 0, sizeof(*info)); 2636 memset(info, 0, sizeof(*info));
@@ -2703,18 +2696,27 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2703 info->tcpi_total_retrans = tp->total_retrans; 2696 info->tcpi_total_retrans = tp->total_retrans;
2704 2697
2705 rate = READ_ONCE(sk->sk_pacing_rate); 2698 rate = READ_ONCE(sk->sk_pacing_rate);
2706 info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; 2699 rate64 = rate != ~0U ? rate : ~0ULL;
2700 put_unaligned(rate64, &info->tcpi_pacing_rate);
2707 2701
2708 rate = READ_ONCE(sk->sk_max_pacing_rate); 2702 rate = READ_ONCE(sk->sk_max_pacing_rate);
2709 info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; 2703 rate64 = rate != ~0U ? rate : ~0ULL;
2704 put_unaligned(rate64, &info->tcpi_max_pacing_rate);
2710 2705
2711 do { 2706 do {
2712 start = u64_stats_fetch_begin_irq(&tp->syncp); 2707 start = u64_stats_fetch_begin_irq(&tp->syncp);
2713 info->tcpi_bytes_acked = tp->bytes_acked; 2708 put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
2714 info->tcpi_bytes_received = tp->bytes_received; 2709 put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
2715 } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); 2710 } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
2716 info->tcpi_segs_out = tp->segs_out; 2711 info->tcpi_segs_out = tp->segs_out;
2717 info->tcpi_segs_in = tp->segs_in; 2712 info->tcpi_segs_in = tp->segs_in;
2713
2714 notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);
2715 info->tcpi_notsent_bytes = max(0, notsent_bytes);
2716
2717 info->tcpi_min_rtt = tcp_min_rtt(tp);
2718 info->tcpi_data_segs_in = tp->data_segs_in;
2719 info->tcpi_data_segs_out = tp->data_segs_out;
2718} 2720}
2719EXPORT_SYMBOL_GPL(tcp_get_info); 2721EXPORT_SYMBOL_GPL(tcp_get_info);
2720 2722
@@ -2723,6 +2725,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2723{ 2725{
2724 struct inet_connection_sock *icsk = inet_csk(sk); 2726 struct inet_connection_sock *icsk = inet_csk(sk);
2725 struct tcp_sock *tp = tcp_sk(sk); 2727 struct tcp_sock *tp = tcp_sk(sk);
2728 struct net *net = sock_net(sk);
2726 int val, len; 2729 int val, len;
2727 2730
2728 if (get_user(len, optlen)) 2731 if (get_user(len, optlen))
@@ -2757,12 +2760,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2757 val = keepalive_probes(tp); 2760 val = keepalive_probes(tp);
2758 break; 2761 break;
2759 case TCP_SYNCNT: 2762 case TCP_SYNCNT:
2760 val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 2763 val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
2761 break; 2764 break;
2762 case TCP_LINGER2: 2765 case TCP_LINGER2:
2763 val = tp->linger2; 2766 val = tp->linger2;
2764 if (val >= 0) 2767 if (val >= 0)
2765 val = (val ? : sysctl_tcp_fin_timeout) / HZ; 2768 val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
2766 break; 2769 break;
2767 case TCP_DEFER_ACCEPT: 2770 case TCP_DEFER_ACCEPT:
2768 val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, 2771 val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
@@ -2939,17 +2942,26 @@ static bool tcp_md5sig_pool_populated = false;
2939 2942
2940static void __tcp_alloc_md5sig_pool(void) 2943static void __tcp_alloc_md5sig_pool(void)
2941{ 2944{
2945 struct crypto_ahash *hash;
2942 int cpu; 2946 int cpu;
2943 2947
2948 hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
2949 if (IS_ERR(hash))
2950 return;
2951
2944 for_each_possible_cpu(cpu) { 2952 for_each_possible_cpu(cpu) {
2945 if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { 2953 struct ahash_request *req;
2946 struct crypto_hash *hash;
2947 2954
2948 hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); 2955 if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
2949 if (IS_ERR_OR_NULL(hash)) 2956 continue;
2950 return; 2957
2951 per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash; 2958 req = ahash_request_alloc(hash, GFP_KERNEL);
2952 } 2959 if (!req)
2960 return;
2961
2962 ahash_request_set_callback(req, 0, NULL, NULL);
2963
2964 per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
2953 } 2965 }
2954 /* before setting tcp_md5sig_pool_populated, we must commit all writes 2966 /* before setting tcp_md5sig_pool_populated, we must commit all writes
2955 * to memory. See smp_rmb() in tcp_get_md5sig_pool() 2967 * to memory. See smp_rmb() in tcp_get_md5sig_pool()
@@ -2999,7 +3011,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
2999{ 3011{
3000 struct scatterlist sg; 3012 struct scatterlist sg;
3001 struct tcphdr hdr; 3013 struct tcphdr hdr;
3002 int err;
3003 3014
3004 /* We are not allowed to change tcphdr, make a local copy */ 3015 /* We are not allowed to change tcphdr, make a local copy */
3005 memcpy(&hdr, th, sizeof(hdr)); 3016 memcpy(&hdr, th, sizeof(hdr));
@@ -3007,8 +3018,8 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
3007 3018
3008 /* options aren't included in the hash */ 3019 /* options aren't included in the hash */
3009 sg_init_one(&sg, &hdr, sizeof(hdr)); 3020 sg_init_one(&sg, &hdr, sizeof(hdr));
3010 err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr)); 3021 ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(hdr));
3011 return err; 3022 return crypto_ahash_update(hp->md5_req);
3012} 3023}
3013EXPORT_SYMBOL(tcp_md5_hash_header); 3024EXPORT_SYMBOL(tcp_md5_hash_header);
3014 3025
@@ -3017,7 +3028,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3017{ 3028{
3018 struct scatterlist sg; 3029 struct scatterlist sg;
3019 const struct tcphdr *tp = tcp_hdr(skb); 3030 const struct tcphdr *tp = tcp_hdr(skb);
3020 struct hash_desc *desc = &hp->md5_desc; 3031 struct ahash_request *req = hp->md5_req;
3021 unsigned int i; 3032 unsigned int i;
3022 const unsigned int head_data_len = skb_headlen(skb) > header_len ? 3033 const unsigned int head_data_len = skb_headlen(skb) > header_len ?
3023 skb_headlen(skb) - header_len : 0; 3034 skb_headlen(skb) - header_len : 0;
@@ -3027,7 +3038,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3027 sg_init_table(&sg, 1); 3038 sg_init_table(&sg, 1);
3028 3039
3029 sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len); 3040 sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
3030 if (crypto_hash_update(desc, &sg, head_data_len)) 3041 ahash_request_set_crypt(req, &sg, NULL, head_data_len);
3042 if (crypto_ahash_update(req))
3031 return 1; 3043 return 1;
3032 3044
3033 for (i = 0; i < shi->nr_frags; ++i) { 3045 for (i = 0; i < shi->nr_frags; ++i) {
@@ -3037,7 +3049,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3037 3049
3038 sg_set_page(&sg, page, skb_frag_size(f), 3050 sg_set_page(&sg, page, skb_frag_size(f),
3039 offset_in_page(offset)); 3051 offset_in_page(offset));
3040 if (crypto_hash_update(desc, &sg, skb_frag_size(f))) 3052 ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
3053 if (crypto_ahash_update(req))
3041 return 1; 3054 return 1;
3042 } 3055 }
3043 3056
@@ -3054,7 +3067,8 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *ke
3054 struct scatterlist sg; 3067 struct scatterlist sg;
3055 3068
3056 sg_init_one(&sg, key->key, key->keylen); 3069 sg_init_one(&sg, key->key, key->keylen);
3057 return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); 3070 ahash_request_set_crypt(hp->md5_req, &sg, NULL, key->keylen);
3071 return crypto_ahash_update(hp->md5_req);
3058} 3072}
3059EXPORT_SYMBOL(tcp_md5_hash_key); 3073EXPORT_SYMBOL(tcp_md5_hash_key);
3060 3074
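The tcp.c hunks above move the MD5 signature pool from the old crypto_hash/hash_desc interface to the asynchronous ahash API: one shared MD5 transform is allocated once, each CPU keeps a pre-allocated ahash_request, and every hashing step becomes ahash_request_set_crypt() followed by crypto_ahash_update(). The fragment below is only a condensed sketch of that call sequence for kernel context, not standalone-buildable code and not part of the patch; it allocates and frees the request inline and uses crypto_ahash_digest() instead of the separate init/update/final calls the per-packet paths use.

#include <crypto/hash.h>
#include <linux/scatterlist.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/types.h>

/* Sketch only: hash one buffer with the ahash API the patch switches to. */
static int md5_digest_buf(const void *buf, unsigned int len, u8 *out)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	struct scatterlist sg;
	int err;

	tfm = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return -ENOMEM;
	}
	ahash_request_set_callback(req, 0, NULL, NULL);

	sg_init_one(&sg, buf, len);
	ahash_request_set_crypt(req, &sg, out, len);
	err = crypto_ahash_digest(req);	/* init + update + final in one call */

	ahash_request_free(req);
	crypto_free_ahash(tfm);
	return err;
}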
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 55be6ac70cff..cffd8f9ed1a9 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -1,3 +1,4 @@
1#include <linux/crypto.h>
1#include <linux/err.h> 2#include <linux/err.h>
2#include <linux/init.h> 3#include <linux/init.h>
3#include <linux/kernel.h> 4#include <linux/kernel.h>
@@ -124,6 +125,49 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
124 return false; 125 return false;
125} 126}
126 127
128
129/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
130 * queue this additional data / FIN.
131 */
132void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
133{
134 struct tcp_sock *tp = tcp_sk(sk);
135
136 if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
137 return;
138
139 skb = skb_clone(skb, GFP_ATOMIC);
140 if (!skb)
141 return;
142
143 skb_dst_drop(skb);
144 /* segs_in has been initialized to 1 in tcp_create_openreq_child().
145 * Hence, reset segs_in to 0 before calling tcp_segs_in()
146 * to avoid double counting. Also, tcp_segs_in() expects
147 * skb->len to include the tcp_hdrlen. Hence, it should
148 * be called before __skb_pull().
149 */
150 tp->segs_in = 0;
151 tcp_segs_in(tp, skb);
152 __skb_pull(skb, tcp_hdrlen(skb));
153 skb_set_owner_r(skb, sk);
154
155 TCP_SKB_CB(skb)->seq++;
156 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
157
158 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
159 __skb_queue_tail(&sk->sk_receive_queue, skb);
160 tp->syn_data_acked = 1;
161
162 /* u64_stats_update_begin(&tp->syncp) not needed here,
163 * as we certainly are not changing upper 32bit value (0)
164 */
165 tp->bytes_received = skb->len;
166
167 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
168 tcp_fin(sk);
169}
170
127static struct sock *tcp_fastopen_create_child(struct sock *sk, 171static struct sock *tcp_fastopen_create_child(struct sock *sk,
128 struct sk_buff *skb, 172 struct sk_buff *skb,
129 struct dst_entry *dst, 173 struct dst_entry *dst,
@@ -132,7 +176,6 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
132 struct tcp_sock *tp; 176 struct tcp_sock *tp;
133 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 177 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
134 struct sock *child; 178 struct sock *child;
135 u32 end_seq;
136 bool own_req; 179 bool own_req;
137 180
138 req->num_retrans = 0; 181 req->num_retrans = 0;
@@ -178,35 +221,11 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
178 tcp_init_metrics(child); 221 tcp_init_metrics(child);
179 tcp_init_buffer_space(child); 222 tcp_init_buffer_space(child);
180 223
181 /* Queue the data carried in the SYN packet. 224 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
182 * We used to play tricky games with skb_get(). 225
183 * With lockless listener, it is a dead end. 226 tcp_fastopen_add_skb(child, skb);
184 * Do not think about it. 227
185 * 228 tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
186 * XXX (TFO) - we honor a zero-payload TFO request for now,
187 * (any reason not to?) but no need to queue the skb since
188 * there is no data. How about SYN+FIN?
189 */
190 end_seq = TCP_SKB_CB(skb)->end_seq;
191 if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
192 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
193
194 if (likely(skb2)) {
195 skb_dst_drop(skb2);
196 __skb_pull(skb2, tcp_hdrlen(skb));
197 skb_set_owner_r(skb2, child);
198 __skb_queue_tail(&child->sk_receive_queue, skb2);
199 tp->syn_data_acked = 1;
200
201 /* u64_stats_update_begin(&tp->syncp) not needed here,
202 * as we certainly are not changing upper 32bit value (0)
203 */
204 tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
205 } else {
206 end_seq = TCP_SKB_CB(skb)->seq + 1;
207 }
208 }
209 tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
210 /* tcp_conn_request() is sending the SYNACK, 229 /* tcp_conn_request() is sending the SYNACK,
211 * and queues the child into listener accept queue. 230 * and queues the child into listener accept queue.
212 */ 231 */
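The new tcp_fastopen_add_skb() above queues the data carried in a SYN or SYN-ACK by adjusting sequence numbers: the SYN flag consumes one sequence number, so seq is bumped past it, the SYN bit is cleared, and rcv_nxt advances to end_seq. A small arithmetic illustration (all numbers invented), compilable as plain C:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t isn     = 1000;		/* sender's initial sequence number */
	uint32_t payload = 100;			/* bytes of data carried in the SYN */
	uint32_t seq     = isn;			/* TCP_SKB_CB(skb)->seq on arrival  */
	uint32_t end_seq = isn + 1 + payload;	/* the SYN flag counts as one byte  */

	seq++;					/* skip the SYN: data starts at isn+1 */
	printf("data occupies [%u, %u)\n", seq, end_seq);
	printf("rcv_nxt becomes %u\n", end_seq);	/* next byte expected */
	return 0;
}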
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0003d409fec5..e6e65f79ade8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -80,9 +80,7 @@ int sysctl_tcp_timestamps __read_mostly = 1;
80int sysctl_tcp_window_scaling __read_mostly = 1; 80int sysctl_tcp_window_scaling __read_mostly = 1;
81int sysctl_tcp_sack __read_mostly = 1; 81int sysctl_tcp_sack __read_mostly = 1;
82int sysctl_tcp_fack __read_mostly = 1; 82int sysctl_tcp_fack __read_mostly = 1;
83int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
84int sysctl_tcp_max_reordering __read_mostly = 300; 83int sysctl_tcp_max_reordering __read_mostly = 300;
85EXPORT_SYMBOL(sysctl_tcp_reordering);
86int sysctl_tcp_dsack __read_mostly = 1; 84int sysctl_tcp_dsack __read_mostly = 1;
87int sysctl_tcp_app_win __read_mostly = 31; 85int sysctl_tcp_app_win __read_mostly = 31;
88int sysctl_tcp_adv_win_scale __read_mostly = 1; 86int sysctl_tcp_adv_win_scale __read_mostly = 1;
@@ -126,6 +124,10 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
126#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) 124#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
127#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) 125#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
128 126
127#define REXMIT_NONE 0 /* no loss recovery to do */
128#define REXMIT_LOST 1 /* retransmit packets marked lost */
129#define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */
130
129/* Adapt the MSS value used to make delayed ack decision to the 131/* Adapt the MSS value used to make delayed ack decision to the
130 * real world. 132 * real world.
131 */ 133 */
@@ -1210,6 +1212,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
1210 sacked |= TCPCB_SACKED_ACKED; 1212 sacked |= TCPCB_SACKED_ACKED;
1211 state->flag |= FLAG_DATA_SACKED; 1213 state->flag |= FLAG_DATA_SACKED;
1212 tp->sacked_out += pcount; 1214 tp->sacked_out += pcount;
1215 tp->delivered += pcount; /* Out-of-order packets delivered */
1213 1216
1214 fack_count += pcount; 1217 fack_count += pcount;
1215 1218
@@ -1821,8 +1824,12 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1821static void tcp_add_reno_sack(struct sock *sk) 1824static void tcp_add_reno_sack(struct sock *sk)
1822{ 1825{
1823 struct tcp_sock *tp = tcp_sk(sk); 1826 struct tcp_sock *tp = tcp_sk(sk);
1827 u32 prior_sacked = tp->sacked_out;
1828
1824 tp->sacked_out++; 1829 tp->sacked_out++;
1825 tcp_check_reno_reordering(sk, 0); 1830 tcp_check_reno_reordering(sk, 0);
1831 if (tp->sacked_out > prior_sacked)
1832 tp->delivered++; /* Some out-of-order packet is delivered */
1826 tcp_verify_left_out(tp); 1833 tcp_verify_left_out(tp);
1827} 1834}
1828 1835
@@ -1834,6 +1841,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1834 1841
1835 if (acked > 0) { 1842 if (acked > 0) {
1836 /* One ACK acked hole. The rest eat duplicate ACKs. */ 1843 /* One ACK acked hole. The rest eat duplicate ACKs. */
1844 tp->delivered += max_t(int, acked - tp->sacked_out, 1);
1837 if (acked - 1 >= tp->sacked_out) 1845 if (acked - 1 >= tp->sacked_out)
1838 tp->sacked_out = 0; 1846 tp->sacked_out = 0;
1839 else 1847 else
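In the RenoSACK hunk above, tp->delivered is credited for cumulatively ACKed packets, and the max_t(..., 1) floor makes sure at least one delivery is counted even when everything covered by the ACK had already been counted through the emulated SACK path. Toy numbers (userspace sketch, max_int standing in for the kernel max_t macro):

#include <stdio.h>

static int max_int(int a, int b) { return a > b ? a : b; }

int main(void)
{
	int acked = 2, sacked_out = 5;	/* ACK covers less than the emulated SACKs */
	printf("delivered += %d\n", max_int(acked - sacked_out, 1));	/* 1 */

	acked = 7, sacked_out = 3;	/* ACK covers more: credit the difference */
	printf("delivered += %d\n", max_int(acked - sacked_out, 1));	/* 4 */
	return 0;
}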
@@ -1873,6 +1881,7 @@ void tcp_enter_loss(struct sock *sk)
1873{ 1881{
1874 const struct inet_connection_sock *icsk = inet_csk(sk); 1882 const struct inet_connection_sock *icsk = inet_csk(sk);
1875 struct tcp_sock *tp = tcp_sk(sk); 1883 struct tcp_sock *tp = tcp_sk(sk);
1884 struct net *net = sock_net(sk);
1876 struct sk_buff *skb; 1885 struct sk_buff *skb;
1877 bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; 1886 bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
1878 bool is_reneg; /* is receiver reneging on SACKs? */ 1887 bool is_reneg; /* is receiver reneging on SACKs? */
@@ -1923,9 +1932,9 @@ void tcp_enter_loss(struct sock *sk)
1923 * suggests that the degree of reordering is over-estimated. 1932 * suggests that the degree of reordering is over-estimated.
1924 */ 1933 */
1925 if (icsk->icsk_ca_state <= TCP_CA_Disorder && 1934 if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
1926 tp->sacked_out >= sysctl_tcp_reordering) 1935 tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
1927 tp->reordering = min_t(unsigned int, tp->reordering, 1936 tp->reordering = min_t(unsigned int, tp->reordering,
1928 sysctl_tcp_reordering); 1937 net->ipv4.sysctl_tcp_reordering);
1929 tcp_set_ca_state(sk, TCP_CA_Loss); 1938 tcp_set_ca_state(sk, TCP_CA_Loss);
1930 tp->high_seq = tp->snd_nxt; 1939 tp->high_seq = tp->snd_nxt;
1931 tcp_ecn_queue_cwr(tp); 1940 tcp_ecn_queue_cwr(tp);
@@ -2109,6 +2118,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
2109{ 2118{
2110 struct tcp_sock *tp = tcp_sk(sk); 2119 struct tcp_sock *tp = tcp_sk(sk);
2111 __u32 packets_out; 2120 __u32 packets_out;
2121 int tcp_reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
2112 2122
2113 /* Trick#1: The loss is proven. */ 2123 /* Trick#1: The loss is proven. */
2114 if (tp->lost_out) 2124 if (tp->lost_out)
@@ -2123,7 +2133,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
2123 */ 2133 */
2124 packets_out = tp->packets_out; 2134 packets_out = tp->packets_out;
2125 if (packets_out <= tp->reordering && 2135 if (packets_out <= tp->reordering &&
2126 tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && 2136 tp->sacked_out >= max_t(__u32, packets_out/2, tcp_reordering) &&
2127 !tcp_may_send_now(sk)) { 2137 !tcp_may_send_now(sk)) {
2128 /* We have nothing to send. This connection is limited 2138 /* We have nothing to send. This connection is limited
2129 * either by receiver window or by application. 2139 * either by receiver window or by application.
@@ -2164,8 +2174,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2164{ 2174{
2165 struct tcp_sock *tp = tcp_sk(sk); 2175 struct tcp_sock *tp = tcp_sk(sk);
2166 struct sk_buff *skb; 2176 struct sk_buff *skb;
2167 int cnt, oldcnt; 2177 int cnt, oldcnt, lost;
2168 int err;
2169 unsigned int mss; 2178 unsigned int mss;
2170 /* Use SACK to deduce losses of new sequences sent during recovery */ 2179 /* Use SACK to deduce losses of new sequences sent during recovery */
2171 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; 2180 const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
@@ -2205,9 +2214,10 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2205 break; 2214 break;
2206 2215
2207 mss = tcp_skb_mss(skb); 2216 mss = tcp_skb_mss(skb);
2208 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, 2217 /* If needed, chop off the prefix to mark as lost. */
2209 mss, GFP_ATOMIC); 2218 lost = (packets - oldcnt) * mss;
2210 if (err < 0) 2219 if (lost < skb->len &&
2220 tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
2211 break; 2221 break;
2212 cnt = packets; 2222 cnt = packets;
2213 } 2223 }
@@ -2366,8 +2376,6 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
2366 tp->snd_ssthresh = tp->prior_ssthresh; 2376 tp->snd_ssthresh = tp->prior_ssthresh;
2367 tcp_ecn_withdraw_cwr(tp); 2377 tcp_ecn_withdraw_cwr(tp);
2368 } 2378 }
2369 } else {
2370 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
2371 } 2379 }
2372 tp->snd_cwnd_stamp = tcp_time_stamp; 2380 tp->snd_cwnd_stamp = tcp_time_stamp;
2373 tp->undo_marker = 0; 2381 tp->undo_marker = 0;
@@ -2469,14 +2477,12 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
2469 tcp_ecn_queue_cwr(tp); 2477 tcp_ecn_queue_cwr(tp);
2470} 2478}
2471 2479
2472static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, 2480static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
2473 int fast_rexmit, int flag) 2481 int flag)
2474{ 2482{
2475 struct tcp_sock *tp = tcp_sk(sk); 2483 struct tcp_sock *tp = tcp_sk(sk);
2476 int sndcnt = 0; 2484 int sndcnt = 0;
2477 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); 2485 int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
2478 int newly_acked_sacked = prior_unsacked -
2479 (tp->packets_out - tp->sacked_out);
2480 2486
2481 if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd)) 2487 if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
2482 return; 2488 return;
@@ -2494,7 +2500,8 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
2494 } else { 2500 } else {
2495 sndcnt = min(delta, newly_acked_sacked); 2501 sndcnt = min(delta, newly_acked_sacked);
2496 } 2502 }
2497 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); 2503 /* Force a fast retransmit upon entering fast recovery */
2504 sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
2498 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; 2505 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2499} 2506}
2500 2507
@@ -2539,7 +2546,7 @@ static void tcp_try_keep_open(struct sock *sk)
2539 } 2546 }
2540} 2547}
2541 2548
2542static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) 2549static void tcp_try_to_open(struct sock *sk, int flag)
2543{ 2550{
2544 struct tcp_sock *tp = tcp_sk(sk); 2551 struct tcp_sock *tp = tcp_sk(sk);
2545 2552
@@ -2553,8 +2560,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
2553 2560
2554 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { 2561 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2555 tcp_try_keep_open(sk); 2562 tcp_try_keep_open(sk);
2556 } else {
2557 tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
2558 } 2563 }
2559} 2564}
2560 2565
@@ -2664,7 +2669,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2664/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are 2669/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
2665 * recovered or spurious. Otherwise retransmits more on partial ACKs. 2670 * recovered or spurious. Otherwise retransmits more on partial ACKs.
2666 */ 2671 */
2667static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) 2672static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
2673 int *rexmit)
2668{ 2674{
2669 struct tcp_sock *tp = tcp_sk(sk); 2675 struct tcp_sock *tp = tcp_sk(sk);
2670 bool recovered = !before(tp->snd_una, tp->high_seq); 2676 bool recovered = !before(tp->snd_una, tp->high_seq);
@@ -2686,10 +2692,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2686 tp->frto = 0; /* Step 3.a. loss was real */ 2692 tp->frto = 0; /* Step 3.a. loss was real */
2687 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { 2693 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
2688 tp->high_seq = tp->snd_nxt; 2694 tp->high_seq = tp->snd_nxt;
2689 __tcp_push_pending_frames(sk, tcp_current_mss(sk), 2695 /* Step 2.b. Try send new data (but deferred until cwnd
2690 TCP_NAGLE_OFF); 2696 * is updated in tcp_ack()). Otherwise fall back to
2691 if (after(tp->snd_nxt, tp->high_seq)) 2697 * the conventional recovery.
2692 return; /* Step 2.b */ 2698 */
2699 if (tcp_send_head(sk) &&
2700 after(tcp_wnd_end(tp), tp->snd_nxt)) {
2701 *rexmit = REXMIT_NEW;
2702 return;
2703 }
2693 tp->frto = 0; 2704 tp->frto = 0;
2694 } 2705 }
2695 } 2706 }
@@ -2708,12 +2719,11 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2708 else if (flag & FLAG_SND_UNA_ADVANCED) 2719 else if (flag & FLAG_SND_UNA_ADVANCED)
2709 tcp_reset_reno_sack(tp); 2720 tcp_reset_reno_sack(tp);
2710 } 2721 }
2711 tcp_xmit_retransmit_queue(sk); 2722 *rexmit = REXMIT_LOST;
2712} 2723}
2713 2724
2714/* Undo during fast recovery after partial ACK. */ 2725/* Undo during fast recovery after partial ACK. */
2715static bool tcp_try_undo_partial(struct sock *sk, const int acked, 2726static bool tcp_try_undo_partial(struct sock *sk, const int acked)
2716 const int prior_unsacked, int flag)
2717{ 2727{
2718 struct tcp_sock *tp = tcp_sk(sk); 2728 struct tcp_sock *tp = tcp_sk(sk);
2719 2729
@@ -2728,10 +2738,8 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
2728 * can undo. Otherwise we clock out new packets but do not 2738 * can undo. Otherwise we clock out new packets but do not
2729 * mark more packets lost or retransmit more. 2739 * mark more packets lost or retransmit more.
2730 */ 2740 */
2731 if (tp->retrans_out) { 2741 if (tp->retrans_out)
2732 tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
2733 return true; 2742 return true;
2734 }
2735 2743
2736 if (!tcp_any_retrans_done(sk)) 2744 if (!tcp_any_retrans_done(sk))
2737 tp->retrans_stamp = 0; 2745 tp->retrans_stamp = 0;
@@ -2750,21 +2758,21 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
2750 * taking into account both packets sitting in receiver's buffer and 2758 * taking into account both packets sitting in receiver's buffer and
2751 * packets lost by network. 2759 * packets lost by network.
2752 * 2760 *
2753 * Besides that it does CWND reduction, when packet loss is detected 2761 * Besides that it updates the congestion state when packet loss or ECN
2754 * and changes state of machine. 2762 * is detected. But it does not reduce the cwnd, it is done by the
2763 * congestion control later.
2755 * 2764 *
2756 * It does _not_ decide what to send, it is made in function 2765 * It does _not_ decide what to send, it is made in function
2757 * tcp_xmit_retransmit_queue(). 2766 * tcp_xmit_retransmit_queue().
2758 */ 2767 */
2759static void tcp_fastretrans_alert(struct sock *sk, const int acked, 2768static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2760 const int prior_unsacked, 2769 bool is_dupack, int *ack_flag, int *rexmit)
2761 bool is_dupack, int flag)
2762{ 2770{
2763 struct inet_connection_sock *icsk = inet_csk(sk); 2771 struct inet_connection_sock *icsk = inet_csk(sk);
2764 struct tcp_sock *tp = tcp_sk(sk); 2772 struct tcp_sock *tp = tcp_sk(sk);
2773 int fast_rexmit = 0, flag = *ack_flag;
2765 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && 2774 bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
2766 (tcp_fackets_out(tp) > tp->reordering)); 2775 (tcp_fackets_out(tp) > tp->reordering));
2767 int fast_rexmit = 0;
2768 2776
2769 if (WARN_ON(!tp->packets_out && tp->sacked_out)) 2777 if (WARN_ON(!tp->packets_out && tp->sacked_out))
2770 tp->sacked_out = 0; 2778 tp->sacked_out = 0;
@@ -2811,8 +2819,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2811 2819
2812 /* Use RACK to detect loss */ 2820 /* Use RACK to detect loss */
2813 if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS && 2821 if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS &&
2814 tcp_rack_mark_lost(sk)) 2822 tcp_rack_mark_lost(sk)) {
2815 flag |= FLAG_LOST_RETRANS; 2823 flag |= FLAG_LOST_RETRANS;
2824 *ack_flag |= FLAG_LOST_RETRANS;
2825 }
2816 2826
2817 /* E. Process state. */ 2827 /* E. Process state. */
2818 switch (icsk->icsk_ca_state) { 2828 switch (icsk->icsk_ca_state) {
@@ -2821,7 +2831,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2821 if (tcp_is_reno(tp) && is_dupack) 2831 if (tcp_is_reno(tp) && is_dupack)
2822 tcp_add_reno_sack(sk); 2832 tcp_add_reno_sack(sk);
2823 } else { 2833 } else {
2824 if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag)) 2834 if (tcp_try_undo_partial(sk, acked))
2825 return; 2835 return;
2826 /* Partial ACK arrived. Force fast retransmit. */ 2836 /* Partial ACK arrived. Force fast retransmit. */
2827 do_lost = tcp_is_reno(tp) || 2837 do_lost = tcp_is_reno(tp) ||
@@ -2833,7 +2843,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2833 } 2843 }
2834 break; 2844 break;
2835 case TCP_CA_Loss: 2845 case TCP_CA_Loss:
2836 tcp_process_loss(sk, flag, is_dupack); 2846 tcp_process_loss(sk, flag, is_dupack, rexmit);
2837 if (icsk->icsk_ca_state != TCP_CA_Open && 2847 if (icsk->icsk_ca_state != TCP_CA_Open &&
2838 !(flag & FLAG_LOST_RETRANS)) 2848 !(flag & FLAG_LOST_RETRANS))
2839 return; 2849 return;
@@ -2850,7 +2860,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2850 tcp_try_undo_dsack(sk); 2860 tcp_try_undo_dsack(sk);
2851 2861
2852 if (!tcp_time_to_recover(sk, flag)) { 2862 if (!tcp_time_to_recover(sk, flag)) {
2853 tcp_try_to_open(sk, flag, prior_unsacked); 2863 tcp_try_to_open(sk, flag);
2854 return; 2864 return;
2855 } 2865 }
2856 2866
@@ -2872,8 +2882,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2872 2882
2873 if (do_lost) 2883 if (do_lost)
2874 tcp_update_scoreboard(sk, fast_rexmit); 2884 tcp_update_scoreboard(sk, fast_rexmit);
2875 tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag); 2885 *rexmit = REXMIT_LOST;
2876 tcp_xmit_retransmit_queue(sk);
2877} 2886}
2878 2887
2879/* Kathleen Nichols' algorithm for tracking the minimum value of 2888/* Kathleen Nichols' algorithm for tracking the minimum value of
@@ -2898,7 +2907,10 @@ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
2898{ 2907{
2899 const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ; 2908 const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
2900 struct rtt_meas *m = tcp_sk(sk)->rtt_min; 2909 struct rtt_meas *m = tcp_sk(sk)->rtt_min;
2901 struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now }; 2910 struct rtt_meas rttm = {
2911 .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
2912 .ts = now,
2913 };
2902 u32 elapsed; 2914 u32 elapsed;
2903 2915
2904 /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */ 2916 /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
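The rtt_min hunk above changes the fallback for a zero RTT sample from 1 microsecond to jiffies_to_usecs(1), i.e. one clock tick expressed in microseconds, which is the smallest interval the stack can actually resolve without finer timestamps. A worked example of what that fallback evaluates to for a few HZ settings (values are illustrative and assume HZ divides one million evenly):

#include <stdio.h>

static unsigned int jiffies_to_usecs_demo(unsigned int j, unsigned int hz)
{
	return j * (1000000u / hz);
}

int main(void)
{
	unsigned int hz_values[] = { 100, 250, 1000 };
	for (unsigned int i = 0; i < 3; i++)
		printf("HZ=%u: fallback rtt = %u us (old code used 1 us)\n",
		       hz_values[i], jiffies_to_usecs_demo(1, hz_values[i]));
	return 0;
}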
@@ -3095,7 +3107,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
3095 * arrived at the other end. 3107 * arrived at the other end.
3096 */ 3108 */
3097static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, 3109static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3098 u32 prior_snd_una, 3110 u32 prior_snd_una, int *acked,
3099 struct tcp_sacktag_state *sack) 3111 struct tcp_sacktag_state *sack)
3100{ 3112{
3101 const struct inet_connection_sock *icsk = inet_csk(sk); 3113 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3153,10 +3165,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3153 flag |= FLAG_ORIG_SACK_ACKED; 3165 flag |= FLAG_ORIG_SACK_ACKED;
3154 } 3166 }
3155 3167
3156 if (sacked & TCPCB_SACKED_ACKED) 3168 if (sacked & TCPCB_SACKED_ACKED) {
3157 tp->sacked_out -= acked_pcount; 3169 tp->sacked_out -= acked_pcount;
3158 else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb)) 3170 } else if (tcp_is_sack(tp)) {
3159 tcp_rack_advance(tp, &skb->skb_mstamp, sacked); 3171 tp->delivered += acked_pcount;
3172 if (!tcp_skb_spurious_retrans(tp, skb))
3173 tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
3174 }
3160 if (sacked & TCPCB_LOST) 3175 if (sacked & TCPCB_LOST)
3161 tp->lost_out -= acked_pcount; 3176 tp->lost_out -= acked_pcount;
3162 3177
@@ -3265,6 +3280,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3265 } 3280 }
3266 } 3281 }
3267#endif 3282#endif
3283 *acked = pkts_acked;
3268 return flag; 3284 return flag;
3269} 3285}
3270 3286
@@ -3298,21 +3314,36 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3298/* Decide wheather to run the increase function of congestion control. */ 3314/* Decide wheather to run the increase function of congestion control. */
3299static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) 3315static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3300{ 3316{
3301 if (tcp_in_cwnd_reduction(sk))
3302 return false;
3303
3304 /* If reordering is high then always grow cwnd whenever data is 3317 /* If reordering is high then always grow cwnd whenever data is
3305 * delivered regardless of its ordering. Otherwise stay conservative 3318 * delivered regardless of its ordering. Otherwise stay conservative
3306 * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/ 3319 * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
3307 * new SACK or ECE mark may first advance cwnd here and later reduce 3320 * new SACK or ECE mark may first advance cwnd here and later reduce
3308 * cwnd in tcp_fastretrans_alert() based on more states. 3321 * cwnd in tcp_fastretrans_alert() based on more states.
3309 */ 3322 */
3310 if (tcp_sk(sk)->reordering > sysctl_tcp_reordering) 3323 if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
3311 return flag & FLAG_FORWARD_PROGRESS; 3324 return flag & FLAG_FORWARD_PROGRESS;
3312 3325
3313 return flag & FLAG_DATA_ACKED; 3326 return flag & FLAG_DATA_ACKED;
3314} 3327}
3315 3328
3329/* The "ultimate" congestion control function that aims to replace the rigid
3330 * cwnd increase and decrease control (tcp_cong_avoid,tcp_*cwnd_reduction).
3331 * It's called toward the end of processing an ACK with precise rate
3332 * information. All transmission or retransmission are delayed afterwards.
3333 */
3334static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
3335 int flag)
3336{
3337 if (tcp_in_cwnd_reduction(sk)) {
3338 /* Reduce cwnd if state mandates */
3339 tcp_cwnd_reduction(sk, acked_sacked, flag);
3340 } else if (tcp_may_raise_cwnd(sk, flag)) {
3341 /* Advance cwnd if state allows */
3342 tcp_cong_avoid(sk, ack, acked_sacked);
3343 }
3344 tcp_update_pacing_rate(sk);
3345}
3346
3316/* Check that window update is acceptable. 3347/* Check that window update is acceptable.
3317 * The function assumes that snd_una<=ack<=snd_next. 3348 * The function assumes that snd_una<=ack<=snd_next.
3318 */ 3349 */
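The tcp_cong_control() helper added above centralizes the per-ACK window decision: while the connection is in CWR or Recovery the window is reduced via PRR, otherwise congestion avoidance may grow it, and the pacing rate is refreshed either way, all driven by the delta of tp->delivered across the ACK. A toy userspace sketch of that dispatch, with every name and number invented for illustration:

#include <stdio.h>
#include <stdbool.h>

static void cong_control(unsigned int acked_sacked, bool in_cwnd_reduction,
			 bool may_raise)
{
	if (in_cwnd_reduction)
		printf("PRR: reduce cwnd using %u newly delivered segments\n",
		       acked_sacked);
	else if (may_raise)
		printf("congestion avoidance: credit %u segments\n", acked_sacked);
	printf("refresh pacing rate\n");
}

int main(void)
{
	unsigned int prior_delivered = 100, delivered = 103;

	cong_control(delivered - prior_delivered, true, false);	/* in recovery */
	cong_control(delivered - prior_delivered, false, true);	/* open, can grow */
	cong_control(delivered - prior_delivered, false, false);	/* dubious ACK */
	return 0;
}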
@@ -3508,6 +3539,27 @@ static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
3508 icsk->icsk_ca_ops->in_ack_event(sk, flags); 3539 icsk->icsk_ca_ops->in_ack_event(sk, flags);
3509} 3540}
3510 3541
3542/* Congestion control has updated the cwnd already. So if we're in
3543 * loss recovery then now we do any new sends (for FRTO) or
3544 * retransmits (for CA_Loss or CA_recovery) that make sense.
3545 */
3546static void tcp_xmit_recovery(struct sock *sk, int rexmit)
3547{
3548 struct tcp_sock *tp = tcp_sk(sk);
3549
3550 if (rexmit == REXMIT_NONE)
3551 return;
3552
3553 if (unlikely(rexmit == 2)) {
3554 __tcp_push_pending_frames(sk, tcp_current_mss(sk),
3555 TCP_NAGLE_OFF);
3556 if (after(tp->snd_nxt, tp->high_seq))
3557 return;
3558 tp->frto = 0;
3559 }
3560 tcp_xmit_retransmit_queue(sk);
3561}
3562
3511/* This routine deals with incoming acks, but not outgoing ones. */ 3563/* This routine deals with incoming acks, but not outgoing ones. */
3512static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 3564static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3513{ 3565{
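With the REXMIT_* flags and tcp_xmit_recovery() introduced above, the loss-processing code only records what kind of (re)transmission is needed, and the actual sending happens once, after the congestion window has been updated. The userspace sketch below mirrors that dispatch; the new_data_was_sent flag stands in for the kernel's after(tp->snd_nxt, tp->high_seq) check and is not a real variable from the patch:

#include <stdio.h>
#include <stdbool.h>

enum { REXMIT_NONE = 0, REXMIT_LOST = 1, REXMIT_NEW = 2 };

static void xmit_recovery(int rexmit, bool new_data_was_sent)
{
	if (rexmit == REXMIT_NONE)
		return;			/* nothing recorded during ACK processing */
	if (rexmit == REXMIT_NEW) {
		printf("FRTO: push previously unsent data\n");
		if (new_data_was_sent)
			return;		/* probe went out; no retransmits yet */
	}
	printf("walk the retransmit queue\n");
}

int main(void)
{
	xmit_recovery(REXMIT_NONE, false);
	xmit_recovery(REXMIT_LOST, false);
	xmit_recovery(REXMIT_NEW, true);
	xmit_recovery(REXMIT_NEW, false);
	return 0;
}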
@@ -3520,8 +3572,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3520 bool is_dupack = false; 3572 bool is_dupack = false;
3521 u32 prior_fackets; 3573 u32 prior_fackets;
3522 int prior_packets = tp->packets_out; 3574 int prior_packets = tp->packets_out;
3523 const int prior_unsacked = tp->packets_out - tp->sacked_out; 3575 u32 prior_delivered = tp->delivered;
3524 int acked = 0; /* Number of packets newly acked */ 3576 int acked = 0; /* Number of packets newly acked */
3577 int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
3525 3578
3526 sack_state.first_sackt.v64 = 0; 3579 sack_state.first_sackt.v64 = 0;
3527 3580
@@ -3610,23 +3663,16 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3610 goto no_queue; 3663 goto no_queue;
3611 3664
3612 /* See if we can take anything off of the retransmit queue. */ 3665 /* See if we can take anything off of the retransmit queue. */
3613 acked = tp->packets_out; 3666 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
3614 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
3615 &sack_state); 3667 &sack_state);
3616 acked -= tp->packets_out;
3617 3668
3618 if (tcp_ack_is_dubious(sk, flag)) { 3669 if (tcp_ack_is_dubious(sk, flag)) {
3619 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); 3670 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3620 tcp_fastretrans_alert(sk, acked, prior_unsacked, 3671 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
3621 is_dupack, flag);
3622 } 3672 }
3623 if (tp->tlp_high_seq) 3673 if (tp->tlp_high_seq)
3624 tcp_process_tlp_ack(sk, ack, flag); 3674 tcp_process_tlp_ack(sk, ack, flag);
3625 3675
3626 /* Advance cwnd if state allows */
3627 if (tcp_may_raise_cwnd(sk, flag))
3628 tcp_cong_avoid(sk, ack, acked);
3629
3630 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { 3676 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3631 struct dst_entry *dst = __sk_dst_get(sk); 3677 struct dst_entry *dst = __sk_dst_get(sk);
3632 if (dst) 3678 if (dst)
@@ -3635,14 +3681,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3635 3681
3636 if (icsk->icsk_pending == ICSK_TIME_RETRANS) 3682 if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3637 tcp_schedule_loss_probe(sk); 3683 tcp_schedule_loss_probe(sk);
3638 tcp_update_pacing_rate(sk); 3684 tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
3685 tcp_xmit_recovery(sk, rexmit);
3639 return 1; 3686 return 1;
3640 3687
3641no_queue: 3688no_queue:
3642 /* If data was DSACKed, see if we can undo a cwnd reduction. */ 3689 /* If data was DSACKed, see if we can undo a cwnd reduction. */
3643 if (flag & FLAG_DSACKING_ACK) 3690 if (flag & FLAG_DSACKING_ACK)
3644 tcp_fastretrans_alert(sk, acked, prior_unsacked, 3691 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
3645 is_dupack, flag);
3646 /* If this ack opens up a zero window, clear backoff. It was 3692 /* If this ack opens up a zero window, clear backoff. It was
3647 * being used to time the probes, and is probably far higher than 3693 * being used to time the probes, and is probably far higher than
3648 * it needs to be for normal retransmission. 3694 * it needs to be for normal retransmission.
@@ -3665,8 +3711,8 @@ old_ack:
3665 if (TCP_SKB_CB(skb)->sacked) { 3711 if (TCP_SKB_CB(skb)->sacked) {
3666 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, 3712 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
3667 &sack_state); 3713 &sack_state);
3668 tcp_fastretrans_alert(sk, acked, prior_unsacked, 3714 tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
3669 is_dupack, flag); 3715 tcp_xmit_recovery(sk, rexmit);
3670 } 3716 }
3671 3717
3672 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); 3718 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -3997,7 +4043,7 @@ void tcp_reset(struct sock *sk)
3997 * 4043 *
3998 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. 4044 * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3999 */ 4045 */
4000static void tcp_fin(struct sock *sk) 4046void tcp_fin(struct sock *sk)
4001{ 4047{
4002 struct tcp_sock *tp = tcp_sk(sk); 4048 struct tcp_sock *tp = tcp_sk(sk);
4003 4049
@@ -5511,6 +5557,9 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5511 tp->syn_data_acked = tp->syn_data; 5557 tp->syn_data_acked = tp->syn_data;
5512 if (tp->syn_data_acked) 5558 if (tp->syn_data_acked)
5513 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); 5559 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
5560
5561 tcp_fastopen_add_skb(sk, synack);
5562
5514 return false; 5563 return false;
5515} 5564}
5516 5565
@@ -6117,9 +6166,10 @@ static bool tcp_syn_flood_action(const struct sock *sk,
6117 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 6166 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
6118 const char *msg = "Dropping request"; 6167 const char *msg = "Dropping request";
6119 bool want_cookie = false; 6168 bool want_cookie = false;
6169 struct net *net = sock_net(sk);
6120 6170
6121#ifdef CONFIG_SYN_COOKIES 6171#ifdef CONFIG_SYN_COOKIES
6122 if (sysctl_tcp_syncookies) { 6172 if (net->ipv4.sysctl_tcp_syncookies) {
6123 msg = "Sending cookies"; 6173 msg = "Sending cookies";
6124 want_cookie = true; 6174 want_cookie = true;
6125 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); 6175 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@ -6128,7 +6178,7 @@ static bool tcp_syn_flood_action(const struct sock *sk,
6128 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); 6178 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
6129 6179
6130 if (!queue->synflood_warned && 6180 if (!queue->synflood_warned &&
6131 sysctl_tcp_syncookies != 2 && 6181 net->ipv4.sysctl_tcp_syncookies != 2 &&
6132 xchg(&queue->synflood_warned, 1) == 0) 6182 xchg(&queue->synflood_warned, 1) == 0)
6133 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", 6183 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
6134 proto, ntohs(tcp_hdr(skb)->dest), msg); 6184 proto, ntohs(tcp_hdr(skb)->dest), msg);
@@ -6161,6 +6211,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6161 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; 6211 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
6162 struct tcp_options_received tmp_opt; 6212 struct tcp_options_received tmp_opt;
6163 struct tcp_sock *tp = tcp_sk(sk); 6213 struct tcp_sock *tp = tcp_sk(sk);
6214 struct net *net = sock_net(sk);
6164 struct sock *fastopen_sk = NULL; 6215 struct sock *fastopen_sk = NULL;
6165 struct dst_entry *dst = NULL; 6216 struct dst_entry *dst = NULL;
6166 struct request_sock *req; 6217 struct request_sock *req;
@@ -6171,7 +6222,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6171 * limitations, they conserve resources and peer is 6222 * limitations, they conserve resources and peer is
6172 * evidently real one. 6223 * evidently real one.
6173 */ 6224 */
6174 if ((sysctl_tcp_syncookies == 2 || 6225 if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
6175 inet_csk_reqsk_queue_is_full(sk)) && !isn) { 6226 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
6176 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); 6227 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
6177 if (!want_cookie) 6228 if (!want_cookie)
@@ -6237,7 +6288,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
6237 } 6288 }
6238 } 6289 }
6239 /* Kill the following clause, if you dislike this way. */ 6290 /* Kill the following clause, if you dislike this way. */
6240 else if (!sysctl_tcp_syncookies && 6291 else if (!net->ipv4.sysctl_tcp_syncookies &&
6241 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < 6292 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
6242 (sysctl_max_syn_backlog >> 2)) && 6293 (sysctl_max_syn_backlog >> 2)) &&
6243 !tcp_peer_is_proven(req, dst, false, 6294 !tcp_peer_is_proven(req, dst, false,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5ced3e4013e3..ad450509029b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -81,7 +81,7 @@
81#include <linux/proc_fs.h> 81#include <linux/proc_fs.h>
82#include <linux/seq_file.h> 82#include <linux/seq_file.h>
83 83
84#include <linux/crypto.h> 84#include <crypto/hash.h>
85#include <linux/scatterlist.h> 85#include <linux/scatterlist.h>
86 86
87int sysctl_tcp_tw_reuse __read_mostly; 87int sysctl_tcp_tw_reuse __read_mostly;
@@ -311,7 +311,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
311 311
312 312
313/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ 313/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
314void tcp_req_err(struct sock *sk, u32 seq) 314void tcp_req_err(struct sock *sk, u32 seq, bool abort)
315{ 315{
316 struct request_sock *req = inet_reqsk(sk); 316 struct request_sock *req = inet_reqsk(sk);
317 struct net *net = sock_net(sk); 317 struct net *net = sock_net(sk);
@@ -319,11 +319,9 @@ void tcp_req_err(struct sock *sk, u32 seq)
319 /* ICMPs are not backlogged, hence we cannot get 319 /* ICMPs are not backlogged, hence we cannot get
320 * an established socket here. 320 * an established socket here.
321 */ 321 */
322 WARN_ON(req->sk);
323
324 if (seq != tcp_rsk(req)->snt_isn) { 322 if (seq != tcp_rsk(req)->snt_isn) {
325 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 323 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
326 } else { 324 } else if (abort) {
327 /* 325 /*
328 * Still in SYN_RECV, just remove it silently. 326 * Still in SYN_RECV, just remove it silently.
329 * There is no good way to pass the error to the newly 327 * There is no good way to pass the error to the newly
@@ -383,7 +381,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
383 } 381 }
384 seq = ntohl(th->seq); 382 seq = ntohl(th->seq);
385 if (sk->sk_state == TCP_NEW_SYN_RECV) 383 if (sk->sk_state == TCP_NEW_SYN_RECV)
386 return tcp_req_err(sk, seq); 384 return tcp_req_err(sk, seq,
385 type == ICMP_PARAMETERPROB ||
386 type == ICMP_TIME_EXCEEDED ||
387 (type == ICMP_DEST_UNREACH &&
388 (code == ICMP_NET_UNREACH ||
389 code == ICMP_HOST_UNREACH)));
387 390
388 bh_lock_sock(sk); 391 bh_lock_sock(sk);
389 /* If too many ICMPs get dropped on busy 392 /* If too many ICMPs get dropped on busy
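The tcp_v4_err() hunk above passes an abort decision into tcp_req_err(): a request socket still in SYN_RECV is torn down only for ICMP errors that signal a hard failure. The sketch below restates that condition as a predicate; the helper name is invented and the constants follow the standard RFC 792 values:

#include <stdio.h>
#include <stdbool.h>

#define ICMP_DEST_UNREACH    3
#define ICMP_TIME_EXCEEDED  11
#define ICMP_PARAMETERPROB  12
#define ICMP_NET_UNREACH     0
#define ICMP_HOST_UNREACH    1

static bool icmp_aborts_request(int type, int code)
{
	return type == ICMP_PARAMETERPROB ||
	       type == ICMP_TIME_EXCEEDED ||
	       (type == ICMP_DEST_UNREACH &&
		(code == ICMP_NET_UNREACH || code == ICMP_HOST_UNREACH));
}

int main(void)
{
	/* port unreachable (code 3) no longer kills the request socket */
	printf("%d\n", icmp_aborts_request(ICMP_DEST_UNREACH, 3));	/* 0 */
	printf("%d\n", icmp_aborts_request(ICMP_TIME_EXCEEDED, 0));	/* 1 */
	return 0;
}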
@@ -637,8 +640,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
637 * Incoming packet is checked with md5 hash with finding key, 640 * Incoming packet is checked with md5 hash with finding key,
638 * no RST generated if md5 hash doesn't match. 641 * no RST generated if md5 hash doesn't match.
639 */ 642 */
640 sk1 = __inet_lookup_listener(net, 643 sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
641 &tcp_hashinfo, ip_hdr(skb)->saddr, 644 ip_hdr(skb)->saddr,
642 th->source, ip_hdr(skb)->daddr, 645 th->source, ip_hdr(skb)->daddr,
643 ntohs(th->source), inet_iif(skb)); 646 ntohs(th->source), inet_iif(skb));
644 /* don't send rst if it can't find key */ 647 /* don't send rst if it can't find key */
@@ -707,7 +710,8 @@ release_sk1:
707 outside socket context is ugly, certainly. What can I do? 710 outside socket context is ugly, certainly. What can I do?
708 */ 711 */
709 712
710static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, 713static void tcp_v4_send_ack(struct net *net,
714 struct sk_buff *skb, u32 seq, u32 ack,
711 u32 win, u32 tsval, u32 tsecr, int oif, 715 u32 win, u32 tsval, u32 tsecr, int oif,
712 struct tcp_md5sig_key *key, 716 struct tcp_md5sig_key *key,
713 int reply_flags, u8 tos) 717 int reply_flags, u8 tos)
@@ -722,7 +726,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
722 ]; 726 ];
723 } rep; 727 } rep;
724 struct ip_reply_arg arg; 728 struct ip_reply_arg arg;
725 struct net *net = dev_net(skb_dst(skb)->dev);
726 729
727 memset(&rep.th, 0, sizeof(struct tcphdr)); 730 memset(&rep.th, 0, sizeof(struct tcphdr));
728 memset(&arg, 0, sizeof(arg)); 731 memset(&arg, 0, sizeof(arg));
@@ -784,7 +787,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
784 struct inet_timewait_sock *tw = inet_twsk(sk); 787 struct inet_timewait_sock *tw = inet_twsk(sk);
785 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 788 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
786 789
787 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 790 tcp_v4_send_ack(sock_net(sk), skb,
791 tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
788 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 792 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
789 tcp_time_stamp + tcptw->tw_ts_offset, 793 tcp_time_stamp + tcptw->tw_ts_offset,
790 tcptw->tw_ts_recent, 794 tcptw->tw_ts_recent,
@@ -803,8 +807,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
803 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 807 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
804 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 808 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
805 */ 809 */
806 tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? 810 u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
807 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 811 tcp_sk(sk)->snd_nxt;
812
813 tcp_v4_send_ack(sock_net(sk), skb, seq,
808 tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, 814 tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
809 tcp_time_stamp, 815 tcp_time_stamp,
810 req->ts_recent, 816 req->ts_recent,
@@ -857,7 +863,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
857 kfree(inet_rsk(req)->opt); 863 kfree(inet_rsk(req)->opt);
858} 864}
859 865
860
861#ifdef CONFIG_TCP_MD5SIG 866#ifdef CONFIG_TCP_MD5SIG
862/* 867/*
863 * RFC2385 MD5 checksumming requires a mapping of 868 * RFC2385 MD5 checksumming requires a mapping of
@@ -1031,21 +1036,22 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1031 bp->len = cpu_to_be16(nbytes); 1036 bp->len = cpu_to_be16(nbytes);
1032 1037
1033 sg_init_one(&sg, bp, sizeof(*bp)); 1038 sg_init_one(&sg, bp, sizeof(*bp));
1034 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); 1039 ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
1040 return crypto_ahash_update(hp->md5_req);
1035} 1041}
1036 1042
1037static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 1043static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1038 __be32 daddr, __be32 saddr, const struct tcphdr *th) 1044 __be32 daddr, __be32 saddr, const struct tcphdr *th)
1039{ 1045{
1040 struct tcp_md5sig_pool *hp; 1046 struct tcp_md5sig_pool *hp;
1041 struct hash_desc *desc; 1047 struct ahash_request *req;
1042 1048
1043 hp = tcp_get_md5sig_pool(); 1049 hp = tcp_get_md5sig_pool();
1044 if (!hp) 1050 if (!hp)
1045 goto clear_hash_noput; 1051 goto clear_hash_noput;
1046 desc = &hp->md5_desc; 1052 req = hp->md5_req;
1047 1053
1048 if (crypto_hash_init(desc)) 1054 if (crypto_ahash_init(req))
1049 goto clear_hash; 1055 goto clear_hash;
1050 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) 1056 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1051 goto clear_hash; 1057 goto clear_hash;
@@ -1053,7 +1059,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1053 goto clear_hash; 1059 goto clear_hash;
1054 if (tcp_md5_hash_key(hp, key)) 1060 if (tcp_md5_hash_key(hp, key))
1055 goto clear_hash; 1061 goto clear_hash;
1056 if (crypto_hash_final(desc, md5_hash)) 1062 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1063 if (crypto_ahash_final(req))
1057 goto clear_hash; 1064 goto clear_hash;
1058 1065
1059 tcp_put_md5sig_pool(); 1066 tcp_put_md5sig_pool();
@@ -1071,7 +1078,7 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1071 const struct sk_buff *skb) 1078 const struct sk_buff *skb)
1072{ 1079{
1073 struct tcp_md5sig_pool *hp; 1080 struct tcp_md5sig_pool *hp;
1074 struct hash_desc *desc; 1081 struct ahash_request *req;
1075 const struct tcphdr *th = tcp_hdr(skb); 1082 const struct tcphdr *th = tcp_hdr(skb);
1076 __be32 saddr, daddr; 1083 __be32 saddr, daddr;
1077 1084
@@ -1087,9 +1094,9 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1087 hp = tcp_get_md5sig_pool(); 1094 hp = tcp_get_md5sig_pool();
1088 if (!hp) 1095 if (!hp)
1089 goto clear_hash_noput; 1096 goto clear_hash_noput;
1090 desc = &hp->md5_desc; 1097 req = hp->md5_req;
1091 1098
1092 if (crypto_hash_init(desc)) 1099 if (crypto_ahash_init(req))
1093 goto clear_hash; 1100 goto clear_hash;
1094 1101
1095 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) 1102 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
@@ -1100,7 +1107,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1100 goto clear_hash; 1107 goto clear_hash;
1101 if (tcp_md5_hash_key(hp, key)) 1108 if (tcp_md5_hash_key(hp, key))
1102 goto clear_hash; 1109 goto clear_hash;
1103 if (crypto_hash_final(desc, md5_hash)) 1110 ahash_request_set_crypt(req, NULL, md5_hash, 0);
1111 if (crypto_ahash_final(req))
1104 goto clear_hash; 1112 goto clear_hash;
1105 1113
1106 tcp_put_md5sig_pool(); 1114 tcp_put_md5sig_pool();
@@ -1579,7 +1587,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
1579 TCP_SKB_CB(skb)->sacked = 0; 1587 TCP_SKB_CB(skb)->sacked = 0;
1580 1588
1581lookup: 1589lookup:
1582 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 1590 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1591 th->dest);
1583 if (!sk) 1592 if (!sk)
1584 goto no_tcp_socket; 1593 goto no_tcp_socket;
1585 1594
@@ -1589,28 +1598,30 @@ process:
1589 1598
1590 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1599 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1591 struct request_sock *req = inet_reqsk(sk); 1600 struct request_sock *req = inet_reqsk(sk);
1592 struct sock *nsk = NULL; 1601 struct sock *nsk;
1593 1602
1594 sk = req->rsk_listener; 1603 sk = req->rsk_listener;
1595 if (tcp_v4_inbound_md5_hash(sk, skb)) 1604 if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
1596 goto discard_and_relse; 1605 reqsk_put(req);
1597 if (likely(sk->sk_state == TCP_LISTEN)) { 1606 goto discard_it;
1598 nsk = tcp_check_req(sk, skb, req, false); 1607 }
1599 } else { 1608 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1600 inet_csk_reqsk_queue_drop_and_put(sk, req); 1609 inet_csk_reqsk_queue_drop_and_put(sk, req);
1601 goto lookup; 1610 goto lookup;
1602 } 1611 }
1612 sock_hold(sk);
1613 nsk = tcp_check_req(sk, skb, req, false);
1603 if (!nsk) { 1614 if (!nsk) {
1604 reqsk_put(req); 1615 reqsk_put(req);
1605 goto discard_it; 1616 goto discard_and_relse;
1606 } 1617 }
1607 if (nsk == sk) { 1618 if (nsk == sk) {
1608 sock_hold(sk);
1609 reqsk_put(req); 1619 reqsk_put(req);
1610 } else if (tcp_child_process(sk, nsk, skb)) { 1620 } else if (tcp_child_process(sk, nsk, skb)) {
1611 tcp_v4_send_reset(nsk, skb); 1621 tcp_v4_send_reset(nsk, skb);
1612 goto discard_it; 1622 goto discard_and_relse;
1613 } else { 1623 } else {
1624 sock_put(sk);
1614 return 0; 1625 return 0;
1615 } 1626 }
1616 } 1627 }
@@ -1640,7 +1651,7 @@ process:
1640 sk_incoming_cpu_update(sk); 1651 sk_incoming_cpu_update(sk);
1641 1652
1642 bh_lock_sock_nested(sk); 1653 bh_lock_sock_nested(sk);
1643 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); 1654 tcp_segs_in(tcp_sk(sk), skb);
1644 ret = 0; 1655 ret = 0;
1645 if (!sock_owned_by_user(sk)) { 1656 if (!sock_owned_by_user(sk)) {
1646 if (!tcp_prequeue(sk, skb)) 1657 if (!tcp_prequeue(sk, skb))
@@ -1693,7 +1704,8 @@ do_time_wait:
1693 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1704 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1694 case TCP_TW_SYN: { 1705 case TCP_TW_SYN: {
1695 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 1706 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1696 &tcp_hashinfo, 1707 &tcp_hashinfo, skb,
1708 __tcp_hdrlen(th),
1697 iph->saddr, th->source, 1709 iph->saddr, th->source,
1698 iph->daddr, th->dest, 1710 iph->daddr, th->dest,
1699 inet_iif(skb)); 1711 inet_iif(skb));
@@ -2385,6 +2397,16 @@ static int __net_init tcp_sk_init(struct net *net)
2385 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; 2397 net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
2386 net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; 2398 net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
2387 2399
2400 net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
2401 net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
2402 net->ipv4.sysctl_tcp_syncookies = 1;
2403 net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
2404 net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
2405 net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
2406 net->ipv4.sysctl_tcp_orphan_retries = 0;
2407 net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
2408 net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
2409
2388 return 0; 2410 return 0;
2389fail: 2411fail:
2390 tcp_sk_exit(net); 2412 tcp_sk_exit(net);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c8cbc2b4b792..7b7eec439906 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -369,6 +369,7 @@ void tcp_update_metrics(struct sock *sk)
369 const struct inet_connection_sock *icsk = inet_csk(sk); 369 const struct inet_connection_sock *icsk = inet_csk(sk);
370 struct dst_entry *dst = __sk_dst_get(sk); 370 struct dst_entry *dst = __sk_dst_get(sk);
371 struct tcp_sock *tp = tcp_sk(sk); 371 struct tcp_sock *tp = tcp_sk(sk);
372 struct net *net = sock_net(sk);
372 struct tcp_metrics_block *tm; 373 struct tcp_metrics_block *tm;
373 unsigned long rtt; 374 unsigned long rtt;
374 u32 val; 375 u32 val;
@@ -473,7 +474,7 @@ void tcp_update_metrics(struct sock *sk)
473 if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { 474 if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
474 val = tcp_metric_get(tm, TCP_METRIC_REORDERING); 475 val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
475 if (val < tp->reordering && 476 if (val < tp->reordering &&
476 tp->reordering != sysctl_tcp_reordering) 477 tp->reordering != net->ipv4.sysctl_tcp_reordering)
477 tcp_metric_set(tm, TCP_METRIC_REORDERING, 478 tcp_metric_set(tm, TCP_METRIC_REORDERING,
478 tp->reordering); 479 tp->reordering);
479 } 480 }
@@ -550,7 +551,7 @@ reset:
550 */ 551 */
551 if (crtt > tp->srtt_us) { 552 if (crtt > tp->srtt_us) {
552 /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */ 553 /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
553 crtt /= 8 * USEC_PER_MSEC; 554 crtt /= 8 * USEC_PER_SEC / HZ;
554 inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk)); 555 inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
555 } else if (tp->srtt_us == 0) { 556 } else if (tp->srtt_us == 0) {
556 /* RFC6298: 5.7 We've failed to get a valid RTT sample from 557 /* RFC6298: 5.7 We've failed to get a valid RTT sample from
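The tcp_metrics.c change above fixes the unit conversion of the cached RTT: crtt is microseconds scaled by 8, while icsk_rto wants jiffies, so the divisor must be 8 * USEC_PER_SEC / HZ rather than 8 * USEC_PER_MSEC (which yields milliseconds and is only correct for HZ=1000). A worked example with invented numbers, compilable as plain C:

#include <stdio.h>

#define USEC_PER_SEC  1000000UL
#define USEC_PER_MSEC 1000UL

int main(void)
{
	unsigned long hz   = 250;		/* example HZ */
	unsigned long crtt = 100000UL * 8;	/* cached RTT: 100 ms, scaled by 8 */

	unsigned long old_result = crtt / (8 * USEC_PER_MSEC);		/* 100: milliseconds, 4x too big as jiffies */
	unsigned long new_result = crtt / (8 * USEC_PER_SEC / hz);	/* 25: correct jiffy count */

	printf("old: %lu, new: %lu (100 ms at HZ=%lu is %lu jiffies)\n",
	       old_result, new_result, hz, hz / 10);
	return 0;
}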
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 75632a925824..acb366dd61e6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -27,9 +27,6 @@
27#include <net/inet_common.h> 27#include <net/inet_common.h>
28#include <net/xfrm.h> 28#include <net/xfrm.h>
29 29
30int sysctl_tcp_syncookies __read_mostly = 1;
31EXPORT_SYMBOL(sysctl_tcp_syncookies);
32
33int sysctl_tcp_abort_on_overflow __read_mostly; 30int sysctl_tcp_abort_on_overflow __read_mostly;
34 31
35struct inet_timewait_death_row tcp_death_row = { 32struct inet_timewait_death_row tcp_death_row = {
@@ -455,7 +452,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
455 452
456 newtp->rcv_wup = newtp->copied_seq = 453 newtp->rcv_wup = newtp->copied_seq =
457 newtp->rcv_nxt = treq->rcv_isn + 1; 454 newtp->rcv_nxt = treq->rcv_isn + 1;
458 newtp->segs_in = 0; 455 newtp->segs_in = 1;
459 456
460 newtp->snd_sml = newtp->snd_una = 457 newtp->snd_sml = newtp->snd_una =
461 newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; 458 newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -815,6 +812,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
815 int ret = 0; 812 int ret = 0;
816 int state = child->sk_state; 813 int state = child->sk_state;
817 814
815 tcp_segs_in(tcp_sk(child), skb);
818 if (!sock_owned_by_user(child)) { 816 if (!sock_owned_by_user(child)) {
819 ret = tcp_rcv_state_process(child, skb); 817 ret = tcp_rcv_state_process(child, skb);
820 /* Wakeup parent, send SIGIO */ 818 /* Wakeup parent, send SIGIO */
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 9864a2dbadce..773083b7f1e9 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -135,7 +135,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
135 th->fin = th->psh = 0; 135 th->fin = th->psh = 0;
136 th->check = newcheck; 136 th->check = newcheck;
137 137
138 if (skb->ip_summed != CHECKSUM_PARTIAL) 138 if (skb->ip_summed == CHECKSUM_PARTIAL)
139 gso_reset_checksum(skb, ~th->check);
140 else
139 th->check = gso_make_checksum(skb, ~th->check); 141 th->check = gso_make_checksum(skb, ~th->check);
140 142
141 seq += mss; 143 seq += mss;
@@ -169,7 +171,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
169 skb->data_len); 171 skb->data_len);
170 th->check = ~csum_fold((__force __wsum)((__force u32)th->check + 172 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
171 (__force u32)delta)); 173 (__force u32)delta));
172 if (skb->ip_summed != CHECKSUM_PARTIAL) 174 if (skb->ip_summed == CHECKSUM_PARTIAL)
175 gso_reset_checksum(skb, ~th->check);
176 else
173 th->check = gso_make_checksum(skb, ~th->check); 177 th->check = gso_make_checksum(skb, ~th->check);
174out: 178out:
175 return segs; 179 return segs;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fda379cd600d..7d2dc015cd19 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -62,9 +62,6 @@ int sysctl_tcp_tso_win_divisor __read_mostly = 3;
62/* By default, RFC2861 behavior. */ 62/* By default, RFC2861 behavior. */
63int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 63int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
64 64
65unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
66EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
67
68static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, 65static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
69 int push_one, gfp_t gfp); 66 int push_one, gfp_t gfp);
70 67
@@ -1006,8 +1003,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1006 if (likely(tcb->tcp_flags & TCPHDR_ACK)) 1003 if (likely(tcb->tcp_flags & TCPHDR_ACK))
1007 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 1004 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
1008 1005
1009 if (skb->len != tcp_header_size) 1006 if (skb->len != tcp_header_size) {
1010 tcp_event_data_sent(tp, sk); 1007 tcp_event_data_sent(tp, sk);
1008 tp->data_segs_out += tcp_skb_pcount(skb);
1009 }
1011 1010
1012 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) 1011 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1013 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, 1012 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
@@ -3476,6 +3475,7 @@ void tcp_send_probe0(struct sock *sk)
3476{ 3475{
3477 struct inet_connection_sock *icsk = inet_csk(sk); 3476 struct inet_connection_sock *icsk = inet_csk(sk);
3478 struct tcp_sock *tp = tcp_sk(sk); 3477 struct tcp_sock *tp = tcp_sk(sk);
3478 struct net *net = sock_net(sk);
3479 unsigned long probe_max; 3479 unsigned long probe_max;
3480 int err; 3480 int err;
3481 3481
@@ -3489,7 +3489,7 @@ void tcp_send_probe0(struct sock *sk)
3489 } 3489 }
3490 3490
3491 if (err <= 0) { 3491 if (err <= 0) {
3492 if (icsk->icsk_backoff < sysctl_tcp_retries2) 3492 if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
3493 icsk->icsk_backoff++; 3493 icsk->icsk_backoff++;
3494 icsk->icsk_probes_out++; 3494 icsk->icsk_probes_out++;
3495 probe_max = TCP_RTO_MAX; 3495 probe_max = TCP_RTO_MAX;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index ebf5ff57526e..f6c50af24a64 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -187,13 +187,13 @@ static int tcpprobe_sprint(char *tbuf, int n)
187{ 187{
188 const struct tcp_log *p 188 const struct tcp_log *p
189 = tcp_probe.log + tcp_probe.tail; 189 = tcp_probe.log + tcp_probe.tail;
190 struct timespec tv 190 struct timespec64 ts
191 = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start)); 191 = ktime_to_timespec64(ktime_sub(p->tstamp, tcp_probe.start));
192 192
193 return scnprintf(tbuf, n, 193 return scnprintf(tbuf, n,
194 "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n", 194 "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n",
195 (unsigned long)tv.tv_sec, 195 (unsigned long)ts.tv_sec,
196 (unsigned long)tv.tv_nsec, 196 (unsigned long)ts.tv_nsec,
197 &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una, 197 &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una,
198 p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd); 198 p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd);
199} 199}
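
The tcp_probe hunk only switches the log formatting from timespec to timespec64 so the stored seconds stay 64-bit. A rough standalone sketch of that conversion, assuming a signed 64-bit nanosecond delta as ktime_sub() produces and ignoring the negative-delta handling the kernel helper performs:

    /* Standalone sketch (not kernel code): splitting a 64-bit nanosecond
     * delta into 64-bit seconds plus nanoseconds, so the value stays
     * correct past year 2038 where a 32-bit tv_sec would wrap.
     */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    struct my_timespec64 {
        int64_t tv_sec;
        long tv_nsec;
    };

    static struct my_timespec64 ns_to_timespec64(int64_t ns)
    {
        struct my_timespec64 ts;

        ts.tv_sec = ns / 1000000000LL;
        ts.tv_nsec = (long)(ns % 1000000000LL);
        return ts;
    }

    int main(void)
    {
        /* ~70 years worth of nanoseconds: past 2038, still representable */
        struct my_timespec64 ts = ns_to_timespec64(2210000000LL * 1000000000LL);

        printf("%" PRId64 ".%09ld\n", ts.tv_sec, ts.tv_nsec);
        return 0;
    }
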
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a4730a28b220..49bc474f8e35 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,11 +22,6 @@
22#include <linux/gfp.h> 22#include <linux/gfp.h>
23#include <net/tcp.h> 23#include <net/tcp.h>
24 24
25int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
26int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES;
27int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
28int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
29int sysctl_tcp_orphan_retries __read_mostly;
30int sysctl_tcp_thin_linear_timeouts __read_mostly; 25int sysctl_tcp_thin_linear_timeouts __read_mostly;
31 26
32static void tcp_write_err(struct sock *sk) 27static void tcp_write_err(struct sock *sk)
@@ -82,7 +77,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
82/* Calculate maximal number or retries on an orphaned socket. */ 77/* Calculate maximal number or retries on an orphaned socket. */
83static int tcp_orphan_retries(struct sock *sk, bool alive) 78static int tcp_orphan_retries(struct sock *sk, bool alive)
84{ 79{
85 int retries = sysctl_tcp_orphan_retries; /* May be zero. */ 80 int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
86 81
87 /* We know from an ICMP that something is wrong. */ 82 /* We know from an ICMP that something is wrong. */
88 if (sk->sk_err_soft && !alive) 83 if (sk->sk_err_soft && !alive)
@@ -157,6 +152,7 @@ static int tcp_write_timeout(struct sock *sk)
157{ 152{
158 struct inet_connection_sock *icsk = inet_csk(sk); 153 struct inet_connection_sock *icsk = inet_csk(sk);
159 struct tcp_sock *tp = tcp_sk(sk); 154 struct tcp_sock *tp = tcp_sk(sk);
155 struct net *net = sock_net(sk);
160 int retry_until; 156 int retry_until;
161 bool do_reset, syn_set = false; 157 bool do_reset, syn_set = false;
162 158
@@ -169,10 +165,10 @@ static int tcp_write_timeout(struct sock *sk)
169 NET_INC_STATS_BH(sock_net(sk), 165 NET_INC_STATS_BH(sock_net(sk),
170 LINUX_MIB_TCPFASTOPENACTIVEFAIL); 166 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
171 } 167 }
172 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 168 retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
173 syn_set = true; 169 syn_set = true;
174 } else { 170 } else {
175 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { 171 if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) {
176 /* Some middle-boxes may black-hole Fast Open _after_ 172 /* Some middle-boxes may black-hole Fast Open _after_
177 * the handshake. Therefore we conservatively disable 173 * the handshake. Therefore we conservatively disable
178 * Fast Open on this path on recurring timeouts with 174 * Fast Open on this path on recurring timeouts with
@@ -181,7 +177,7 @@ static int tcp_write_timeout(struct sock *sk)
181 if (tp->syn_data_acked && 177 if (tp->syn_data_acked &&
182 tp->bytes_acked <= tp->rx_opt.mss_clamp) { 178 tp->bytes_acked <= tp->rx_opt.mss_clamp) {
183 tcp_fastopen_cache_set(sk, 0, NULL, true, 0); 179 tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
184 if (icsk->icsk_retransmits == sysctl_tcp_retries1) 180 if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
185 NET_INC_STATS_BH(sock_net(sk), 181 NET_INC_STATS_BH(sock_net(sk),
186 LINUX_MIB_TCPFASTOPENACTIVEFAIL); 182 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
187 } 183 }
@@ -191,7 +187,7 @@ static int tcp_write_timeout(struct sock *sk)
191 dst_negative_advice(sk); 187 dst_negative_advice(sk);
192 } 188 }
193 189
194 retry_until = sysctl_tcp_retries2; 190 retry_until = net->ipv4.sysctl_tcp_retries2;
195 if (sock_flag(sk, SOCK_DEAD)) { 191 if (sock_flag(sk, SOCK_DEAD)) {
196 const bool alive = icsk->icsk_rto < TCP_RTO_MAX; 192 const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
197 193
@@ -305,7 +301,7 @@ static void tcp_probe_timer(struct sock *sk)
305 (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout) 301 (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
306 goto abort; 302 goto abort;
307 303
308 max_probes = sysctl_tcp_retries2; 304 max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
309 if (sock_flag(sk, SOCK_DEAD)) { 305 if (sock_flag(sk, SOCK_DEAD)) {
310 const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; 306 const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
311 307
@@ -332,7 +328,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
332{ 328{
333 struct inet_connection_sock *icsk = inet_csk(sk); 329 struct inet_connection_sock *icsk = inet_csk(sk);
334 int max_retries = icsk->icsk_syn_retries ? : 330 int max_retries = icsk->icsk_syn_retries ? :
335 sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ 331 sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
336 struct request_sock *req; 332 struct request_sock *req;
337 333
338 req = tcp_sk(sk)->fastopen_rsk; 334 req = tcp_sk(sk)->fastopen_rsk;
@@ -360,6 +356,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
360void tcp_retransmit_timer(struct sock *sk) 356void tcp_retransmit_timer(struct sock *sk)
361{ 357{
362 struct tcp_sock *tp = tcp_sk(sk); 358 struct tcp_sock *tp = tcp_sk(sk);
359 struct net *net = sock_net(sk);
363 struct inet_connection_sock *icsk = inet_csk(sk); 360 struct inet_connection_sock *icsk = inet_csk(sk);
364 361
365 if (tp->fastopen_rsk) { 362 if (tp->fastopen_rsk) {
@@ -490,7 +487,7 @@ out_reset_timer:
490 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 487 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
491 } 488 }
492 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 489 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
493 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0)) 490 if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0))
494 __sk_dst_reset(sk); 491 __sk_dst_reset(sk);
495 492
496out:; 493out:;
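
The tcp_output.c and tcp_timer.c hunks above all follow one pattern: a file-scope sysctl (tcp_notsent_lowat, tcp_syn_retries, tcp_synack_retries, tcp_retries1/2, tcp_orphan_retries) is removed and every reader is pointed at a field under sock_net(sk)->ipv4 instead, so each network namespace carries its own value. A minimal standalone sketch of that pattern, with illustrative names (my_net, my_netns_ipv4) rather than the kernel's structures:

    /* Standalone sketch (not the kernel implementation): replace a
     * file-scope tunable with a field in a per-namespace structure so
     * each "net" can carry its own value.
     */
    #include <stdio.h>

    struct my_netns_ipv4 {
        int sysctl_tcp_retries2;
        int sysctl_tcp_orphan_retries;
    };

    struct my_net {
        struct my_netns_ipv4 ipv4;
    };

    /* before: a single global shared by every namespace */
    static int global_tcp_retries2 = 15;

    /* after: the tunable is read through the namespace the socket belongs to */
    static int tcp_retries2(const struct my_net *net)
    {
        return net->ipv4.sysctl_tcp_retries2;
    }

    int main(void)
    {
        struct my_net init_net  = { .ipv4 = { 15, 0 } };
        struct my_net container = { .ipv4 = {  5, 0 } };

        printf("global=%d init_net=%d container=%d\n",
               global_tcp_retries2,
               tcp_retries2(&init_net),
               tcp_retries2(&container));
        return 0;
    }

The point of the change shows in the output: two namespaces can report different retries2 values where the old global forced a single one.
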
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index dc45b538e237..08eed5e16df0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -356,8 +356,8 @@ EXPORT_SYMBOL(udp_lib_get_port);
356 * match_wildcard == false: addresses must be exactly the same, i.e. 356 * match_wildcard == false: addresses must be exactly the same, i.e.
357 * 0.0.0.0 only equals to 0.0.0.0 357 * 0.0.0.0 only equals to 0.0.0.0
358 */ 358 */
359static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2, 359int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
360 bool match_wildcard) 360 bool match_wildcard)
361{ 361{
362 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 362 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
363 363
@@ -499,6 +499,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
499 struct sock *sk, *result; 499 struct sock *sk, *result;
500 struct hlist_nulls_node *node; 500 struct hlist_nulls_node *node;
501 int score, badness, matches = 0, reuseport = 0; 501 int score, badness, matches = 0, reuseport = 0;
502 bool select_ok = true;
502 u32 hash = 0; 503 u32 hash = 0;
503 504
504begin: 505begin:
@@ -512,14 +513,18 @@ begin:
512 badness = score; 513 badness = score;
513 reuseport = sk->sk_reuseport; 514 reuseport = sk->sk_reuseport;
514 if (reuseport) { 515 if (reuseport) {
515 struct sock *sk2;
516 hash = udp_ehashfn(net, daddr, hnum, 516 hash = udp_ehashfn(net, daddr, hnum,
517 saddr, sport); 517 saddr, sport);
518 sk2 = reuseport_select_sock(sk, hash, skb, 518 if (select_ok) {
519 sizeof(struct udphdr)); 519 struct sock *sk2;
520 if (sk2) { 520
521 result = sk2; 521 sk2 = reuseport_select_sock(sk, hash, skb,
522 goto found; 522 sizeof(struct udphdr));
523 if (sk2) {
524 result = sk2;
525 select_ok = false;
526 goto found;
527 }
523 } 528 }
524 matches = 1; 529 matches = 1;
525 } 530 }
@@ -563,6 +568,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
563 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); 568 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
564 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; 569 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
565 int score, badness, matches = 0, reuseport = 0; 570 int score, badness, matches = 0, reuseport = 0;
571 bool select_ok = true;
566 u32 hash = 0; 572 u32 hash = 0;
567 573
568 rcu_read_lock(); 574 rcu_read_lock();
@@ -601,14 +607,18 @@ begin:
601 badness = score; 607 badness = score;
602 reuseport = sk->sk_reuseport; 608 reuseport = sk->sk_reuseport;
603 if (reuseport) { 609 if (reuseport) {
604 struct sock *sk2;
605 hash = udp_ehashfn(net, daddr, hnum, 610 hash = udp_ehashfn(net, daddr, hnum,
606 saddr, sport); 611 saddr, sport);
607 sk2 = reuseport_select_sock(sk, hash, skb, 612 if (select_ok) {
613 struct sock *sk2;
614
615 sk2 = reuseport_select_sock(sk, hash, skb,
608 sizeof(struct udphdr)); 616 sizeof(struct udphdr));
609 if (sk2) { 617 if (sk2) {
610 result = sk2; 618 result = sk2;
611 goto found; 619 select_ok = false;
620 goto found;
621 }
612 } 622 }
613 matches = 1; 623 matches = 1;
614 } 624 }
@@ -838,32 +848,20 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
838{ 848{
839 struct udphdr *uh = udp_hdr(skb); 849 struct udphdr *uh = udp_hdr(skb);
840 850
841 if (nocheck) 851 if (nocheck) {
842 uh->check = 0; 852 uh->check = 0;
843 else if (skb_is_gso(skb)) 853 } else if (skb_is_gso(skb)) {
844 uh->check = ~udp_v4_check(len, saddr, daddr, 0); 854 uh->check = ~udp_v4_check(len, saddr, daddr, 0);
845 else if (skb_dst(skb) && skb_dst(skb)->dev && 855 } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
846 (skb_dst(skb)->dev->features & 856 uh->check = 0;
847 (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { 857 uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb));
848 858 if (uh->check == 0)
849 BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); 859 uh->check = CSUM_MANGLED_0;
850 860 } else {
851 skb->ip_summed = CHECKSUM_PARTIAL; 861 skb->ip_summed = CHECKSUM_PARTIAL;
852 skb->csum_start = skb_transport_header(skb) - skb->head; 862 skb->csum_start = skb_transport_header(skb) - skb->head;
853 skb->csum_offset = offsetof(struct udphdr, check); 863 skb->csum_offset = offsetof(struct udphdr, check);
854 uh->check = ~udp_v4_check(len, saddr, daddr, 0); 864 uh->check = ~udp_v4_check(len, saddr, daddr, 0);
855 } else {
856 __wsum csum;
857
858 BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
859
860 uh->check = 0;
861 csum = skb_checksum(skb, 0, len, 0);
862 uh->check = udp_v4_check(len, saddr, daddr, csum);
863 if (uh->check == 0)
864 uh->check = CSUM_MANGLED_0;
865
866 skb->ip_summed = CHECKSUM_UNNECESSARY;
867 } 865 }
868} 866}
869EXPORT_SYMBOL(udp_set_csum); 867EXPORT_SYMBOL(udp_set_csum);
@@ -1038,8 +1036,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
1038 if (msg->msg_controllen) { 1036 if (msg->msg_controllen) {
1039 err = ip_cmsg_send(sock_net(sk), msg, &ipc, 1037 err = ip_cmsg_send(sock_net(sk), msg, &ipc,
1040 sk->sk_family == AF_INET6); 1038 sk->sk_family == AF_INET6);
1041 if (err) 1039 if (unlikely(err)) {
1040 kfree(ipc.opt);
1042 return err; 1041 return err;
1042 }
1043 if (ipc.opt) 1043 if (ipc.opt)
1044 free = 1; 1044 free = 1;
1045 connected = 0; 1045 connected = 0;
@@ -2070,10 +2070,14 @@ void udp_v4_early_demux(struct sk_buff *skb)
2070 if (!in_dev) 2070 if (!in_dev)
2071 return; 2071 return;
2072 2072
2073 ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, 2073 /* we are supposed to accept bcast packets */
2074 iph->protocol); 2074 if (skb->pkt_type == PACKET_MULTICAST) {
2075 if (!ours) 2075 ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
2076 return; 2076 iph->protocol);
2077 if (!ours)
2078 return;
2079 }
2080
2077 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, 2081 sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
2078 uh->source, iph->saddr, dif); 2082 uh->source, iph->saddr, dif);
2079 } else if (skb->pkt_type == PACKET_HOST) { 2083 } else if (skb->pkt_type == PACKET_HOST) {
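
Two things happen in the udp_set_csum() hunk above: the CHECKSUM_PARTIAL case now derives the outer checksum from lco_csum() instead of summing the payload in software, and the "a checksum that computes to zero must go on the wire as 0xffff" rule (CSUM_MANGLED_0) is preserved. A standalone userspace sketch of that second rule for a UDP/IPv4 datagram; this is not kernel code and the addresses/ports are made up:

    /* Standalone sketch: UDP/IPv4 checksum over the RFC 768 pseudo-header,
     * mapping a computed value of 0 to 0xffff because an on-wire 0 means
     * "no checksum" for UDP over IPv4.
     */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>

    static uint32_t sum16(const uint8_t *p, size_t len, uint32_t sum)
    {
        size_t i;

        for (i = 0; i + 1 < len; i += 2)
            sum += (uint32_t)p[i] << 8 | p[i + 1];
        if (len & 1)
            sum += (uint32_t)p[len - 1] << 8;
        return sum;
    }

    static uint16_t udp4_checksum(uint32_t saddr, uint32_t daddr,
                                  const uint8_t *udp, size_t len)
    {
        uint8_t pseudo[12];
        uint32_t sum;

        /* pseudo-header: saddr, daddr, zero, protocol (17), UDP length */
        memcpy(&pseudo[0], &saddr, 4);
        memcpy(&pseudo[4], &daddr, 4);
        pseudo[8]  = 0;
        pseudo[9]  = 17;
        pseudo[10] = len >> 8;
        pseudo[11] = len & 0xff;

        sum = sum16(udp, len, sum16(pseudo, sizeof(pseudo), 0));
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);

        sum = ~sum & 0xffff;
        return sum ? (uint16_t)sum : 0xffff;    /* CSUM_MANGLED_0 */
    }

    int main(void)
    {
        /* 8-byte UDP header with a zeroed checksum field + 4 bytes payload */
        uint8_t dgram[12] = { 0x30, 0x39, 0x00, 0x35, 0x00, 0x0c, 0, 0,
                              'p', 'i', 'n', 'g' };

        printf("udp check = %#06x\n",
               udp4_checksum(htonl(0xc0a80001), htonl(0xc0a80002),
                             dgram, sizeof(dgram)));
        return 0;
    }
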
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 4c519c1dc161..0ed2dafb7cc4 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -32,42 +32,65 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
32 netdev_features_t features), 32 netdev_features_t features),
33 __be16 new_protocol, bool is_ipv6) 33 __be16 new_protocol, bool is_ipv6)
34{ 34{
35 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
36 bool remcsum, need_csum, offload_csum, ufo;
35 struct sk_buff *segs = ERR_PTR(-EINVAL); 37 struct sk_buff *segs = ERR_PTR(-EINVAL);
38 struct udphdr *uh = udp_hdr(skb);
36 u16 mac_offset = skb->mac_header; 39 u16 mac_offset = skb->mac_header;
37 int mac_len = skb->mac_len;
38 int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
39 __be16 protocol = skb->protocol; 40 __be16 protocol = skb->protocol;
40 netdev_features_t enc_features; 41 u16 mac_len = skb->mac_len;
41 int udp_offset, outer_hlen; 42 int udp_offset, outer_hlen;
42 unsigned int oldlen; 43 __wsum partial;
43 bool need_csum = !!(skb_shinfo(skb)->gso_type &
44 SKB_GSO_UDP_TUNNEL_CSUM);
45 bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
46 bool offload_csum = false, dont_encap = (need_csum || remcsum);
47
48 oldlen = (u16)~skb->len;
49 44
50 if (unlikely(!pskb_may_pull(skb, tnl_hlen))) 45 if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
51 goto out; 46 goto out;
52 47
48 /* Adjust partial header checksum to negate old length.
49 * We cannot rely on the value contained in uh->len as it is
50 * possible that the actual value exceeds the boundaries of the
51 * 16 bit length field due to the header being added outside of an
52 * IP or IPv6 frame that was already limited to 64K - 1.
53 */
54 partial = csum_sub(csum_unfold(uh->check),
55 (__force __wsum)htonl(skb->len));
56
57 /* setup inner skb. */
53 skb->encapsulation = 0; 58 skb->encapsulation = 0;
59 SKB_GSO_CB(skb)->encap_level = 0;
54 __skb_pull(skb, tnl_hlen); 60 __skb_pull(skb, tnl_hlen);
55 skb_reset_mac_header(skb); 61 skb_reset_mac_header(skb);
56 skb_set_network_header(skb, skb_inner_network_offset(skb)); 62 skb_set_network_header(skb, skb_inner_network_offset(skb));
57 skb->mac_len = skb_inner_network_offset(skb); 63 skb->mac_len = skb_inner_network_offset(skb);
58 skb->protocol = new_protocol; 64 skb->protocol = new_protocol;
65
66 need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
59 skb->encap_hdr_csum = need_csum; 67 skb->encap_hdr_csum = need_csum;
68
69 remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
60 skb->remcsum_offload = remcsum; 70 skb->remcsum_offload = remcsum;
61 71
72 ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
73
62 /* Try to offload checksum if possible */ 74 /* Try to offload checksum if possible */
63 offload_csum = !!(need_csum && 75 offload_csum = !!(need_csum &&
64 ((skb->dev->features & NETIF_F_HW_CSUM) || 76 (skb->dev->features &
65 (skb->dev->features & (is_ipv6 ? 77 (is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
66 NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM)))); 78 (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));
79
80 features &= skb->dev->hw_enc_features;
81
82 /* The only checksum offload we care about from here on out is the
83 * outer one so strip the existing checksum feature flags and
84 * instead set the flag based on our outer checksum offload value.
85 */
86 if (remcsum || ufo) {
87 features &= ~NETIF_F_CSUM_MASK;
88 if (!need_csum || offload_csum)
89 features |= NETIF_F_HW_CSUM;
90 }
67 91
68 /* segment inner packet. */ 92 /* segment inner packet. */
69 enc_features = skb->dev->hw_enc_features & features; 93 segs = gso_inner_segment(skb, features);
70 segs = gso_inner_segment(skb, enc_features);
71 if (IS_ERR_OR_NULL(segs)) { 94 if (IS_ERR_OR_NULL(segs)) {
72 skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, 95 skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
73 mac_len); 96 mac_len);
@@ -78,17 +101,13 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
78 udp_offset = outer_hlen - tnl_hlen; 101 udp_offset = outer_hlen - tnl_hlen;
79 skb = segs; 102 skb = segs;
80 do { 103 do {
81 struct udphdr *uh; 104 __be16 len;
82 int len;
83 __be32 delta;
84 105
85 if (dont_encap) { 106 if (remcsum)
86 skb->encapsulation = 0;
87 skb->ip_summed = CHECKSUM_NONE; 107 skb->ip_summed = CHECKSUM_NONE;
88 } else { 108
89 /* Only set up inner headers if we might be offloading 109 /* Set up inner headers if we are offloading inner checksum */
90 * inner checksum. 110 if (skb->ip_summed == CHECKSUM_PARTIAL) {
91 */
92 skb_reset_inner_headers(skb); 111 skb_reset_inner_headers(skb);
93 skb->encapsulation = 1; 112 skb->encapsulation = 1;
94 } 113 }
@@ -96,43 +115,27 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
96 skb->mac_len = mac_len; 115 skb->mac_len = mac_len;
97 skb->protocol = protocol; 116 skb->protocol = protocol;
98 117
99 skb_push(skb, outer_hlen); 118 __skb_push(skb, outer_hlen);
100 skb_reset_mac_header(skb); 119 skb_reset_mac_header(skb);
101 skb_set_network_header(skb, mac_len); 120 skb_set_network_header(skb, mac_len);
102 skb_set_transport_header(skb, udp_offset); 121 skb_set_transport_header(skb, udp_offset);
103 len = skb->len - udp_offset; 122 len = htons(skb->len - udp_offset);
104 uh = udp_hdr(skb); 123 uh = udp_hdr(skb);
105 uh->len = htons(len); 124 uh->len = len;
106 125
107 if (!need_csum) 126 if (!need_csum)
108 continue; 127 continue;
109 128
110 delta = htonl(oldlen + len); 129 uh->check = ~csum_fold(csum_add(partial, (__force __wsum)len));
111 130
112 uh->check = ~csum_fold((__force __wsum) 131 if (skb->encapsulation || !offload_csum) {
113 ((__force u32)uh->check +
114 (__force u32)delta));
115 if (offload_csum) {
116 skb->ip_summed = CHECKSUM_PARTIAL;
117 skb->csum_start = skb_transport_header(skb) - skb->head;
118 skb->csum_offset = offsetof(struct udphdr, check);
119 } else if (remcsum) {
120 /* Need to calculate checksum from scratch,
121 * inner checksums are never when doing
122 * remote_checksum_offload.
123 */
124
125 skb->csum = skb_checksum(skb, udp_offset,
126 skb->len - udp_offset,
127 0);
128 uh->check = csum_fold(skb->csum);
129 if (uh->check == 0)
130 uh->check = CSUM_MANGLED_0;
131 } else {
132 uh->check = gso_make_checksum(skb, ~uh->check); 132 uh->check = gso_make_checksum(skb, ~uh->check);
133
134 if (uh->check == 0) 133 if (uh->check == 0)
135 uh->check = CSUM_MANGLED_0; 134 uh->check = CSUM_MANGLED_0;
135 } else {
136 skb->ip_summed = CHECKSUM_PARTIAL;
137 skb->csum_start = skb_transport_header(skb) - skb->head;
138 skb->csum_offset = offsetof(struct udphdr, check);
136 } 139 }
137 } while ((skb = skb->next)); 140 } while ((skb = skb->next));
138out: 141out:
@@ -235,6 +238,13 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
235 238
236 skb->ip_summed = CHECKSUM_NONE; 239 skb->ip_summed = CHECKSUM_NONE;
237 240
241 /* If there is no outer header we can fake a checksum offload
242 * due to the fact that we have already done the checksum in
243 * software prior to segmenting the frame.
244 */
245 if (!skb->encap_hdr_csum)
246 features |= NETIF_F_HW_CSUM;
247
238 /* Fragment the skb. IP headers of the fragments are updated in 248 /* Fragment the skb. IP headers of the fragments are updated in
239 * inet_gso_segment() 249 * inet_gso_segment()
240 */ 250 */
@@ -302,14 +312,14 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
302 unsigned int off = skb_gro_offset(skb); 312 unsigned int off = skb_gro_offset(skb);
303 int flush = 1; 313 int flush = 1;
304 314
305 if (NAPI_GRO_CB(skb)->udp_mark || 315 if (NAPI_GRO_CB(skb)->encap_mark ||
306 (skb->ip_summed != CHECKSUM_PARTIAL && 316 (skb->ip_summed != CHECKSUM_PARTIAL &&
307 NAPI_GRO_CB(skb)->csum_cnt == 0 && 317 NAPI_GRO_CB(skb)->csum_cnt == 0 &&
308 !NAPI_GRO_CB(skb)->csum_valid)) 318 !NAPI_GRO_CB(skb)->csum_valid))
309 goto out; 319 goto out;
310 320
311 /* mark that this skb passed once through the udp gro layer */ 321 /* mark that this skb passed once through the tunnel gro layer */
312 NAPI_GRO_CB(skb)->udp_mark = 1; 322 NAPI_GRO_CB(skb)->encap_mark = 1;
313 323
314 rcu_read_lock(); 324 rcu_read_lock();
315 uo_priv = rcu_dereference(udp_offload_base); 325 uo_priv = rcu_dereference(udp_offload_base);
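
The rewritten __skb_udp_tunnel_segment() precomputes partial = csum_sub(csum_unfold(uh->check), htonl(skb->len)) once and then derives each segment's outer UDP checksum as ~csum_fold(csum_add(partial, len)); the in-diff comment explains why the 32-bit skb->len is used rather than the 16-bit uh->len. A standalone sketch of that arithmetic with plain integers; the base pseudo-header sum is an arbitrary example value, not taken from the patch:

    /* Standalone sketch (not kernel code) of the "partial" checksum trick:
     * subtract the old (possibly >64K) length once, then add each
     * segment's length back, instead of re-summing the payload.
     */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t fold32(uint32_t sum)
    {
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return sum;
    }

    /* add a 32-bit length into a one's-complement sum: both 16-bit halves */
    static uint32_t add_len(uint32_t sum, uint32_t len)
    {
        return fold32(sum + (len >> 16) + (len & 0xffff));
    }

    static uint32_t sub_len(uint32_t sum, uint32_t len)
    {
        /* subtracting x in one's complement == adding ~x, half by half */
        return fold32(sum + (~(len >> 16) & 0xffff) + (~len & 0xffff));
    }

    int main(void)
    {
        /* pretend this is the pseudo-header sum without the length field */
        uint32_t base    = 0x4a31;
        uint32_t old_len = 70000;   /* GSO super-packet, exceeds 16 bits */
        uint32_t seg_len = 1472;    /* one segment after splitting */

        /* checksum field as it sits in the GSO super-packet */
        uint16_t old_check = (uint16_t)~add_len(base, old_len);

        /* kernel: partial = csum_sub(csum_unfold(uh->check), htonl(skb->len)) */
        uint32_t partial = sub_len((uint16_t)~old_check, old_len);

        /* kernel: uh->check = ~csum_fold(csum_add(partial, len)) per segment */
        uint16_t seg_check = (uint16_t)~add_len(partial, seg_len);

        /* recomputing from scratch gives the same value */
        printf("incremental %#06x  from scratch %#06x\n",
               seg_check, (uint16_t)~add_len(base, seg_len));
        return 0;
    }

The incremental value matches the from-scratch computation even when the super-packet length no longer fits in the 16-bit uh->len field, which is exactly why the code reads the length out of skb->len.
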
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 0ec08814f37d..96599d1a1318 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -89,6 +89,8 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb
89 uh->source = src_port; 89 uh->source = src_port;
90 uh->len = htons(skb->len); 90 uh->len = htons(skb->len);
91 91
92 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
93
92 udp_set_csum(nocheck, skb, src, dst, skb->len); 94 udp_set_csum(nocheck, skb, src, dst, skb->len);
93 95
94 iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); 96 iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index bb7dabe2ebbf..11e875ffd7ac 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -69,6 +69,7 @@ config INET6_ESP
69 select CRYPTO_CBC 69 select CRYPTO_CBC
70 select CRYPTO_SHA1 70 select CRYPTO_SHA1
71 select CRYPTO_DES 71 select CRYPTO_DES
72 select CRYPTO_ECHAINIV
72 ---help--- 73 ---help---
73 Support for IPsec ESP. 74 Support for IPsec ESP.
74 75
@@ -206,6 +207,7 @@ config IPV6_NDISC_NODETYPE
206config IPV6_TUNNEL 207config IPV6_TUNNEL
207 tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)" 208 tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)"
208 select INET6_TUNNEL 209 select INET6_TUNNEL
210 select DST_CACHE
209 ---help--- 211 ---help---
210 Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in 212 Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in
211 RFC 2473. 213 RFC 2473.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 38eeddedfc21..27aed1afcf81 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -216,6 +216,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
216 }, 216 },
217 .use_oif_addrs_only = 0, 217 .use_oif_addrs_only = 0,
218 .ignore_routes_with_linkdown = 0, 218 .ignore_routes_with_linkdown = 0,
219 .keep_addr_on_down = 0,
219}; 220};
220 221
221static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { 222static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
260 }, 261 },
261 .use_oif_addrs_only = 0, 262 .use_oif_addrs_only = 0,
262 .ignore_routes_with_linkdown = 0, 263 .ignore_routes_with_linkdown = 0,
264 .keep_addr_on_down = 0,
263}; 265};
264 266
265/* Check if a valid qdisc is available */ 267/* Check if a valid qdisc is available */
@@ -471,18 +473,21 @@ static int inet6_netconf_msgsize_devconf(int type)
471{ 473{
472 int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) 474 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
473 + nla_total_size(4); /* NETCONFA_IFINDEX */ 475 + nla_total_size(4); /* NETCONFA_IFINDEX */
476 bool all = false;
474 477
475 /* type -1 is used for ALL */ 478 if (type == NETCONFA_ALL)
476 if (type == -1 || type == NETCONFA_FORWARDING) 479 all = true;
480
481 if (all || type == NETCONFA_FORWARDING)
477 size += nla_total_size(4); 482 size += nla_total_size(4);
478#ifdef CONFIG_IPV6_MROUTE 483#ifdef CONFIG_IPV6_MROUTE
479 if (type == -1 || type == NETCONFA_MC_FORWARDING) 484 if (all || type == NETCONFA_MC_FORWARDING)
480 size += nla_total_size(4); 485 size += nla_total_size(4);
481#endif 486#endif
482 if (type == -1 || type == NETCONFA_PROXY_NEIGH) 487 if (all || type == NETCONFA_PROXY_NEIGH)
483 size += nla_total_size(4); 488 size += nla_total_size(4);
484 489
485 if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) 490 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
486 size += nla_total_size(4); 491 size += nla_total_size(4);
487 492
488 return size; 493 return size;
@@ -495,33 +500,36 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
495{ 500{
496 struct nlmsghdr *nlh; 501 struct nlmsghdr *nlh;
497 struct netconfmsg *ncm; 502 struct netconfmsg *ncm;
503 bool all = false;
498 504
499 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), 505 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
500 flags); 506 flags);
501 if (!nlh) 507 if (!nlh)
502 return -EMSGSIZE; 508 return -EMSGSIZE;
503 509
510 if (type == NETCONFA_ALL)
511 all = true;
512
504 ncm = nlmsg_data(nlh); 513 ncm = nlmsg_data(nlh);
505 ncm->ncm_family = AF_INET6; 514 ncm->ncm_family = AF_INET6;
506 515
507 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) 516 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
508 goto nla_put_failure; 517 goto nla_put_failure;
509 518
510 /* type -1 is used for ALL */ 519 if ((all || type == NETCONFA_FORWARDING) &&
511 if ((type == -1 || type == NETCONFA_FORWARDING) &&
512 nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) 520 nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
513 goto nla_put_failure; 521 goto nla_put_failure;
514#ifdef CONFIG_IPV6_MROUTE 522#ifdef CONFIG_IPV6_MROUTE
515 if ((type == -1 || type == NETCONFA_MC_FORWARDING) && 523 if ((all || type == NETCONFA_MC_FORWARDING) &&
516 nla_put_s32(skb, NETCONFA_MC_FORWARDING, 524 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
517 devconf->mc_forwarding) < 0) 525 devconf->mc_forwarding) < 0)
518 goto nla_put_failure; 526 goto nla_put_failure;
519#endif 527#endif
520 if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && 528 if ((all || type == NETCONFA_PROXY_NEIGH) &&
521 nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) 529 nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0)
522 goto nla_put_failure; 530 goto nla_put_failure;
523 531
524 if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) && 532 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
525 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, 533 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
526 devconf->ignore_routes_with_linkdown) < 0) 534 devconf->ignore_routes_with_linkdown) < 0)
527 goto nla_put_failure; 535 goto nla_put_failure;
@@ -583,7 +591,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
583 if (err < 0) 591 if (err < 0)
584 goto errout; 592 goto errout;
585 593
586 err = EINVAL; 594 err = -EINVAL;
587 if (!tb[NETCONFA_IFINDEX]) 595 if (!tb[NETCONFA_IFINDEX])
588 goto errout; 596 goto errout;
589 597
@@ -607,14 +615,14 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
607 } 615 }
608 616
609 err = -ENOBUFS; 617 err = -ENOBUFS;
610 skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC); 618 skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
611 if (!skb) 619 if (!skb)
612 goto errout; 620 goto errout;
613 621
614 err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 622 err = inet6_netconf_fill_devconf(skb, ifindex, devconf,
615 NETLINK_CB(in_skb).portid, 623 NETLINK_CB(in_skb).portid,
616 nlh->nlmsg_seq, RTM_NEWNETCONF, 0, 624 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
617 -1); 625 NETCONFA_ALL);
618 if (err < 0) { 626 if (err < 0) {
619 /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ 627 /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
620 WARN_ON(err == -EMSGSIZE); 628 WARN_ON(err == -EMSGSIZE);
@@ -658,7 +666,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
658 cb->nlh->nlmsg_seq, 666 cb->nlh->nlmsg_seq,
659 RTM_NEWNETCONF, 667 RTM_NEWNETCONF,
660 NLM_F_MULTI, 668 NLM_F_MULTI,
661 -1) < 0) { 669 NETCONFA_ALL) < 0) {
662 rcu_read_unlock(); 670 rcu_read_unlock();
663 goto done; 671 goto done;
664 } 672 }
@@ -674,7 +682,7 @@ cont:
674 NETLINK_CB(cb->skb).portid, 682 NETLINK_CB(cb->skb).portid,
675 cb->nlh->nlmsg_seq, 683 cb->nlh->nlmsg_seq,
676 RTM_NEWNETCONF, NLM_F_MULTI, 684 RTM_NEWNETCONF, NLM_F_MULTI,
677 -1) < 0) 685 NETCONFA_ALL) < 0)
678 goto done; 686 goto done;
679 else 687 else
680 h++; 688 h++;
@@ -685,7 +693,7 @@ cont:
685 NETLINK_CB(cb->skb).portid, 693 NETLINK_CB(cb->skb).portid,
686 cb->nlh->nlmsg_seq, 694 cb->nlh->nlmsg_seq,
687 RTM_NEWNETCONF, NLM_F_MULTI, 695 RTM_NEWNETCONF, NLM_F_MULTI,
688 -1) < 0) 696 NETCONFA_ALL) < 0)
689 goto done; 697 goto done;
690 else 698 else
691 h++; 699 h++;
@@ -3168,6 +3176,81 @@ static void addrconf_gre_config(struct net_device *dev)
3168} 3176}
3169#endif 3177#endif
3170 3178
3179#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
3180/* If the host route is cached on the addr struct make sure it is associated
3181 * with the proper table. e.g., enslavement can change and if so the cached
3182 * host route needs to move to the new table.
3183 */
3184static void l3mdev_check_host_rt(struct inet6_dev *idev,
3185 struct inet6_ifaddr *ifp)
3186{
3187 if (ifp->rt) {
3188 u32 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3189
3190 if (tb_id != ifp->rt->rt6i_table->tb6_id) {
3191 ip6_del_rt(ifp->rt);
3192 ifp->rt = NULL;
3193 }
3194 }
3195}
3196#else
3197static void l3mdev_check_host_rt(struct inet6_dev *idev,
3198 struct inet6_ifaddr *ifp)
3199{
3200}
3201#endif
3202
3203static int fixup_permanent_addr(struct inet6_dev *idev,
3204 struct inet6_ifaddr *ifp)
3205{
3206 l3mdev_check_host_rt(idev, ifp);
3207
3208 if (!ifp->rt) {
3209 struct rt6_info *rt;
3210
3211 rt = addrconf_dst_alloc(idev, &ifp->addr, false);
3212 if (unlikely(IS_ERR(rt)))
3213 return PTR_ERR(rt);
3214
3215 ifp->rt = rt;
3216 }
3217
3218 if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
3219 addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
3220 idev->dev, 0, 0);
3221 }
3222
3223 addrconf_dad_start(ifp);
3224
3225 return 0;
3226}
3227
3228static void addrconf_permanent_addr(struct net_device *dev)
3229{
3230 struct inet6_ifaddr *ifp, *tmp;
3231 struct inet6_dev *idev;
3232
3233 idev = __in6_dev_get(dev);
3234 if (!idev)
3235 return;
3236
3237 write_lock_bh(&idev->lock);
3238
3239 list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
3240 if ((ifp->flags & IFA_F_PERMANENT) &&
3241 fixup_permanent_addr(idev, ifp) < 0) {
3242 write_unlock_bh(&idev->lock);
3243 ipv6_del_addr(ifp);
3244 write_lock_bh(&idev->lock);
3245
3246 net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n",
3247 idev->dev->name, &ifp->addr);
3248 }
3249 }
3250
3251 write_unlock_bh(&idev->lock);
3252}
3253
3171static int addrconf_notify(struct notifier_block *this, unsigned long event, 3254static int addrconf_notify(struct notifier_block *this, unsigned long event,
3172 void *ptr) 3255 void *ptr)
3173{ 3256{
@@ -3253,6 +3336,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
3253 run_pending = 1; 3336 run_pending = 1;
3254 } 3337 }
3255 3338
3339 /* restore routes for permanent addresses */
3340 addrconf_permanent_addr(dev);
3341
3256 switch (dev->type) { 3342 switch (dev->type) {
3257#if IS_ENABLED(CONFIG_IPV6_SIT) 3343#if IS_ENABLED(CONFIG_IPV6_SIT)
3258 case ARPHRD_SIT: 3344 case ARPHRD_SIT:
@@ -3356,7 +3442,10 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3356{ 3442{
3357 struct net *net = dev_net(dev); 3443 struct net *net = dev_net(dev);
3358 struct inet6_dev *idev; 3444 struct inet6_dev *idev;
3359 struct inet6_ifaddr *ifa; 3445 struct inet6_ifaddr *ifa, *tmp;
3446 struct list_head del_list;
3447 int _keep_addr;
3448 bool keep_addr;
3360 int state, i; 3449 int state, i;
3361 3450
3362 ASSERT_RTNL(); 3451 ASSERT_RTNL();
@@ -3383,6 +3472,16 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3383 3472
3384 } 3473 }
3385 3474
3475 /* aggregate the system setting and interface setting */
3476 _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
3477 if (!_keep_addr)
3478 _keep_addr = idev->cnf.keep_addr_on_down;
3479
3480 /* combine the user config with event to determine if permanent
3481 * addresses are to be removed from address hash table
3482 */
3483 keep_addr = !(how || _keep_addr <= 0);
3484
3386 /* Step 2: clear hash table */ 3485 /* Step 2: clear hash table */
3387 for (i = 0; i < IN6_ADDR_HSIZE; i++) { 3486 for (i = 0; i < IN6_ADDR_HSIZE; i++) {
3388 struct hlist_head *h = &inet6_addr_lst[i]; 3487 struct hlist_head *h = &inet6_addr_lst[i];
@@ -3391,9 +3490,15 @@ static int addrconf_ifdown(struct net_device *dev, int how)
3391restart: 3490restart:
3392 hlist_for_each_entry_rcu(ifa, h, addr_lst) { 3491 hlist_for_each_entry_rcu(ifa, h, addr_lst) {
3393 if (ifa->idev == idev) { 3492 if (ifa->idev == idev) {
3394 hlist_del_init_rcu(&ifa->addr_lst);
3395 addrconf_del_dad_work(ifa); 3493 addrconf_del_dad_work(ifa);
3396 goto restart; 3494 /* combined flag + permanent flag decide if
3495 * address is retained on a down event
3496 */
3497 if (!keep_addr ||
3498 !(ifa->flags & IFA_F_PERMANENT)) {
3499 hlist_del_init_rcu(&ifa->addr_lst);
3500 goto restart;
3501 }
3397 } 3502 }
3398 } 3503 }
3399 spin_unlock_bh(&addrconf_hash_lock); 3504 spin_unlock_bh(&addrconf_hash_lock);
@@ -3427,31 +3532,53 @@ restart:
3427 write_lock_bh(&idev->lock); 3532 write_lock_bh(&idev->lock);
3428 } 3533 }
3429 3534
3430 while (!list_empty(&idev->addr_list)) { 3535 /* re-combine the user config with event to determine if permanent
3431 ifa = list_first_entry(&idev->addr_list, 3536 * addresses are to be removed from the interface list
3432 struct inet6_ifaddr, if_list); 3537 */
3433 addrconf_del_dad_work(ifa); 3538 keep_addr = (!how && _keep_addr > 0);
3434 3539
3435 list_del(&ifa->if_list); 3540 INIT_LIST_HEAD(&del_list);
3541 list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
3542 addrconf_del_dad_work(ifa);
3436 3543
3437 write_unlock_bh(&idev->lock); 3544 write_unlock_bh(&idev->lock);
3438
3439 spin_lock_bh(&ifa->lock); 3545 spin_lock_bh(&ifa->lock);
3440 state = ifa->state; 3546
3441 ifa->state = INET6_IFADDR_STATE_DEAD; 3547 if (keep_addr && (ifa->flags & IFA_F_PERMANENT)) {
3548 /* set state to skip the notifier below */
3549 state = INET6_IFADDR_STATE_DEAD;
3550 ifa->state = 0;
3551 if (!(ifa->flags & IFA_F_NODAD))
3552 ifa->flags |= IFA_F_TENTATIVE;
3553 } else {
3554 state = ifa->state;
3555 ifa->state = INET6_IFADDR_STATE_DEAD;
3556
3557 list_del(&ifa->if_list);
3558 list_add(&ifa->if_list, &del_list);
3559 }
3560
3442 spin_unlock_bh(&ifa->lock); 3561 spin_unlock_bh(&ifa->lock);
3443 3562
3444 if (state != INET6_IFADDR_STATE_DEAD) { 3563 if (state != INET6_IFADDR_STATE_DEAD) {
3445 __ipv6_ifa_notify(RTM_DELADDR, ifa); 3564 __ipv6_ifa_notify(RTM_DELADDR, ifa);
3446 inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); 3565 inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
3447 } 3566 }
3448 in6_ifa_put(ifa);
3449 3567
3450 write_lock_bh(&idev->lock); 3568 write_lock_bh(&idev->lock);
3451 } 3569 }
3452 3570
3453 write_unlock_bh(&idev->lock); 3571 write_unlock_bh(&idev->lock);
3454 3572
3573 /* now clean up addresses to be removed */
3574 while (!list_empty(&del_list)) {
3575 ifa = list_first_entry(&del_list,
3576 struct inet6_ifaddr, if_list);
3577 list_del(&ifa->if_list);
3578
3579 in6_ifa_put(ifa);
3580 }
3581
3455 /* Step 5: Discard anycast and multicast list */ 3582 /* Step 5: Discard anycast and multicast list */
3456 if (how) { 3583 if (how) {
3457 ipv6_ac_destroy_dev(idev); 3584 ipv6_ac_destroy_dev(idev);
@@ -3538,6 +3665,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
3538{ 3665{
3539 struct inet6_dev *idev = ifp->idev; 3666 struct inet6_dev *idev = ifp->idev;
3540 struct net_device *dev = idev->dev; 3667 struct net_device *dev = idev->dev;
3668 bool notify = false;
3541 3669
3542 addrconf_join_solict(dev, &ifp->addr); 3670 addrconf_join_solict(dev, &ifp->addr);
3543 3671
@@ -3583,7 +3711,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
3583 /* Because optimistic nodes can use this address, 3711 /* Because optimistic nodes can use this address,
3584 * notify listeners. If DAD fails, RTM_DELADDR is sent. 3712 * notify listeners. If DAD fails, RTM_DELADDR is sent.
3585 */ 3713 */
3586 ipv6_ifa_notify(RTM_NEWADDR, ifp); 3714 notify = true;
3587 } 3715 }
3588 } 3716 }
3589 3717
@@ -3591,6 +3719,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
3591out: 3719out:
3592 spin_unlock(&ifp->lock); 3720 spin_unlock(&ifp->lock);
3593 read_unlock_bh(&idev->lock); 3721 read_unlock_bh(&idev->lock);
3722 if (notify)
3723 ipv6_ifa_notify(RTM_NEWADDR, ifp);
3594} 3724}
3595 3725
3596static void addrconf_dad_start(struct inet6_ifaddr *ifp) 3726static void addrconf_dad_start(struct inet6_ifaddr *ifp)
@@ -4711,6 +4841,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
4711 array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown; 4841 array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
4712 /* we omit DEVCONF_STABLE_SECRET for now */ 4842 /* we omit DEVCONF_STABLE_SECRET for now */
4713 array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; 4843 array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
4844 array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
4845 array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
4846 array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
4714} 4847}
4715 4848
4716static inline size_t inet6_ifla6_size(void) 4849static inline size_t inet6_ifla6_size(void)
@@ -5785,6 +5918,28 @@ static struct addrconf_sysctl_table
5785 .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, 5918 .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown,
5786 }, 5919 },
5787 { 5920 {
5921 .procname = "drop_unicast_in_l2_multicast",
5922 .data = &ipv6_devconf.drop_unicast_in_l2_multicast,
5923 .maxlen = sizeof(int),
5924 .mode = 0644,
5925 .proc_handler = proc_dointvec,
5926 },
5927 {
5928 .procname = "drop_unsolicited_na",
5929 .data = &ipv6_devconf.drop_unsolicited_na,
5930 .maxlen = sizeof(int),
5931 .mode = 0644,
5932 .proc_handler = proc_dointvec,
5933 },
5934 {
5935 .procname = "keep_addr_on_down",
5936 .data = &ipv6_devconf.keep_addr_on_down,
5937 .maxlen = sizeof(int),
5938 .mode = 0644,
5939 .proc_handler = proc_dointvec,
5940
5941 },
5942 {
5788 /* sentinel */ 5943 /* sentinel */
5789 } 5944 }
5790 }, 5945 },
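
The new keep_addr_on_down handling in addrconf_ifdown() combines two knobs: the all-devices setting wins when it is non-zero, otherwise the per-interface setting applies, and permanent addresses are only ever retained for an administrative down (how == 0), never when the device is unregistered. A minimal standalone sketch of just that decision; it is not the kernel code and the helper name is illustrative:

    /* Standalone sketch of the keep_addr_on_down decision in
     * addrconf_ifdown(): the global value overrides when non-zero,
     * and nothing is kept on device unregister (how != 0).
     */
    #include <stdbool.h>
    #include <stdio.h>

    static bool keep_addr_on_down(int cnf_all, int cnf_dev, int how)
    {
        int keep = cnf_all ? cnf_all : cnf_dev;

        return !how && keep > 0;
    }

    int main(void)
    {
        printf("%d %d %d %d\n",
               keep_addr_on_down(0,  0, 0),   /* default: addresses are flushed */
               keep_addr_on_down(0,  1, 0),   /* enabled per interface: kept */
               keep_addr_on_down(1, -1, 0),   /* enabled globally: kept */
               keep_addr_on_down(1,  1, 1));  /* unregister: never kept */
        return 0;
    }
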
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9f5137cd604e..b11c37cfd67c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -235,7 +235,11 @@ lookup_protocol:
235 * creation time automatically shares. 235 * creation time automatically shares.
236 */ 236 */
237 inet->inet_sport = htons(inet->inet_num); 237 inet->inet_sport = htons(inet->inet_num);
238 sk->sk_prot->hash(sk); 238 err = sk->sk_prot->hash(sk);
239 if (err) {
240 sk_common_release(sk);
241 goto out;
242 }
239 } 243 }
240 if (sk->sk_prot->init) { 244 if (sk->sk_prot->init) {
241 err = sk->sk_prot->init(sk); 245 err = sk->sk_prot->init(sk);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 517c55b01ba8..428162155280 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -162,6 +162,9 @@ ipv4_connected:
162 fl6.fl6_dport = inet->inet_dport; 162 fl6.fl6_dport = inet->inet_dport;
163 fl6.fl6_sport = inet->inet_sport; 163 fl6.fl6_sport = inet->inet_sport;
164 164
165 if (!fl6.flowi6_oif)
166 fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
167
165 if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) 168 if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
166 fl6.flowi6_oif = np->mcast_oif; 169 fl6.flowi6_oif = np->mcast_oif;
167 170
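
np->sticky_pktinfo, which the datagram.c hunk now consults for the output interface, is populated from userspace through a "sticky" IPV6_PKTINFO socket option. A standalone usage sketch (Linux-specific; _GNU_SOURCE is needed for struct in6_pktinfo, and the interface name "eth0" is only an example):

    /* Standalone userspace sketch: record a preferred outgoing interface
     * with a sticky IPV6_PKTINFO, which connect()/sendmsg() can fall back
     * to when no interface was chosen otherwise.
     */
    #define _GNU_SOURCE
    #include <net/if.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        struct in6_pktinfo pi;
        int fd = socket(AF_INET6, SOCK_DGRAM, 0);

        if (fd < 0) {
            perror("socket");
            return 1;
        }

        memset(&pi, 0, sizeof(pi));
        pi.ipi6_ifindex = if_nametoindex("eth0");   /* 0 if the name is unknown */

        if (setsockopt(fd, IPPROTO_IPV6, IPV6_PKTINFO, &pi, sizeof(pi)) < 0)
            perror("setsockopt(IPV6_PKTINFO)");
        else
            printf("sticky pktinfo ifindex = %u\n", pi.ipi6_ifindex);

        close(fd);
        return 0;
    }
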
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 5c5d23e59da5..9508a20fbf61 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -257,7 +257,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
257 *fragoff = _frag_off; 257 *fragoff = _frag_off;
258 return hp->nexthdr; 258 return hp->nexthdr;
259 } 259 }
260 return -ENOENT; 260 if (!found)
261 return -ENOENT;
262 if (fragoff)
263 *fragoff = _frag_off;
264 break;
261 } 265 }
262 hdrlen = 8; 266 hdrlen = 8;
263 } else if (nexthdr == NEXTHDR_AUTH) { 267 } else if (nexthdr == NEXTHDR_AUTH) {
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 32dc9aab7297..30613050e4ca 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -99,5 +99,6 @@ static void __exit ila_fini(void)
99 99
100module_init(ila_init); 100module_init(ila_init);
101module_exit(ila_fini); 101module_exit(ila_fini);
102MODULE_ALIAS_RTNL_LWT(ILA);
102MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); 103MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
103MODULE_LICENSE("GPL"); 104MODULE_LICENSE("GPL");
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 36c3f0155010..532c3ef282c5 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -26,6 +26,7 @@
26#include <net/ip6_route.h> 26#include <net/ip6_route.h>
27#include <net/sock.h> 27#include <net/sock.h>
28#include <net/inet6_connection_sock.h> 28#include <net/inet6_connection_sock.h>
29#include <net/sock_reuseport.h>
29 30
30int inet6_csk_bind_conflict(const struct sock *sk, 31int inet6_csk_bind_conflict(const struct sock *sk,
31 const struct inet_bind_bucket *tb, bool relax) 32 const struct inet_bind_bucket *tb, bool relax)
@@ -48,6 +49,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
48 if ((!reuse || !sk2->sk_reuse || 49 if ((!reuse || !sk2->sk_reuse ||
49 sk2->sk_state == TCP_LISTEN) && 50 sk2->sk_state == TCP_LISTEN) &&
50 (!reuseport || !sk2->sk_reuseport || 51 (!reuseport || !sk2->sk_reuseport ||
52 rcu_access_pointer(sk->sk_reuseport_cb) ||
51 (sk2->sk_state != TCP_TIME_WAIT && 53 (sk2->sk_state != TCP_TIME_WAIT &&
52 !uid_eq(uid, 54 !uid_eq(uid,
53 sock_i_uid((struct sock *)sk2))))) { 55 sock_i_uid((struct sock *)sk2))))) {
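
The bind-conflict change above and the lookup changes in the next file serve SO_REUSEPORT groups (sk->sk_reuseport_cb): several sockets bound to the same port, with reuseport_select_sock() picking one of them per packet. A standalone userspace sketch of the socket setup these kernel paths handle, using only standard socket calls; port 5555 is arbitrary:

    /* Standalone userspace sketch: two sockets set SO_REUSEPORT before
     * bind() and share one UDP port, forming the reuseport group the
     * kernel-side fast lookup path selects from.
     */
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    static int bound_udp6_socket(uint16_t port)
    {
        struct sockaddr_in6 addr;
        int one = 1;
        int fd = socket(AF_INET6, SOCK_DGRAM, 0);

        if (fd < 0)
            return -1;
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0) {
            close(fd);
            return -1;
        }

        memset(&addr, 0, sizeof(addr));
        addr.sin6_family = AF_INET6;
        addr.sin6_addr = in6addr_any;
        addr.sin6_port = htons(port);

        if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
            close(fd);
            return -1;
        }
        return fd;
    }

    int main(void)
    {
        int a = bound_udp6_socket(5555);
        int b = bound_udp6_socket(5555);    /* succeeds only because of SO_REUSEPORT */

        printf("first bind: %s, second bind: %s\n",
               a >= 0 ? "ok" : "failed", b >= 0 ? "ok" : "failed");

        if (a >= 0)
            close(a);
        if (b >= 0)
            close(b);
        return 0;
    }
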
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 21ace5a2bf7c..70f2628be6fa 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -17,11 +17,13 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/random.h> 18#include <linux/random.h>
19 19
20#include <net/addrconf.h>
20#include <net/inet_connection_sock.h> 21#include <net/inet_connection_sock.h>
21#include <net/inet_hashtables.h> 22#include <net/inet_hashtables.h>
22#include <net/inet6_hashtables.h> 23#include <net/inet6_hashtables.h>
23#include <net/secure_seq.h> 24#include <net/secure_seq.h>
24#include <net/ip.h> 25#include <net/ip.h>
26#include <net/sock_reuseport.h>
25 27
26u32 inet6_ehashfn(const struct net *net, 28u32 inet6_ehashfn(const struct net *net,
27 const struct in6_addr *laddr, const u16 lport, 29 const struct in6_addr *laddr, const u16 lport,
@@ -121,7 +123,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
121} 123}
122 124
123struct sock *inet6_lookup_listener(struct net *net, 125struct sock *inet6_lookup_listener(struct net *net,
124 struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, 126 struct inet_hashinfo *hashinfo,
127 struct sk_buff *skb, int doff,
128 const struct in6_addr *saddr,
125 const __be16 sport, const struct in6_addr *daddr, 129 const __be16 sport, const struct in6_addr *daddr,
126 const unsigned short hnum, const int dif) 130 const unsigned short hnum, const int dif)
127{ 131{
@@ -129,6 +133,7 @@ struct sock *inet6_lookup_listener(struct net *net,
129 const struct hlist_nulls_node *node; 133 const struct hlist_nulls_node *node;
130 struct sock *result; 134 struct sock *result;
131 int score, hiscore, matches = 0, reuseport = 0; 135 int score, hiscore, matches = 0, reuseport = 0;
136 bool select_ok = true;
132 u32 phash = 0; 137 u32 phash = 0;
133 unsigned int hash = inet_lhashfn(net, hnum); 138 unsigned int hash = inet_lhashfn(net, hnum);
134 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; 139 struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -146,6 +151,15 @@ begin:
146 if (reuseport) { 151 if (reuseport) {
147 phash = inet6_ehashfn(net, daddr, hnum, 152 phash = inet6_ehashfn(net, daddr, hnum,
148 saddr, sport); 153 saddr, sport);
154 if (select_ok) {
155 struct sock *sk2;
156 sk2 = reuseport_select_sock(sk, phash,
157 skb, doff);
158 if (sk2) {
159 result = sk2;
160 goto found;
161 }
162 }
149 matches = 1; 163 matches = 1;
150 } 164 }
151 } else if (score == hiscore && reuseport) { 165 } else if (score == hiscore && reuseport) {
@@ -163,11 +177,13 @@ begin:
163 if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) 177 if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
164 goto begin; 178 goto begin;
165 if (result) { 179 if (result) {
180found:
166 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) 181 if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
167 result = NULL; 182 result = NULL;
168 else if (unlikely(compute_score(result, net, hnum, daddr, 183 else if (unlikely(compute_score(result, net, hnum, daddr,
169 dif) < hiscore)) { 184 dif) < hiscore)) {
170 sock_put(result); 185 sock_put(result);
186 select_ok = false;
171 goto begin; 187 goto begin;
172 } 188 }
173 } 189 }
@@ -177,6 +193,7 @@ begin:
177EXPORT_SYMBOL_GPL(inet6_lookup_listener); 193EXPORT_SYMBOL_GPL(inet6_lookup_listener);
178 194
179struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, 195struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
196 struct sk_buff *skb, int doff,
180 const struct in6_addr *saddr, const __be16 sport, 197 const struct in6_addr *saddr, const __be16 sport,
181 const struct in6_addr *daddr, const __be16 dport, 198 const struct in6_addr *daddr, const __be16 dport,
182 const int dif) 199 const int dif)
@@ -184,7 +201,8 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
184 struct sock *sk; 201 struct sock *sk;
185 202
186 local_bh_disable(); 203 local_bh_disable();
187 sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif); 204 sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
205 ntohs(dport), dif);
188 local_bh_enable(); 206 local_bh_enable();
189 207
190 return sk; 208 return sk;
@@ -274,3 +292,59 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
274 __inet6_check_established); 292 __inet6_check_established);
275} 293}
276EXPORT_SYMBOL_GPL(inet6_hash_connect); 294EXPORT_SYMBOL_GPL(inet6_hash_connect);
295
296int inet6_hash(struct sock *sk)
297{
298 if (sk->sk_state != TCP_CLOSE) {
299 local_bh_disable();
300 __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
301 local_bh_enable();
302 }
303
304 return 0;
305}
306EXPORT_SYMBOL_GPL(inet6_hash);
307
308/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
309 * only, and any IPv4 addresses if not IPv6 only
310 * match_wildcard == false: addresses must be exactly the same, i.e.
311 * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
312 * and 0.0.0.0 equals to 0.0.0.0 only
313 */
314int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
315 bool match_wildcard)
316{
317 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
318 int sk2_ipv6only = inet_v6_ipv6only(sk2);
319 int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
320 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
321
322 /* if both are mapped, treat as IPv4 */
323 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
324 if (!sk2_ipv6only) {
325 if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
326 return 1;
327 if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
328 return match_wildcard;
329 }
330 return 0;
331 }
332
333 if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
334 return 1;
335
336 if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
337 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
338 return 1;
339
340 if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
341 !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
342 return 1;
343
344 if (sk2_rcv_saddr6 &&
345 ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
346 return 1;
347
348 return 0;
349}
350EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);
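
ipv6_rcv_saddr_equal() above keys its matching rules off three address classes: the wildcard (IPV6_ADDR_ANY), IPv4-mapped addresses (compared as IPv4 when both sides are mapped and the peer is not v6-only), and ordinary IPv6 addresses (byte-for-byte equality). A standalone userspace sketch that merely classifies example addresses with the standard IN6_IS_ADDR_* helpers; the addresses are documentation examples, not taken from the patch:

    /* Standalone sketch: classify the address kinds that drive the
     * rcv_saddr matching rules above.
     */
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdio.h>

    static void classify(const char *str)
    {
        struct in6_addr a;

        if (inet_pton(AF_INET6, str, &a) != 1) {
            printf("%-20s invalid\n", str);
            return;
        }
        printf("%-20s any=%d v4mapped=%d\n", str,
               IN6_IS_ADDR_UNSPECIFIED(&a), IN6_IS_ADDR_V4MAPPED(&a));
    }

    int main(void)
    {
        classify("::");                 /* IPV6_ADDR_ANY: matched by the wildcard rules */
        classify("::ffff:192.0.2.1");   /* mapped: compared as IPv4 when both sides are mapped */
        classify("2001:db8::1");        /* plain IPv6: must compare equal byte for byte */
        return 0;
    }
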
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 9a4d7322fb22..b2025bf3da4a 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -6,8 +6,7 @@
6#ifndef _HAVE_ARCH_IPV6_CSUM 6#ifndef _HAVE_ARCH_IPV6_CSUM
7__sum16 csum_ipv6_magic(const struct in6_addr *saddr, 7__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
8 const struct in6_addr *daddr, 8 const struct in6_addr *daddr,
9 __u32 len, unsigned short proto, 9 __u32 len, __u8 proto, __wsum csum)
10 __wsum csum)
11{ 10{
12 11
13 int carry; 12 int carry;
@@ -98,27 +97,16 @@ void udp6_set_csum(bool nocheck, struct sk_buff *skb,
98 uh->check = 0; 97 uh->check = 0;
99 else if (skb_is_gso(skb)) 98 else if (skb_is_gso(skb))
100 uh->check = ~udp_v6_check(len, saddr, daddr, 0); 99 uh->check = ~udp_v6_check(len, saddr, daddr, 0);
101 else if (skb_dst(skb) && skb_dst(skb)->dev && 100 else if (skb->ip_summed == CHECKSUM_PARTIAL) {
102 (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { 101 uh->check = 0;
103 102 uh->check = udp_v6_check(len, saddr, daddr, lco_csum(skb));
104 BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); 103 if (uh->check == 0)
105 104 uh->check = CSUM_MANGLED_0;
105 } else {
106 skb->ip_summed = CHECKSUM_PARTIAL; 106 skb->ip_summed = CHECKSUM_PARTIAL;
107 skb->csum_start = skb_transport_header(skb) - skb->head; 107 skb->csum_start = skb_transport_header(skb) - skb->head;
108 skb->csum_offset = offsetof(struct udphdr, check); 108 skb->csum_offset = offsetof(struct udphdr, check);
109 uh->check = ~udp_v6_check(len, saddr, daddr, 0); 109 uh->check = ~udp_v6_check(len, saddr, daddr, 0);
110 } else {
111 __wsum csum;
112
113 BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
114
115 uh->check = 0;
116 csum = skb_checksum(skb, 0, len, 0);
117 uh->check = udp_v6_check(len, saddr, daddr, csum);
118 if (uh->check == 0)
119 uh->check = CSUM_MANGLED_0;
120
121 skb->ip_summed = CHECKSUM_UNNECESSARY;
122 } 110 }
123} 111}
124EXPORT_SYMBOL(udp6_set_csum); 112EXPORT_SYMBOL(udp6_set_csum);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 0c7e276c230e..ea071fad67a0 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -55,8 +55,6 @@ struct fib6_cleaner {
55 void *arg; 55 void *arg;
56}; 56};
57 57
58static DEFINE_RWLOCK(fib6_walker_lock);
59
60#ifdef CONFIG_IPV6_SUBTREES 58#ifdef CONFIG_IPV6_SUBTREES
61#define FWS_INIT FWS_S 59#define FWS_INIT FWS_S
62#else 60#else
@@ -66,7 +64,7 @@ static DEFINE_RWLOCK(fib6_walker_lock);
66static void fib6_prune_clones(struct net *net, struct fib6_node *fn); 64static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
67static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); 65static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
68static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); 66static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
69static int fib6_walk(struct fib6_walker *w); 67static int fib6_walk(struct net *net, struct fib6_walker *w);
70static int fib6_walk_continue(struct fib6_walker *w); 68static int fib6_walk_continue(struct fib6_walker *w);
71 69
72/* 70/*
@@ -78,21 +76,21 @@ static int fib6_walk_continue(struct fib6_walker *w);
78 76
79static void fib6_gc_timer_cb(unsigned long arg); 77static void fib6_gc_timer_cb(unsigned long arg);
80 78
81static LIST_HEAD(fib6_walkers); 79#define FOR_WALKERS(net, w) \
82#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) 80 list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)
83 81
84static void fib6_walker_link(struct fib6_walker *w) 82static void fib6_walker_link(struct net *net, struct fib6_walker *w)
85{ 83{
86 write_lock_bh(&fib6_walker_lock); 84 write_lock_bh(&net->ipv6.fib6_walker_lock);
87 list_add(&w->lh, &fib6_walkers); 85 list_add(&w->lh, &net->ipv6.fib6_walkers);
88 write_unlock_bh(&fib6_walker_lock); 86 write_unlock_bh(&net->ipv6.fib6_walker_lock);
89} 87}
90 88
91static void fib6_walker_unlink(struct fib6_walker *w) 89static void fib6_walker_unlink(struct net *net, struct fib6_walker *w)
92{ 90{
93 write_lock_bh(&fib6_walker_lock); 91 write_lock_bh(&net->ipv6.fib6_walker_lock);
94 list_del(&w->lh); 92 list_del(&w->lh);
95 write_unlock_bh(&fib6_walker_lock); 93 write_unlock_bh(&net->ipv6.fib6_walker_lock);
96} 94}
97 95
98static int fib6_new_sernum(struct net *net) 96static int fib6_new_sernum(struct net *net)
@@ -325,12 +323,13 @@ static int fib6_dump_node(struct fib6_walker *w)
325 323
326static void fib6_dump_end(struct netlink_callback *cb) 324static void fib6_dump_end(struct netlink_callback *cb)
327{ 325{
326 struct net *net = sock_net(cb->skb->sk);
328 struct fib6_walker *w = (void *)cb->args[2]; 327 struct fib6_walker *w = (void *)cb->args[2];
329 328
330 if (w) { 329 if (w) {
331 if (cb->args[4]) { 330 if (cb->args[4]) {
332 cb->args[4] = 0; 331 cb->args[4] = 0;
333 fib6_walker_unlink(w); 332 fib6_walker_unlink(net, w);
334 } 333 }
335 cb->args[2] = 0; 334 cb->args[2] = 0;
336 kfree(w); 335 kfree(w);
@@ -348,6 +347,7 @@ static int fib6_dump_done(struct netlink_callback *cb)
348static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, 347static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
349 struct netlink_callback *cb) 348 struct netlink_callback *cb)
350{ 349{
350 struct net *net = sock_net(skb->sk);
351 struct fib6_walker *w; 351 struct fib6_walker *w;
352 int res; 352 int res;
353 353
@@ -359,7 +359,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
359 w->skip = 0; 359 w->skip = 0;
360 360
361 read_lock_bh(&table->tb6_lock); 361 read_lock_bh(&table->tb6_lock);
362 res = fib6_walk(w); 362 res = fib6_walk(net, w);
363 read_unlock_bh(&table->tb6_lock); 363 read_unlock_bh(&table->tb6_lock);
364 if (res > 0) { 364 if (res > 0) {
365 cb->args[4] = 1; 365 cb->args[4] = 1;
@@ -379,7 +379,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
379 res = fib6_walk_continue(w); 379 res = fib6_walk_continue(w);
380 read_unlock_bh(&table->tb6_lock); 380 read_unlock_bh(&table->tb6_lock);
381 if (res <= 0) { 381 if (res <= 0) {
382 fib6_walker_unlink(w); 382 fib6_walker_unlink(net, w);
383 cb->args[4] = 0; 383 cb->args[4] = 0;
384 } 384 }
385 } 385 }
@@ -1340,8 +1340,8 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1340 } 1340 }
1341#endif 1341#endif
1342 1342
1343 read_lock(&fib6_walker_lock); 1343 read_lock(&net->ipv6.fib6_walker_lock);
1344 FOR_WALKERS(w) { 1344 FOR_WALKERS(net, w) {
1345 if (!child) { 1345 if (!child) {
1346 if (w->root == fn) { 1346 if (w->root == fn) {
1347 w->root = w->node = NULL; 1347 w->root = w->node = NULL;
@@ -1368,7 +1368,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
1368 } 1368 }
1369 } 1369 }
1370 } 1370 }
1371 read_unlock(&fib6_walker_lock); 1371 read_unlock(&net->ipv6.fib6_walker_lock);
1372 1372
1373 node_free(fn); 1373 node_free(fn);
1374 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) 1374 if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
@@ -1411,8 +1411,8 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1411 } 1411 }
1412 1412
1413 /* Adjust walkers */ 1413 /* Adjust walkers */
1414 read_lock(&fib6_walker_lock); 1414 read_lock(&net->ipv6.fib6_walker_lock);
1415 FOR_WALKERS(w) { 1415 FOR_WALKERS(net, w) {
1416 if (w->state == FWS_C && w->leaf == rt) { 1416 if (w->state == FWS_C && w->leaf == rt) {
1417 RT6_TRACE("walker %p adjusted by delroute\n", w); 1417 RT6_TRACE("walker %p adjusted by delroute\n", w);
1418 w->leaf = rt->dst.rt6_next; 1418 w->leaf = rt->dst.rt6_next;
@@ -1420,7 +1420,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1420 w->state = FWS_U; 1420 w->state = FWS_U;
1421 } 1421 }
1422 } 1422 }
1423 read_unlock(&fib6_walker_lock); 1423 read_unlock(&net->ipv6.fib6_walker_lock);
1424 1424
1425 rt->dst.rt6_next = NULL; 1425 rt->dst.rt6_next = NULL;
1426 1426
@@ -1588,17 +1588,17 @@ skip:
1588 } 1588 }
1589} 1589}
1590 1590
1591static int fib6_walk(struct fib6_walker *w) 1591static int fib6_walk(struct net *net, struct fib6_walker *w)
1592{ 1592{
1593 int res; 1593 int res;
1594 1594
1595 w->state = FWS_INIT; 1595 w->state = FWS_INIT;
1596 w->node = w->root; 1596 w->node = w->root;
1597 1597
1598 fib6_walker_link(w); 1598 fib6_walker_link(net, w);
1599 res = fib6_walk_continue(w); 1599 res = fib6_walk_continue(w);
1600 if (res <= 0) 1600 if (res <= 0)
1601 fib6_walker_unlink(w); 1601 fib6_walker_unlink(net, w);
1602 return res; 1602 return res;
1603} 1603}
1604 1604
@@ -1668,7 +1668,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
1668 c.arg = arg; 1668 c.arg = arg;
1669 c.net = net; 1669 c.net = net;
1670 1670
1671 fib6_walk(&c.w); 1671 fib6_walk(net, &c.w);
1672} 1672}
1673 1673
1674static void __fib6_clean_all(struct net *net, 1674static void __fib6_clean_all(struct net *net,
@@ -1725,14 +1725,15 @@ static void fib6_flush_trees(struct net *net)
1725 * Garbage collection 1725 * Garbage collection
1726 */ 1726 */
1727 1727
1728static struct fib6_gc_args 1728struct fib6_gc_args
1729{ 1729{
1730 int timeout; 1730 int timeout;
1731 int more; 1731 int more;
1732} gc_args; 1732};
1733 1733
1734static int fib6_age(struct rt6_info *rt, void *arg) 1734static int fib6_age(struct rt6_info *rt, void *arg)
1735{ 1735{
1736 struct fib6_gc_args *gc_args = arg;
1736 unsigned long now = jiffies; 1737 unsigned long now = jiffies;
1737 1738
1738 /* 1739 /*
@@ -1748,10 +1749,10 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1748 RT6_TRACE("expiring %p\n", rt); 1749 RT6_TRACE("expiring %p\n", rt);
1749 return -1; 1750 return -1;
1750 } 1751 }
1751 gc_args.more++; 1752 gc_args->more++;
1752 } else if (rt->rt6i_flags & RTF_CACHE) { 1753 } else if (rt->rt6i_flags & RTF_CACHE) {
1753 if (atomic_read(&rt->dst.__refcnt) == 0 && 1754 if (atomic_read(&rt->dst.__refcnt) == 0 &&
1754 time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { 1755 time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1755 RT6_TRACE("aging clone %p\n", rt); 1756 RT6_TRACE("aging clone %p\n", rt);
1756 return -1; 1757 return -1;
1757 } else if (rt->rt6i_flags & RTF_GATEWAY) { 1758 } else if (rt->rt6i_flags & RTF_GATEWAY) {
@@ -1769,21 +1770,20 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1769 return -1; 1770 return -1;
1770 } 1771 }
1771 } 1772 }
1772 gc_args.more++; 1773 gc_args->more++;
1773 } 1774 }
1774 1775
1775 return 0; 1776 return 0;
1776} 1777}
1777 1778
1778static DEFINE_SPINLOCK(fib6_gc_lock);
1779
1780void fib6_run_gc(unsigned long expires, struct net *net, bool force) 1779void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1781{ 1780{
1781 struct fib6_gc_args gc_args;
1782 unsigned long now; 1782 unsigned long now;
1783 1783
1784 if (force) { 1784 if (force) {
1785 spin_lock_bh(&fib6_gc_lock); 1785 spin_lock_bh(&net->ipv6.fib6_gc_lock);
1786 } else if (!spin_trylock_bh(&fib6_gc_lock)) { 1786 } else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) {
1787 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); 1787 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
1788 return; 1788 return;
1789 } 1789 }
@@ -1792,7 +1792,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1792 1792
1793 gc_args.more = icmp6_dst_gc(); 1793 gc_args.more = icmp6_dst_gc();
1794 1794
1795 fib6_clean_all(net, fib6_age, NULL); 1795 fib6_clean_all(net, fib6_age, &gc_args);
1796 now = jiffies; 1796 now = jiffies;
1797 net->ipv6.ip6_rt_last_gc = now; 1797 net->ipv6.ip6_rt_last_gc = now;
1798 1798
@@ -1802,7 +1802,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1802 + net->ipv6.sysctl.ip6_rt_gc_interval)); 1802 + net->ipv6.sysctl.ip6_rt_gc_interval));
1803 else 1803 else
1804 del_timer(&net->ipv6.ip6_fib_timer); 1804 del_timer(&net->ipv6.ip6_fib_timer);
1805 spin_unlock_bh(&fib6_gc_lock); 1805 spin_unlock_bh(&net->ipv6.fib6_gc_lock);
1806} 1806}
1807 1807
1808static void fib6_gc_timer_cb(unsigned long arg) 1808static void fib6_gc_timer_cb(unsigned long arg)
@@ -1814,6 +1814,9 @@ static int __net_init fib6_net_init(struct net *net)
1814{ 1814{
1815 size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; 1815 size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
1816 1816
1817 spin_lock_init(&net->ipv6.fib6_gc_lock);
1818 rwlock_init(&net->ipv6.fib6_walker_lock);
1819 INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
1817 setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net); 1820 setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);
1818 1821
1819 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); 1822 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
@@ -1974,7 +1977,8 @@ static int ipv6_route_yield(struct fib6_walker *w)
1974 return 0; 1977 return 0;
1975} 1978}
1976 1979
1977static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter) 1980static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
1981 struct net *net)
1978{ 1982{
1979 memset(&iter->w, 0, sizeof(iter->w)); 1983 memset(&iter->w, 0, sizeof(iter->w));
1980 iter->w.func = ipv6_route_yield; 1984 iter->w.func = ipv6_route_yield;
@@ -1984,7 +1988,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter)
1984 iter->w.args = iter; 1988 iter->w.args = iter;
1985 iter->sernum = iter->w.root->fn_sernum; 1989 iter->sernum = iter->w.root->fn_sernum;
1986 INIT_LIST_HEAD(&iter->w.lh); 1990 INIT_LIST_HEAD(&iter->w.lh);
1987 fib6_walker_link(&iter->w); 1991 fib6_walker_link(net, &iter->w);
1988} 1992}
1989 1993
1990static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, 1994static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
@@ -2045,16 +2049,16 @@ iter_table:
2045 ++*pos; 2049 ++*pos;
2046 return iter->w.leaf; 2050 return iter->w.leaf;
2047 } else if (r < 0) { 2051 } else if (r < 0) {
2048 fib6_walker_unlink(&iter->w); 2052 fib6_walker_unlink(net, &iter->w);
2049 return NULL; 2053 return NULL;
2050 } 2054 }
2051 fib6_walker_unlink(&iter->w); 2055 fib6_walker_unlink(net, &iter->w);
2052 2056
2053 iter->tbl = ipv6_route_seq_next_table(iter->tbl, net); 2057 iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
2054 if (!iter->tbl) 2058 if (!iter->tbl)
2055 return NULL; 2059 return NULL;
2056 2060
2057 ipv6_route_seq_setup_walk(iter); 2061 ipv6_route_seq_setup_walk(iter, net);
2058 goto iter_table; 2062 goto iter_table;
2059} 2063}
2060 2064
@@ -2069,7 +2073,7 @@ static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
2069 iter->skip = *pos; 2073 iter->skip = *pos;
2070 2074
2071 if (iter->tbl) { 2075 if (iter->tbl) {
2072 ipv6_route_seq_setup_walk(iter); 2076 ipv6_route_seq_setup_walk(iter, net);
2073 return ipv6_route_seq_next(seq, NULL, pos); 2077 return ipv6_route_seq_next(seq, NULL, pos);
2074 } else { 2078 } else {
2075 return NULL; 2079 return NULL;
@@ -2085,10 +2089,11 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
2085static void ipv6_route_seq_stop(struct seq_file *seq, void *v) 2089static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
2086 __releases(RCU_BH) 2090 __releases(RCU_BH)
2087{ 2091{
2092 struct net *net = seq_file_net(seq);
2088 struct ipv6_route_iter *iter = seq->private; 2093 struct ipv6_route_iter *iter = seq->private;
2089 2094
2090 if (ipv6_route_iter_active(iter)) 2095 if (ipv6_route_iter_active(iter))
2091 fib6_walker_unlink(&iter->w); 2096 fib6_walker_unlink(net, &iter->w);
2092 2097
2093 rcu_read_unlock_bh(); 2098 rcu_read_unlock_bh();
2094} 2099}
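The ip6_fib.c hunks above move the FIB walker list, the walker lock and the GC lock out of file-scope globals into struct netns_ipv6, and turn the global gc_args into a stack variable passed through fib6_clean_all(). A minimal sketch of the resulting calling convention, assembled from the hunks above (the surrounding declarations are illustrative, not part of the patch):

	/* per-netns state, initialised in fib6_net_init() */
	spin_lock_init(&net->ipv6.fib6_gc_lock);
	rwlock_init(&net->ipv6.fib6_walker_lock);
	INIT_LIST_HEAD(&net->ipv6.fib6_walkers);

	/* walks take the netns explicitly; the walker is linked on net->ipv6.fib6_walkers */
	res = fib6_walk(net, &c.w);

	/* GC arguments live on the caller's stack and travel via the walk argument */
	struct fib6_gc_args gc_args;
	gc_args.more = icmp6_dst_gc();
	fib6_clean_all(net, fib6_age, &gc_args);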
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 1f9ebe3cbb4a..dc2db4f7b182 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
540 } 540 }
541 spin_lock_bh(&ip6_sk_fl_lock); 541 spin_lock_bh(&ip6_sk_fl_lock);
542 for (sflp = &np->ipv6_fl_list; 542 for (sflp = &np->ipv6_fl_list;
543 (sfl = rcu_dereference(*sflp)) != NULL; 543 (sfl = rcu_dereference_protected(*sflp,
544 lockdep_is_held(&ip6_sk_fl_lock))) != NULL;
544 sflp = &sfl->next) { 545 sflp = &sfl->next) {
545 if (sfl->fl->label == freq.flr_label) { 546 if (sfl->fl->label == freq.flr_label) {
546 if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) 547 if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
547 np->flow_label &= ~IPV6_FLOWLABEL_MASK; 548 np->flow_label &= ~IPV6_FLOWLABEL_MASK;
548 *sflp = rcu_dereference(sfl->next); 549 *sflp = sfl->next;
549 spin_unlock_bh(&ip6_sk_fl_lock); 550 spin_unlock_bh(&ip6_sk_fl_lock);
550 fl_release(sfl->fl); 551 fl_release(sfl->fl);
551 kfree_rcu(sfl, rcu); 552 kfree_rcu(sfl, rcu);
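The flowlabel hunk above replaces a plain rcu_dereference() with rcu_dereference_protected() because the list walk runs with ip6_sk_fl_lock held: the traversal is writer-side, so lockdep can verify the lock instead of relying on an RCU read-side section. A generic sketch of that pattern with illustrative names (not the flowlabel code itself):

	spin_lock_bh(&list_lock);
	for (pp = &head;
	     (p = rcu_dereference_protected(*pp,
				lockdep_is_held(&list_lock))) != NULL;
	     pp = &p->next) {
		if (p->key == key) {
			*pp = p->next;		/* unlink while the lock is held */
			break;
		}
	}
	spin_unlock_bh(&list_lock);
	if (p)
		kfree_rcu(p, rcu);		/* free after a grace period */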
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f37f18b6b40c..4e636e60a360 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -360,7 +360,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
360 struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); 360 struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
361 361
362 ip6gre_tunnel_unlink(ign, t); 362 ip6gre_tunnel_unlink(ign, t);
363 ip6_tnl_dst_reset(t); 363 dst_cache_reset(&t->dst_cache);
364 dev_put(dev); 364 dev_put(dev);
365} 365}
366 366
@@ -633,7 +633,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
633 } 633 }
634 634
635 if (!fl6->flowi6_mark) 635 if (!fl6->flowi6_mark)
636 dst = ip6_tnl_dst_get(tunnel); 636 dst = dst_cache_get(&tunnel->dst_cache);
637 637
638 if (!dst) { 638 if (!dst) {
639 dst = ip6_route_output(net, NULL, fl6); 639 dst = ip6_route_output(net, NULL, fl6);
@@ -702,7 +702,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
702 } 702 }
703 703
704 if (!fl6->flowi6_mark && ndst) 704 if (!fl6->flowi6_mark && ndst)
705 ip6_tnl_dst_set(tunnel, ndst); 705 dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr);
706 skb_dst_set(skb, dst); 706 skb_dst_set(skb, dst);
707 707
708 proto = NEXTHDR_GRE; 708 proto = NEXTHDR_GRE;
@@ -777,6 +777,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
777 __u32 mtu; 777 __u32 mtu;
778 int err; 778 int err;
779 779
780 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
781
780 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) 782 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
781 encap_limit = t->parms.encap_limit; 783 encap_limit = t->parms.encap_limit;
782 784
@@ -1009,7 +1011,7 @@ static int ip6gre_tnl_change(struct ip6_tnl *t,
1009 t->parms.o_key = p->o_key; 1011 t->parms.o_key = p->o_key;
1010 t->parms.i_flags = p->i_flags; 1012 t->parms.i_flags = p->i_flags;
1011 t->parms.o_flags = p->o_flags; 1013 t->parms.o_flags = p->o_flags;
1012 ip6_tnl_dst_reset(t); 1014 dst_cache_reset(&t->dst_cache);
1013 ip6gre_tnl_link_config(t, set_mtu); 1015 ip6gre_tnl_link_config(t, set_mtu);
1014 return 0; 1016 return 0;
1015} 1017}
@@ -1219,7 +1221,7 @@ static void ip6gre_dev_free(struct net_device *dev)
1219{ 1221{
1220 struct ip6_tnl *t = netdev_priv(dev); 1222 struct ip6_tnl *t = netdev_priv(dev);
1221 1223
1222 ip6_tnl_dst_destroy(t); 1224 dst_cache_destroy(&t->dst_cache);
1223 free_percpu(dev->tstats); 1225 free_percpu(dev->tstats);
1224 free_netdev(dev); 1226 free_netdev(dev);
1225} 1227}
@@ -1257,7 +1259,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
1257 if (!dev->tstats) 1259 if (!dev->tstats)
1258 return -ENOMEM; 1260 return -ENOMEM;
1259 1261
1260 ret = ip6_tnl_dst_init(tunnel); 1262 ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1261 if (ret) { 1263 if (ret) {
1262 free_percpu(dev->tstats); 1264 free_percpu(dev->tstats);
1263 dev->tstats = NULL; 1265 dev->tstats = NULL;
@@ -1512,6 +1514,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
1512 dev->destructor = ip6gre_dev_free; 1514 dev->destructor = ip6gre_dev_free;
1513 1515
1514 dev->features |= NETIF_F_NETNS_LOCAL; 1516 dev->features |= NETIF_F_NETNS_LOCAL;
1517 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1515} 1518}
1516 1519
1517static int ip6gre_newlink(struct net *src_net, struct net_device *dev, 1520static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 9075acf081dd..c05c425c2389 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,7 +49,7 @@
49 49
50int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) 50int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
51{ 51{
52 if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { 52 if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
53 const struct inet6_protocol *ipprot; 53 const struct inet6_protocol *ipprot;
54 54
55 ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); 55 ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
@@ -134,6 +134,16 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
134 IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1) 134 IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1)
135 goto err; 135 goto err;
136 136
137 /* If enabled, drop unicast packets that were encapsulated in link-layer
138 * multicast or broadcast to protect against the so-called "hole-196"
139 * attack in 802.11 wireless.
140 */
141 if (!ipv6_addr_is_multicast(&hdr->daddr) &&
142 (skb->pkt_type == PACKET_BROADCAST ||
143 skb->pkt_type == PACKET_MULTICAST) &&
144 idev->cnf.drop_unicast_in_l2_multicast)
145 goto err;
146
137 /* RFC4291 2.7 147 /* RFC4291 2.7
138 * Nodes must not originate a packet to a multicast address whose scope 148 * Nodes must not originate a packet to a multicast address whose scope
139 * field contains the reserved value 0; if such a packet is received, it 149 * field contains the reserved value 0; if such a packet is received, it

diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index eeca943f12dc..82e9f3076028 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -258,6 +258,19 @@ out:
258 return pp; 258 return pp;
259} 259}
260 260
261static struct sk_buff **sit_gro_receive(struct sk_buff **head,
262 struct sk_buff *skb)
263{
264 if (NAPI_GRO_CB(skb)->encap_mark) {
265 NAPI_GRO_CB(skb)->flush = 1;
266 return NULL;
267 }
268
269 NAPI_GRO_CB(skb)->encap_mark = 1;
270
271 return ipv6_gro_receive(head, skb);
272}
273
261static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) 274static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
262{ 275{
263 const struct net_offload *ops; 276 const struct net_offload *ops;
@@ -302,7 +315,7 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
302static const struct net_offload sit_offload = { 315static const struct net_offload sit_offload = {
303 .callbacks = { 316 .callbacks = {
304 .gso_segment = ipv6_gso_segment, 317 .gso_segment = ipv6_gso_segment,
305 .gro_receive = ipv6_gro_receive, 318 .gro_receive = sit_gro_receive,
306 .gro_complete = sit_gro_complete, 319 .gro_complete = sit_gro_complete,
307 }, 320 },
308}; 321};
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 23de98f976d5..9428345d3a07 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -332,7 +332,6 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
332static inline int ip6_forward_finish(struct net *net, struct sock *sk, 332static inline int ip6_forward_finish(struct net *net, struct sock *sk,
333 struct sk_buff *skb) 333 struct sk_buff *skb)
334{ 334{
335 skb_sender_cpu_clear(skb);
336 return dst_output(net, sk, skb); 335 return dst_output(net, sk, skb);
337} 336}
338 337
@@ -909,6 +908,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
909 struct rt6_info *rt; 908 struct rt6_info *rt;
910#endif 909#endif
911 int err; 910 int err;
911 int flags = 0;
912 912
913 /* The correct way to handle this would be to do 913 /* The correct way to handle this would be to do
914 * ip6_route_get_saddr, and then ip6_route_output; however, 914 * ip6_route_get_saddr, and then ip6_route_output; however,
@@ -940,10 +940,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
940 dst_release(*dst); 940 dst_release(*dst);
941 *dst = NULL; 941 *dst = NULL;
942 } 942 }
943
944 if (fl6->flowi6_oif)
945 flags |= RT6_LOOKUP_F_IFACE;
943 } 946 }
944 947
945 if (!*dst) 948 if (!*dst)
946 *dst = ip6_route_output(net, sk, fl6); 949 *dst = ip6_route_output_flags(net, sk, fl6, flags);
947 950
948 err = (*dst)->error; 951 err = (*dst)->error;
949 if (err) 952 if (err)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 137fca42aaa6..eb2ac4bb09ce 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -122,97 +122,6 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
122 return &dev->stats; 122 return &dev->stats;
123} 123}
124 124
125/*
126 * Locking : hash tables are protected by RCU and RTNL
127 */
128
129static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst,
130 struct dst_entry *dst)
131{
132 write_seqlock_bh(&idst->lock);
133 dst_release(rcu_dereference_protected(
134 idst->dst,
135 lockdep_is_held(&idst->lock.lock)));
136 if (dst) {
137 dst_hold(dst);
138 idst->cookie = rt6_get_cookie((struct rt6_info *)dst);
139 } else {
140 idst->cookie = 0;
141 }
142 rcu_assign_pointer(idst->dst, dst);
143 write_sequnlock_bh(&idst->lock);
144}
145
146struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t)
147{
148 struct ip6_tnl_dst *idst;
149 struct dst_entry *dst;
150 unsigned int seq;
151 u32 cookie;
152
153 idst = raw_cpu_ptr(t->dst_cache);
154
155 rcu_read_lock();
156 do {
157 seq = read_seqbegin(&idst->lock);
158 dst = rcu_dereference(idst->dst);
159 cookie = idst->cookie;
160 } while (read_seqretry(&idst->lock, seq));
161
162 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
163 dst = NULL;
164 rcu_read_unlock();
165
166 if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) {
167 ip6_tnl_per_cpu_dst_set(idst, NULL);
168 dst_release(dst);
169 dst = NULL;
170 }
171 return dst;
172}
173EXPORT_SYMBOL_GPL(ip6_tnl_dst_get);
174
175void ip6_tnl_dst_reset(struct ip6_tnl *t)
176{
177 int i;
178
179 for_each_possible_cpu(i)
180 ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
181}
182EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
183
184void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst)
185{
186 ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst);
187
188}
189EXPORT_SYMBOL_GPL(ip6_tnl_dst_set);
190
191void ip6_tnl_dst_destroy(struct ip6_tnl *t)
192{
193 if (!t->dst_cache)
194 return;
195
196 ip6_tnl_dst_reset(t);
197 free_percpu(t->dst_cache);
198}
199EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy);
200
201int ip6_tnl_dst_init(struct ip6_tnl *t)
202{
203 int i;
204
205 t->dst_cache = alloc_percpu(struct ip6_tnl_dst);
206 if (!t->dst_cache)
207 return -ENOMEM;
208
209 for_each_possible_cpu(i)
210 seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock);
211
212 return 0;
213}
214EXPORT_SYMBOL_GPL(ip6_tnl_dst_init);
215
216/** 125/**
217 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses 126 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
218 * @remote: the address of the tunnel exit-point 127 * @remote: the address of the tunnel exit-point
@@ -329,7 +238,7 @@ static void ip6_dev_free(struct net_device *dev)
329{ 238{
330 struct ip6_tnl *t = netdev_priv(dev); 239 struct ip6_tnl *t = netdev_priv(dev);
331 240
332 ip6_tnl_dst_destroy(t); 241 dst_cache_destroy(&t->dst_cache);
333 free_percpu(dev->tstats); 242 free_percpu(dev->tstats);
334 free_netdev(dev); 243 free_netdev(dev);
335} 244}
@@ -462,7 +371,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
462 RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); 371 RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
463 else 372 else
464 ip6_tnl_unlink(ip6n, t); 373 ip6_tnl_unlink(ip6n, t);
465 ip6_tnl_dst_reset(t); 374 dst_cache_reset(&t->dst_cache);
466 dev_put(dev); 375 dev_put(dev);
467} 376}
468 377
@@ -1069,7 +978,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1069 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); 978 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
1070 neigh_release(neigh); 979 neigh_release(neigh);
1071 } else if (!fl6->flowi6_mark) 980 } else if (!fl6->flowi6_mark)
1072 dst = ip6_tnl_dst_get(t); 981 dst = dst_cache_get(&t->dst_cache);
1073 982
1074 if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) 983 if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
1075 goto tx_err_link_failure; 984 goto tx_err_link_failure;
@@ -1133,7 +1042,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
1133 } 1042 }
1134 1043
1135 if (!fl6->flowi6_mark && ndst) 1044 if (!fl6->flowi6_mark && ndst)
1136 ip6_tnl_dst_set(t, ndst); 1045 dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
1137 skb_dst_set(skb, dst); 1046 skb_dst_set(skb, dst);
1138 1047
1139 skb->transport_header = skb->network_header; 1048 skb->transport_header = skb->network_header;
@@ -1180,6 +1089,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1180 u8 tproto; 1089 u8 tproto;
1181 int err; 1090 int err;
1182 1091
1092 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1093
1183 tproto = ACCESS_ONCE(t->parms.proto); 1094 tproto = ACCESS_ONCE(t->parms.proto);
1184 if (tproto != IPPROTO_IPIP && tproto != 0) 1095 if (tproto != IPPROTO_IPIP && tproto != 0)
1185 return -1; 1096 return -1;
@@ -1366,7 +1277,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
1366 t->parms.flowinfo = p->flowinfo; 1277 t->parms.flowinfo = p->flowinfo;
1367 t->parms.link = p->link; 1278 t->parms.link = p->link;
1368 t->parms.proto = p->proto; 1279 t->parms.proto = p->proto;
1369 ip6_tnl_dst_reset(t); 1280 dst_cache_reset(&t->dst_cache);
1370 ip6_tnl_link_config(t); 1281 ip6_tnl_link_config(t);
1371 return 0; 1282 return 0;
1372} 1283}
@@ -1637,7 +1548,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
1637 if (!dev->tstats) 1548 if (!dev->tstats)
1638 return -ENOMEM; 1549 return -ENOMEM;
1639 1550
1640 ret = ip6_tnl_dst_init(t); 1551 ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
1641 if (ret) { 1552 if (ret) {
1642 free_percpu(dev->tstats); 1553 free_percpu(dev->tstats);
1643 dev->tstats = NULL; 1554 dev->tstats = NULL;
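The per-cpu dst cache implementation deleted above is replaced across ip6_gre.c, ip6_tunnel.c and ip6_vti.c by the generic dst_cache helpers. A rough sketch of the tunnel lifecycle as these hunks use it (error handling and the flowi6_mark checks elided; this mirrors the calls visible in the diff rather than documenting the whole dst_cache API):

	/* ndo_init: allocate the per-cpu cache */
	ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);

	/* xmit: try the cache first, fall back to a route lookup */
	dst = dst_cache_get(&t->dst_cache);
	if (!dst) {
		dst = ip6_route_output(net, NULL, fl6);
		dst_cache_set_ip6(&t->dst_cache, dst, &fl6->saddr);
	}

	/* tunnel parameters changed or device unlinked: invalidate */
	dst_cache_reset(&t->dst_cache);

	/* ndo_uninit / destructor: release everything */
	dst_cache_destroy(&t->dst_cache);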
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 14dacf1df529..a7520528ecd2 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -73,8 +73,8 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
73 struct sk_buff *skb, 73 struct sk_buff *skb,
74 struct net_device *dev, struct in6_addr *saddr, 74 struct net_device *dev, struct in6_addr *saddr,
75 struct in6_addr *daddr, 75 struct in6_addr *daddr,
76 __u8 prio, __u8 ttl, __be16 src_port, 76 __u8 prio, __u8 ttl, __be32 label,
77 __be16 dst_port, bool nocheck) 77 __be16 src_port, __be16 dst_port, bool nocheck)
78{ 78{
79 struct udphdr *uh; 79 struct udphdr *uh;
80 struct ipv6hdr *ip6h; 80 struct ipv6hdr *ip6h;
@@ -98,7 +98,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
98 __skb_push(skb, sizeof(*ip6h)); 98 __skb_push(skb, sizeof(*ip6h));
99 skb_reset_network_header(skb); 99 skb_reset_network_header(skb);
100 ip6h = ipv6_hdr(skb); 100 ip6h = ipv6_hdr(skb);
101 ip6_flow_hdr(ip6h, prio, htonl(0)); 101 ip6_flow_hdr(ip6h, prio, label);
102 ip6h->payload_len = htons(skb->len); 102 ip6h->payload_len = htons(skb->len);
103 ip6h->nexthdr = IPPROTO_UDP; 103 ip6h->nexthdr = IPPROTO_UDP;
104 ip6h->hop_limit = ttl; 104 ip6h->hop_limit = ttl;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 0a8610b33d79..d90a11f14040 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -640,7 +640,7 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
640 t->parms.i_key = p->i_key; 640 t->parms.i_key = p->i_key;
641 t->parms.o_key = p->o_key; 641 t->parms.o_key = p->o_key;
642 t->parms.proto = p->proto; 642 t->parms.proto = p->proto;
643 ip6_tnl_dst_reset(t); 643 dst_cache_reset(&t->dst_cache);
644 vti6_link_config(t); 644 vti6_link_config(t);
645 return 0; 645 return 0;
646} 646}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ee56d0a8699..d64ee7e83664 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1574,9 +1574,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
1574 return NULL; 1574 return NULL;
1575 1575
1576 skb->priority = TC_PRIO_CONTROL; 1576 skb->priority = TC_PRIO_CONTROL;
1577 skb->reserved_tailroom = skb_end_offset(skb) -
1578 min(mtu, skb_end_offset(skb));
1579 skb_reserve(skb, hlen); 1577 skb_reserve(skb, hlen);
1578 skb_tailroom_reserve(skb, mtu, tlen);
1580 1579
1581 if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { 1580 if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
1582 /* <draft-ietf-magma-mld-source-05.txt>: 1581 /* <draft-ietf-magma-mld-source-05.txt>:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 84afb9a77278..c245895a3d41 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -883,6 +883,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
883 offsetof(struct nd_msg, opt)); 883 offsetof(struct nd_msg, opt));
884 struct ndisc_options ndopts; 884 struct ndisc_options ndopts;
885 struct net_device *dev = skb->dev; 885 struct net_device *dev = skb->dev;
886 struct inet6_dev *idev = __in6_dev_get(dev);
886 struct inet6_ifaddr *ifp; 887 struct inet6_ifaddr *ifp;
887 struct neighbour *neigh; 888 struct neighbour *neigh;
888 889
@@ -902,6 +903,14 @@ static void ndisc_recv_na(struct sk_buff *skb)
902 return; 903 return;
903 } 904 }
904 905
906 /* For some 802.11 wireless deployments (and possibly other networks),
907 * there will be a NA proxy and unsolicitd packets are attacks
908 * and thus should not be accepted.
909 */
910 if (!msg->icmph.icmp6_solicited && idev &&
911 idev->cnf.drop_unsolicited_na)
912 return;
913
905 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) { 914 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
906 ND_PRINTK(2, warn, "NS: invalid ND option\n"); 915 ND_PRINTK(2, warn, "NS: invalid ND option\n");
907 return; 916 return;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 99425cf2819b..84f9baf7aee8 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2071,9 +2071,28 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2071 return ret; 2071 return ret;
2072} 2072}
2073 2073
2074struct xt_table *ip6t_register_table(struct net *net, 2074static void __ip6t_unregister_table(struct net *net, struct xt_table *table)
2075 const struct xt_table *table, 2075{
2076 const struct ip6t_replace *repl) 2076 struct xt_table_info *private;
2077 void *loc_cpu_entry;
2078 struct module *table_owner = table->me;
2079 struct ip6t_entry *iter;
2080
2081 private = xt_unregister_table(table);
2082
2083 /* Decrease module usage counts and free resources */
2084 loc_cpu_entry = private->entries;
2085 xt_entry_foreach(iter, loc_cpu_entry, private->size)
2086 cleanup_entry(iter, net);
2087 if (private->number > private->initial_entries)
2088 module_put(table_owner);
2089 xt_free_table_info(private);
2090}
2091
2092int ip6t_register_table(struct net *net, const struct xt_table *table,
2093 const struct ip6t_replace *repl,
2094 const struct nf_hook_ops *ops,
2095 struct xt_table **res)
2077{ 2096{
2078 int ret; 2097 int ret;
2079 struct xt_table_info *newinfo; 2098 struct xt_table_info *newinfo;
@@ -2082,10 +2101,8 @@ struct xt_table *ip6t_register_table(struct net *net,
2082 struct xt_table *new_table; 2101 struct xt_table *new_table;
2083 2102
2084 newinfo = xt_alloc_table_info(repl->size); 2103 newinfo = xt_alloc_table_info(repl->size);
2085 if (!newinfo) { 2104 if (!newinfo)
2086 ret = -ENOMEM; 2105 return -ENOMEM;
2087 goto out;
2088 }
2089 2106
2090 loc_cpu_entry = newinfo->entries; 2107 loc_cpu_entry = newinfo->entries;
2091 memcpy(loc_cpu_entry, repl->entries, repl->size); 2108 memcpy(loc_cpu_entry, repl->entries, repl->size);
@@ -2099,30 +2116,28 @@ struct xt_table *ip6t_register_table(struct net *net,
2099 ret = PTR_ERR(new_table); 2116 ret = PTR_ERR(new_table);
2100 goto out_free; 2117 goto out_free;
2101 } 2118 }
2102 return new_table; 2119
2120 /* set res now, will see skbs right after nf_register_net_hooks */
2121 WRITE_ONCE(*res, new_table);
2122
2123 ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
2124 if (ret != 0) {
2125 __ip6t_unregister_table(net, new_table);
2126 *res = NULL;
2127 }
2128
2129 return ret;
2103 2130
2104out_free: 2131out_free:
2105 xt_free_table_info(newinfo); 2132 xt_free_table_info(newinfo);
2106out: 2133 return ret;
2107 return ERR_PTR(ret);
2108} 2134}
2109 2135
2110void ip6t_unregister_table(struct net *net, struct xt_table *table) 2136void ip6t_unregister_table(struct net *net, struct xt_table *table,
2137 const struct nf_hook_ops *ops)
2111{ 2138{
2112 struct xt_table_info *private; 2139 nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
2113 void *loc_cpu_entry; 2140 __ip6t_unregister_table(net, table);
2114 struct module *table_owner = table->me;
2115 struct ip6t_entry *iter;
2116
2117 private = xt_unregister_table(table);
2118
2119 /* Decrease module usage counts and free resources */
2120 loc_cpu_entry = private->entries;
2121 xt_entry_foreach(iter, loc_cpu_entry, private->size)
2122 cleanup_entry(iter, net);
2123 if (private->number > private->initial_entries)
2124 module_put(table_owner);
2125 xt_free_table_info(private);
2126} 2141}
2127 2142
2128/* Returns 1 if the type and code is matched by the range, 0 otherwise */ 2143/* Returns 1 if the type and code is matched by the range, 0 otherwise */
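ip6t_register_table() now registers the netfilter hooks itself, stores the table pointer through the res argument and returns an errno, while ip6t_unregister_table() takes the hook ops so it can unregister them before freeing the table. The per-table modules converted below all follow the same shape; a condensed sketch taken from the ip6table_filter conversion (names as in that file):

	/* module init: allocate the hook ops once */
	filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
	if (IS_ERR(filter_ops))
		return PTR_ERR(filter_ops);

	/* table init (from pernet init or on first use) */
	repl = ip6t_alloc_initial_table(&packet_filter);
	err = ip6t_register_table(net, &packet_filter, repl, filter_ops,
				  &net->ipv6.ip6table_filter);
	kfree(repl);

	/* pernet exit */
	if (net->ipv6.ip6table_filter) {
		ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops);
		net->ipv6.ip6table_filter = NULL;
	}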
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 8b277b983ca5..1343077dde93 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -22,12 +22,15 @@ MODULE_DESCRIPTION("ip6tables filter table");
22 (1 << NF_INET_FORWARD) | \ 22 (1 << NF_INET_FORWARD) | \
23 (1 << NF_INET_LOCAL_OUT)) 23 (1 << NF_INET_LOCAL_OUT))
24 24
25static int __net_init ip6table_filter_table_init(struct net *net);
26
25static const struct xt_table packet_filter = { 27static const struct xt_table packet_filter = {
26 .name = "filter", 28 .name = "filter",
27 .valid_hooks = FILTER_VALID_HOOKS, 29 .valid_hooks = FILTER_VALID_HOOKS,
28 .me = THIS_MODULE, 30 .me = THIS_MODULE,
29 .af = NFPROTO_IPV6, 31 .af = NFPROTO_IPV6,
30 .priority = NF_IP6_PRI_FILTER, 32 .priority = NF_IP6_PRI_FILTER,
33 .table_init = ip6table_filter_table_init,
31}; 34};
32 35
33/* The work comes in here from netfilter.c. */ 36/* The work comes in here from netfilter.c. */
@@ -44,9 +47,13 @@ static struct nf_hook_ops *filter_ops __read_mostly;
44static bool forward = true; 47static bool forward = true;
45module_param(forward, bool, 0000); 48module_param(forward, bool, 0000);
46 49
47static int __net_init ip6table_filter_net_init(struct net *net) 50static int __net_init ip6table_filter_table_init(struct net *net)
48{ 51{
49 struct ip6t_replace *repl; 52 struct ip6t_replace *repl;
53 int err;
54
55 if (net->ipv6.ip6table_filter)
56 return 0;
50 57
51 repl = ip6t_alloc_initial_table(&packet_filter); 58 repl = ip6t_alloc_initial_table(&packet_filter);
52 if (repl == NULL) 59 if (repl == NULL)
@@ -55,15 +62,26 @@ static int __net_init ip6table_filter_net_init(struct net *net)
55 ((struct ip6t_standard *)repl->entries)[1].target.verdict = 62 ((struct ip6t_standard *)repl->entries)[1].target.verdict =
56 forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; 63 forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
57 64
58 net->ipv6.ip6table_filter = 65 err = ip6t_register_table(net, &packet_filter, repl, filter_ops,
59 ip6t_register_table(net, &packet_filter, repl); 66 &net->ipv6.ip6table_filter);
60 kfree(repl); 67 kfree(repl);
61 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter); 68 return err;
69}
70
71static int __net_init ip6table_filter_net_init(struct net *net)
72{
73 if (net == &init_net || !forward)
74 return ip6table_filter_table_init(net);
75
76 return 0;
62} 77}
63 78
64static void __net_exit ip6table_filter_net_exit(struct net *net) 79static void __net_exit ip6table_filter_net_exit(struct net *net)
65{ 80{
66 ip6t_unregister_table(net, net->ipv6.ip6table_filter); 81 if (!net->ipv6.ip6table_filter)
82 return;
83 ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops);
84 net->ipv6.ip6table_filter = NULL;
67} 85}
68 86
69static struct pernet_operations ip6table_filter_net_ops = { 87static struct pernet_operations ip6table_filter_net_ops = {
@@ -75,28 +93,21 @@ static int __init ip6table_filter_init(void)
75{ 93{
76 int ret; 94 int ret;
77 95
96 filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
97 if (IS_ERR(filter_ops))
98 return PTR_ERR(filter_ops);
99
78 ret = register_pernet_subsys(&ip6table_filter_net_ops); 100 ret = register_pernet_subsys(&ip6table_filter_net_ops);
79 if (ret < 0) 101 if (ret < 0)
80 return ret; 102 kfree(filter_ops);
81
82 /* Register hooks */
83 filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook);
84 if (IS_ERR(filter_ops)) {
85 ret = PTR_ERR(filter_ops);
86 goto cleanup_table;
87 }
88 103
89 return ret; 104 return ret;
90
91 cleanup_table:
92 unregister_pernet_subsys(&ip6table_filter_net_ops);
93 return ret;
94} 105}
95 106
96static void __exit ip6table_filter_fini(void) 107static void __exit ip6table_filter_fini(void)
97{ 108{
98 xt_hook_unlink(&packet_filter, filter_ops);
99 unregister_pernet_subsys(&ip6table_filter_net_ops); 109 unregister_pernet_subsys(&ip6table_filter_net_ops);
110 kfree(filter_ops);
100} 111}
101 112
102module_init(ip6table_filter_init); 113module_init(ip6table_filter_init);
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index abe278b07932..cb2b28883252 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -23,12 +23,15 @@ MODULE_DESCRIPTION("ip6tables mangle table");
23 (1 << NF_INET_LOCAL_OUT) | \ 23 (1 << NF_INET_LOCAL_OUT) | \
24 (1 << NF_INET_POST_ROUTING)) 24 (1 << NF_INET_POST_ROUTING))
25 25
26static int __net_init ip6table_mangle_table_init(struct net *net);
27
26static const struct xt_table packet_mangler = { 28static const struct xt_table packet_mangler = {
27 .name = "mangle", 29 .name = "mangle",
28 .valid_hooks = MANGLE_VALID_HOOKS, 30 .valid_hooks = MANGLE_VALID_HOOKS,
29 .me = THIS_MODULE, 31 .me = THIS_MODULE,
30 .af = NFPROTO_IPV6, 32 .af = NFPROTO_IPV6,
31 .priority = NF_IP6_PRI_MANGLE, 33 .priority = NF_IP6_PRI_MANGLE,
34 .table_init = ip6table_mangle_table_init,
32}; 35};
33 36
34static unsigned int 37static unsigned int
@@ -88,26 +91,33 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
88} 91}
89 92
90static struct nf_hook_ops *mangle_ops __read_mostly; 93static struct nf_hook_ops *mangle_ops __read_mostly;
91static int __net_init ip6table_mangle_net_init(struct net *net) 94static int __net_init ip6table_mangle_table_init(struct net *net)
92{ 95{
93 struct ip6t_replace *repl; 96 struct ip6t_replace *repl;
97 int ret;
98
99 if (net->ipv6.ip6table_mangle)
100 return 0;
94 101
95 repl = ip6t_alloc_initial_table(&packet_mangler); 102 repl = ip6t_alloc_initial_table(&packet_mangler);
96 if (repl == NULL) 103 if (repl == NULL)
97 return -ENOMEM; 104 return -ENOMEM;
98 net->ipv6.ip6table_mangle = 105 ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops,
99 ip6t_register_table(net, &packet_mangler, repl); 106 &net->ipv6.ip6table_mangle);
100 kfree(repl); 107 kfree(repl);
101 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle); 108 return ret;
102} 109}
103 110
104static void __net_exit ip6table_mangle_net_exit(struct net *net) 111static void __net_exit ip6table_mangle_net_exit(struct net *net)
105{ 112{
106 ip6t_unregister_table(net, net->ipv6.ip6table_mangle); 113 if (!net->ipv6.ip6table_mangle)
114 return;
115
116 ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops);
117 net->ipv6.ip6table_mangle = NULL;
107} 118}
108 119
109static struct pernet_operations ip6table_mangle_net_ops = { 120static struct pernet_operations ip6table_mangle_net_ops = {
110 .init = ip6table_mangle_net_init,
111 .exit = ip6table_mangle_net_exit, 121 .exit = ip6table_mangle_net_exit,
112}; 122};
113 123
@@ -115,28 +125,28 @@ static int __init ip6table_mangle_init(void)
115{ 125{
116 int ret; 126 int ret;
117 127
128 mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook);
129 if (IS_ERR(mangle_ops))
130 return PTR_ERR(mangle_ops);
131
118 ret = register_pernet_subsys(&ip6table_mangle_net_ops); 132 ret = register_pernet_subsys(&ip6table_mangle_net_ops);
119 if (ret < 0) 133 if (ret < 0) {
134 kfree(mangle_ops);
120 return ret; 135 return ret;
121
122 /* Register hooks */
123 mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook);
124 if (IS_ERR(mangle_ops)) {
125 ret = PTR_ERR(mangle_ops);
126 goto cleanup_table;
127 } 136 }
128 137
129 return ret; 138 ret = ip6table_mangle_table_init(&init_net);
130 139 if (ret) {
131 cleanup_table: 140 unregister_pernet_subsys(&ip6table_mangle_net_ops);
132 unregister_pernet_subsys(&ip6table_mangle_net_ops); 141 kfree(mangle_ops);
142 }
133 return ret; 143 return ret;
134} 144}
135 145
136static void __exit ip6table_mangle_fini(void) 146static void __exit ip6table_mangle_fini(void)
137{ 147{
138 xt_hook_unlink(&packet_mangler, mangle_ops);
139 unregister_pernet_subsys(&ip6table_mangle_net_ops); 148 unregister_pernet_subsys(&ip6table_mangle_net_ops);
149 kfree(mangle_ops);
140} 150}
141 151
142module_init(ip6table_mangle_init); 152module_init(ip6table_mangle_init);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index de2a10a565f5..7d2bd940291f 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -20,6 +20,8 @@
20#include <net/netfilter/nf_nat_core.h> 20#include <net/netfilter/nf_nat_core.h>
21#include <net/netfilter/nf_nat_l3proto.h> 21#include <net/netfilter/nf_nat_l3proto.h>
22 22
23static int __net_init ip6table_nat_table_init(struct net *net);
24
23static const struct xt_table nf_nat_ipv6_table = { 25static const struct xt_table nf_nat_ipv6_table = {
24 .name = "nat", 26 .name = "nat",
25 .valid_hooks = (1 << NF_INET_PRE_ROUTING) | 27 .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -28,6 +30,7 @@ static const struct xt_table nf_nat_ipv6_table = {
28 (1 << NF_INET_LOCAL_IN), 30 (1 << NF_INET_LOCAL_IN),
29 .me = THIS_MODULE, 31 .me = THIS_MODULE,
30 .af = NFPROTO_IPV6, 32 .af = NFPROTO_IPV6,
33 .table_init = ip6table_nat_table_init,
31}; 34};
32 35
33static unsigned int ip6table_nat_do_chain(void *priv, 36static unsigned int ip6table_nat_do_chain(void *priv,
@@ -97,50 +100,50 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
97 }, 100 },
98}; 101};
99 102
100static int __net_init ip6table_nat_net_init(struct net *net) 103static int __net_init ip6table_nat_table_init(struct net *net)
101{ 104{
102 struct ip6t_replace *repl; 105 struct ip6t_replace *repl;
106 int ret;
107
108 if (net->ipv6.ip6table_nat)
109 return 0;
103 110
104 repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); 111 repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
105 if (repl == NULL) 112 if (repl == NULL)
106 return -ENOMEM; 113 return -ENOMEM;
107 net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); 114 ret = ip6t_register_table(net, &nf_nat_ipv6_table, repl,
115 nf_nat_ipv6_ops, &net->ipv6.ip6table_nat);
108 kfree(repl); 116 kfree(repl);
109 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat); 117 return ret;
110} 118}
111 119
112static void __net_exit ip6table_nat_net_exit(struct net *net) 120static void __net_exit ip6table_nat_net_exit(struct net *net)
113{ 121{
114 ip6t_unregister_table(net, net->ipv6.ip6table_nat); 122 if (!net->ipv6.ip6table_nat)
123 return;
124 ip6t_unregister_table(net, net->ipv6.ip6table_nat, nf_nat_ipv6_ops);
125 net->ipv6.ip6table_nat = NULL;
115} 126}
116 127
117static struct pernet_operations ip6table_nat_net_ops = { 128static struct pernet_operations ip6table_nat_net_ops = {
118 .init = ip6table_nat_net_init,
119 .exit = ip6table_nat_net_exit, 129 .exit = ip6table_nat_net_exit,
120}; 130};
121 131
122static int __init ip6table_nat_init(void) 132static int __init ip6table_nat_init(void)
123{ 133{
124 int err; 134 int ret = register_pernet_subsys(&ip6table_nat_net_ops);
125 135
126 err = register_pernet_subsys(&ip6table_nat_net_ops); 136 if (ret)
127 if (err < 0) 137 return ret;
128 goto err1;
129 138
130 err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); 139 ret = ip6table_nat_table_init(&init_net);
131 if (err < 0) 140 if (ret)
132 goto err2; 141 unregister_pernet_subsys(&ip6table_nat_net_ops);
133 return 0; 142 return ret;
134
135err2:
136 unregister_pernet_subsys(&ip6table_nat_net_ops);
137err1:
138 return err;
139} 143}
140 144
141static void __exit ip6table_nat_exit(void) 145static void __exit ip6table_nat_exit(void)
142{ 146{
143 nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
144 unregister_pernet_subsys(&ip6table_nat_net_ops); 147 unregister_pernet_subsys(&ip6table_nat_net_ops);
145} 148}
146 149
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 9021963565c3..d4bc56443dc1 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -9,12 +9,15 @@
9 9
10#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) 10#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
11 11
12static int __net_init ip6table_raw_table_init(struct net *net);
13
12static const struct xt_table packet_raw = { 14static const struct xt_table packet_raw = {
13 .name = "raw", 15 .name = "raw",
14 .valid_hooks = RAW_VALID_HOOKS, 16 .valid_hooks = RAW_VALID_HOOKS,
15 .me = THIS_MODULE, 17 .me = THIS_MODULE,
16 .af = NFPROTO_IPV6, 18 .af = NFPROTO_IPV6,
17 .priority = NF_IP6_PRI_RAW, 19 .priority = NF_IP6_PRI_RAW,
20 .table_init = ip6table_raw_table_init,
18}; 21};
19 22
20/* The work comes in here from netfilter.c. */ 23/* The work comes in here from netfilter.c. */
@@ -27,26 +30,32 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb,
27 30
28static struct nf_hook_ops *rawtable_ops __read_mostly; 31static struct nf_hook_ops *rawtable_ops __read_mostly;
29 32
30static int __net_init ip6table_raw_net_init(struct net *net) 33static int __net_init ip6table_raw_table_init(struct net *net)
31{ 34{
32 struct ip6t_replace *repl; 35 struct ip6t_replace *repl;
36 int ret;
37
38 if (net->ipv6.ip6table_raw)
39 return 0;
33 40
34 repl = ip6t_alloc_initial_table(&packet_raw); 41 repl = ip6t_alloc_initial_table(&packet_raw);
35 if (repl == NULL) 42 if (repl == NULL)
36 return -ENOMEM; 43 return -ENOMEM;
37 net->ipv6.ip6table_raw = 44 ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops,
38 ip6t_register_table(net, &packet_raw, repl); 45 &net->ipv6.ip6table_raw);
39 kfree(repl); 46 kfree(repl);
40 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw); 47 return ret;
41} 48}
42 49
43static void __net_exit ip6table_raw_net_exit(struct net *net) 50static void __net_exit ip6table_raw_net_exit(struct net *net)
44{ 51{
45 ip6t_unregister_table(net, net->ipv6.ip6table_raw); 52 if (!net->ipv6.ip6table_raw)
53 return;
54 ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops);
55 net->ipv6.ip6table_raw = NULL;
46} 56}
47 57
48static struct pernet_operations ip6table_raw_net_ops = { 58static struct pernet_operations ip6table_raw_net_ops = {
49 .init = ip6table_raw_net_init,
50 .exit = ip6table_raw_net_exit, 59 .exit = ip6table_raw_net_exit,
51}; 60};
52 61
@@ -54,28 +63,29 @@ static int __init ip6table_raw_init(void)
54{ 63{
55 int ret; 64 int ret;
56 65
66 /* Register hooks */
67 rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook);
68 if (IS_ERR(rawtable_ops))
69 return PTR_ERR(rawtable_ops);
70
57 ret = register_pernet_subsys(&ip6table_raw_net_ops); 71 ret = register_pernet_subsys(&ip6table_raw_net_ops);
58 if (ret < 0) 72 if (ret < 0) {
73 kfree(rawtable_ops);
59 return ret; 74 return ret;
60
61 /* Register hooks */
62 rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook);
63 if (IS_ERR(rawtable_ops)) {
64 ret = PTR_ERR(rawtable_ops);
65 goto cleanup_table;
66 } 75 }
67 76
68 return ret; 77 ret = ip6table_raw_table_init(&init_net);
69 78 if (ret) {
70 cleanup_table: 79 unregister_pernet_subsys(&ip6table_raw_net_ops);
71 unregister_pernet_subsys(&ip6table_raw_net_ops); 80 kfree(rawtable_ops);
81 }
72 return ret; 82 return ret;
73} 83}
74 84
75static void __exit ip6table_raw_fini(void) 85static void __exit ip6table_raw_fini(void)
76{ 86{
77 xt_hook_unlink(&packet_raw, rawtable_ops);
78 unregister_pernet_subsys(&ip6table_raw_net_ops); 87 unregister_pernet_subsys(&ip6table_raw_net_ops);
88 kfree(rawtable_ops);
79} 89}
80 90
81module_init(ip6table_raw_init); 91module_init(ip6table_raw_init);
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 0d856fedfeb0..cf26ccb04056 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -27,12 +27,15 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
27 (1 << NF_INET_FORWARD) | \ 27 (1 << NF_INET_FORWARD) | \
28 (1 << NF_INET_LOCAL_OUT) 28 (1 << NF_INET_LOCAL_OUT)
29 29
30static int __net_init ip6table_security_table_init(struct net *net);
31
30static const struct xt_table security_table = { 32static const struct xt_table security_table = {
31 .name = "security", 33 .name = "security",
32 .valid_hooks = SECURITY_VALID_HOOKS, 34 .valid_hooks = SECURITY_VALID_HOOKS,
33 .me = THIS_MODULE, 35 .me = THIS_MODULE,
34 .af = NFPROTO_IPV6, 36 .af = NFPROTO_IPV6,
35 .priority = NF_IP6_PRI_SECURITY, 37 .priority = NF_IP6_PRI_SECURITY,
38 .table_init = ip6table_security_table_init,
36}; 39};
37 40
38static unsigned int 41static unsigned int
@@ -44,26 +47,32 @@ ip6table_security_hook(void *priv, struct sk_buff *skb,
44 47
45static struct nf_hook_ops *sectbl_ops __read_mostly; 48static struct nf_hook_ops *sectbl_ops __read_mostly;
46 49
47static int __net_init ip6table_security_net_init(struct net *net) 50static int __net_init ip6table_security_table_init(struct net *net)
48{ 51{
49 struct ip6t_replace *repl; 52 struct ip6t_replace *repl;
53 int ret;
54
55 if (net->ipv6.ip6table_security)
56 return 0;
50 57
51 repl = ip6t_alloc_initial_table(&security_table); 58 repl = ip6t_alloc_initial_table(&security_table);
52 if (repl == NULL) 59 if (repl == NULL)
53 return -ENOMEM; 60 return -ENOMEM;
54 net->ipv6.ip6table_security = 61 ret = ip6t_register_table(net, &security_table, repl, sectbl_ops,
55 ip6t_register_table(net, &security_table, repl); 62 &net->ipv6.ip6table_security);
56 kfree(repl); 63 kfree(repl);
57 return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security); 64 return ret;
58} 65}
59 66
60static void __net_exit ip6table_security_net_exit(struct net *net) 67static void __net_exit ip6table_security_net_exit(struct net *net)
61{ 68{
62 ip6t_unregister_table(net, net->ipv6.ip6table_security); 69 if (!net->ipv6.ip6table_security)
70 return;
71 ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops);
72 net->ipv6.ip6table_security = NULL;
63} 73}
64 74
65static struct pernet_operations ip6table_security_net_ops = { 75static struct pernet_operations ip6table_security_net_ops = {
66 .init = ip6table_security_net_init,
67 .exit = ip6table_security_net_exit, 76 .exit = ip6table_security_net_exit,
68}; 77};
69 78
@@ -71,27 +80,28 @@ static int __init ip6table_security_init(void)
71{ 80{
72 int ret; 81 int ret;
73 82
83 sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
84 if (IS_ERR(sectbl_ops))
85 return PTR_ERR(sectbl_ops);
86
74 ret = register_pernet_subsys(&ip6table_security_net_ops); 87 ret = register_pernet_subsys(&ip6table_security_net_ops);
75 if (ret < 0) 88 if (ret < 0) {
89 kfree(sectbl_ops);
76 return ret; 90 return ret;
77
78 sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook);
79 if (IS_ERR(sectbl_ops)) {
80 ret = PTR_ERR(sectbl_ops);
81 goto cleanup_table;
82 } 91 }
83 92
84 return ret; 93 ret = ip6table_security_table_init(&init_net);
85 94 if (ret) {
86cleanup_table: 95 unregister_pernet_subsys(&ip6table_security_net_ops);
87 unregister_pernet_subsys(&ip6table_security_net_ops); 96 kfree(sectbl_ops);
97 }
88 return ret; 98 return ret;
89} 99}
90 100
91static void __exit ip6table_security_fini(void) 101static void __exit ip6table_security_fini(void)
92{ 102{
93 xt_hook_unlink(&security_table, sectbl_ops);
94 unregister_pernet_subsys(&ip6table_security_net_ops); 103 unregister_pernet_subsys(&ip6table_security_net_ops);
104 kfree(sectbl_ops);
95} 105}
96 106
97module_init(ip6table_security_init); 107module_init(ip6table_security_init);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 6ce309928841..e0be97e636a4 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -131,29 +131,15 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
131 u8 proto, void *data, __sum16 *check, 131 u8 proto, void *data, __sum16 *check,
132 int datalen, int oldlen) 132 int datalen, int oldlen)
133{ 133{
134 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
135 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
136
137 if (skb->ip_summed != CHECKSUM_PARTIAL) { 134 if (skb->ip_summed != CHECKSUM_PARTIAL) {
138 if (!(rt->rt6i_flags & RTF_LOCAL) && 135 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
139 (!skb->dev || skb->dev->features & 136
140 (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))) { 137 skb->ip_summed = CHECKSUM_PARTIAL;
141 skb->ip_summed = CHECKSUM_PARTIAL; 138 skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
142 skb->csum_start = skb_headroom(skb) + 139 (data - (void *)skb->data);
143 skb_network_offset(skb) + 140 skb->csum_offset = (void *)check - data;
144 (data - (void *)skb->data); 141 *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
145 skb->csum_offset = (void *)check - data; 142 datalen, proto, 0);
146 *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
147 datalen, proto, 0);
148 } else {
149 *check = 0;
150 *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
151 datalen, proto,
152 csum_partial(data, datalen,
153 0));
154 if (proto == IPPROTO_UDP && !*check)
155 *check = CSUM_MANGLED_0;
156 }
157 } else 143 } else
158 inet_proto_csum_replace2(check, skb, 144 inet_proto_csum_replace2(check, skb,
159 htons(oldlen), htons(datalen), true); 145 htons(oldlen), htons(datalen), true);
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 31ba7ca19757..051b6a6bfff6 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -21,6 +21,10 @@
21#include <net/ipv6.h> 21#include <net/ipv6.h>
22#include <net/netfilter/ipv6/nf_nat_masquerade.h> 22#include <net/netfilter/ipv6/nf_nat_masquerade.h>
23 23
24#define MAX_WORK_COUNT 16
25
26static atomic_t v6_worker_count;
27
24unsigned int 28unsigned int
25nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, 29nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
26 const struct net_device *out) 30 const struct net_device *out)
@@ -78,14 +82,78 @@ static struct notifier_block masq_dev_notifier = {
78 .notifier_call = masq_device_event, 82 .notifier_call = masq_device_event,
79}; 83};
80 84
85struct masq_dev_work {
86 struct work_struct work;
87 struct net *net;
88 int ifindex;
89};
90
91static void iterate_cleanup_work(struct work_struct *work)
92{
93 struct masq_dev_work *w;
94 long index;
95
96 w = container_of(work, struct masq_dev_work, work);
97
98 index = w->ifindex;
99 nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0);
100
101 put_net(w->net);
102 kfree(w);
103 atomic_dec(&v6_worker_count);
104 module_put(THIS_MODULE);
105}
106
107/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
108 * schedule.
109 *
110 * Unfortunately, nf_ct_iterate_cleanup can run for a long
111 * time if there are lots of conntracks and the system
112 * handles high softirq load, so it frequently calls cond_resched
113 * while iterating the conntrack table.
114 *
115 * So we defer nf_ct_iterate_cleanup walk to the system workqueue.
116 *
117 * As we can have 'a lot' of inet_events (depending on the number
118 * of ipv6 addresses being deleted), we also need to add an upper
119 * limit to the number of queued work items.
120 */
81static int masq_inet_event(struct notifier_block *this, 121static int masq_inet_event(struct notifier_block *this,
82 unsigned long event, void *ptr) 122 unsigned long event, void *ptr)
83{ 123{
84 struct inet6_ifaddr *ifa = ptr; 124 struct inet6_ifaddr *ifa = ptr;
85 struct netdev_notifier_info info; 125 const struct net_device *dev;
126 struct masq_dev_work *w;
127 struct net *net;
128
129 if (event != NETDEV_DOWN ||
130 atomic_read(&v6_worker_count) >= MAX_WORK_COUNT)
131 return NOTIFY_DONE;
132
133 dev = ifa->idev->dev;
134 net = maybe_get_net(dev_net(dev));
135 if (!net)
136 return NOTIFY_DONE;
86 137
87 netdev_notifier_info_init(&info, ifa->idev->dev); 138 if (!try_module_get(THIS_MODULE))
88 return masq_device_event(this, event, &info); 139 goto err_module;
140
141 w = kmalloc(sizeof(*w), GFP_ATOMIC);
142 if (w) {
143 atomic_inc(&v6_worker_count);
144
145 INIT_WORK(&w->work, iterate_cleanup_work);
146 w->ifindex = dev->ifindex;
147 w->net = net;
148 schedule_work(&w->work);
149
150 return NOTIFY_DONE;
151 }
152
153 module_put(THIS_MODULE);
154 err_module:
155 put_net(net);
156 return NOTIFY_DONE;
89} 157}
90 158
91static struct notifier_block masq_inet_notifier = { 159static struct notifier_block masq_inet_notifier = {
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index cd1ac1637a05..9597ffb74077 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -26,7 +26,12 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
26 26
27 memset(&range, 0, sizeof(range)); 27 memset(&range, 0, sizeof(range));
28 range.flags = priv->flags; 28 range.flags = priv->flags;
29 29 if (priv->sreg_proto_min) {
30 range.min_proto.all =
31 *(__be16 *)&regs->data[priv->sreg_proto_min];
32 range.max_proto.all =
33 *(__be16 *)&regs->data[priv->sreg_proto_max];
34 }
30 regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); 35 regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out);
31} 36}
32 37
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 263a5164a6f5..c382db7a2e73 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -26,35 +26,6 @@
26#include <net/transp_v6.h> 26#include <net/transp_v6.h>
27#include <net/ping.h> 27#include <net/ping.h>
28 28
29struct proto pingv6_prot = {
30 .name = "PINGv6",
31 .owner = THIS_MODULE,
32 .init = ping_init_sock,
33 .close = ping_close,
34 .connect = ip6_datagram_connect_v6_only,
35 .disconnect = udp_disconnect,
36 .setsockopt = ipv6_setsockopt,
37 .getsockopt = ipv6_getsockopt,
38 .sendmsg = ping_v6_sendmsg,
39 .recvmsg = ping_recvmsg,
40 .bind = ping_bind,
41 .backlog_rcv = ping_queue_rcv_skb,
42 .hash = ping_hash,
43 .unhash = ping_unhash,
44 .get_port = ping_get_port,
45 .obj_size = sizeof(struct raw6_sock),
46};
47EXPORT_SYMBOL_GPL(pingv6_prot);
48
49static struct inet_protosw pingv6_protosw = {
50 .type = SOCK_DGRAM,
51 .protocol = IPPROTO_ICMPV6,
52 .prot = &pingv6_prot,
53 .ops = &inet6_dgram_ops,
54 .flags = INET_PROTOSW_REUSE,
55};
56
57
58/* Compatibility glue so we can support IPv6 when it's compiled as a module */ 29/* Compatibility glue so we can support IPv6 when it's compiled as a module */
59static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, 30static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
60 int *addr_len) 31 int *addr_len)
@@ -77,7 +48,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
77 return 0; 48 return 0;
78} 49}
79 50
80int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) 51static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
81{ 52{
82 struct inet_sock *inet = inet_sk(sk); 53 struct inet_sock *inet = inet_sk(sk);
83 struct ipv6_pinfo *np = inet6_sk(sk); 54 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -192,6 +163,34 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
192 return len; 163 return len;
193} 164}
194 165
166struct proto pingv6_prot = {
167 .name = "PINGv6",
168 .owner = THIS_MODULE,
169 .init = ping_init_sock,
170 .close = ping_close,
171 .connect = ip6_datagram_connect_v6_only,
172 .disconnect = udp_disconnect,
173 .setsockopt = ipv6_setsockopt,
174 .getsockopt = ipv6_getsockopt,
175 .sendmsg = ping_v6_sendmsg,
176 .recvmsg = ping_recvmsg,
177 .bind = ping_bind,
178 .backlog_rcv = ping_queue_rcv_skb,
179 .hash = ping_hash,
180 .unhash = ping_unhash,
181 .get_port = ping_get_port,
182 .obj_size = sizeof(struct raw6_sock),
183};
184EXPORT_SYMBOL_GPL(pingv6_prot);
185
186static struct inet_protosw pingv6_protosw = {
187 .type = SOCK_DGRAM,
188 .protocol = IPPROTO_ICMPV6,
189 .prot = &pingv6_prot,
190 .ops = &inet6_dgram_ops,
191 .flags = INET_PROTOSW_REUSE,
192};
193
195#ifdef CONFIG_PROC_FS 194#ifdef CONFIG_PROC_FS
196static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) 195static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos)
197{ 196{
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 18f3498a6c80..e2ea31175ef9 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
496 IP6CB(head)->flags |= IP6SKB_FRAGMENTED; 496 IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
497 497
498 /* Yes, and fold redundant checksum back. 8) */ 498 /* Yes, and fold redundant checksum back. 8) */
499 if (head->ip_summed == CHECKSUM_COMPLETE) 499 skb_postpush_rcsum(head, skb_network_header(head),
500 head->csum = csum_partial(skb_network_header(head), 500 skb_network_header_len(head));
501 skb_network_header_len(head),
502 head->csum);
503 501
504 rcu_read_lock(); 502 rcu_read_lock();
505 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); 503 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3c8834bc822d..ed446639219c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1183,11 +1183,10 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
1183 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); 1183 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1184} 1184}
1185 1185
1186struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, 1186struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1187 struct flowi6 *fl6) 1187 struct flowi6 *fl6, int flags)
1188{ 1188{
1189 struct dst_entry *dst; 1189 struct dst_entry *dst;
1190 int flags = 0;
1191 bool any_src; 1190 bool any_src;
1192 1191
1193 dst = l3mdev_rt6_dst_by_oif(net, fl6); 1192 dst = l3mdev_rt6_dst_by_oif(net, fl6);
@@ -1208,7 +1207,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
1208 1207
1209 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); 1208 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1210} 1209}
1211EXPORT_SYMBOL(ip6_route_output); 1210EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1212 1211
1213struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) 1212struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1214{ 1213{
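
The hunk above replaces ip6_route_output() with ip6_route_output_flags(), which takes the lookup flags explicitly and is now the exported symbol. Existing callers are presumably kept working by a thin wrapper in the ip6_route.h header, which is not part of this diff; a minimal sketch of such a wrapper, under that assumption:

/*
 * Hedged sketch, not part of this diff: a header-side wrapper preserving
 * the old ip6_route_output() calling convention on top of the new
 * ip6_route_output_flags(). The actual header change is assumed here.
 */
struct dst_entry *ip6_route_output_flags(struct net *net,
					 const struct sock *sk,
					 struct flowi6 *fl6, int flags);

static inline struct dst_entry *ip6_route_output(struct net *net,
						 const struct sock *sk,
						 struct flowi6 *fl6)
{
	/* Old behaviour: no extra lookup flags */
	return ip6_route_output_flags(net, sk, fl6, 0);
}
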
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e794ef66a401..83384308d032 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -201,14 +201,14 @@ static int ipip6_tunnel_create(struct net_device *dev)
201 if ((__force u16)t->parms.i_flags & SIT_ISATAP) 201 if ((__force u16)t->parms.i_flags & SIT_ISATAP)
202 dev->priv_flags |= IFF_ISATAP; 202 dev->priv_flags |= IFF_ISATAP;
203 203
204 dev->rtnl_link_ops = &sit_link_ops;
205
204 err = register_netdevice(dev); 206 err = register_netdevice(dev);
205 if (err < 0) 207 if (err < 0)
206 goto out; 208 goto out;
207 209
208 ipip6_tunnel_clone_6rd(dev, sitn); 210 ipip6_tunnel_clone_6rd(dev, sitn);
209 211
210 dev->rtnl_link_ops = &sit_link_ops;
211
212 dev_hold(dev); 212 dev_hold(dev);
213 213
214 ipip6_tunnel_link(sitn, t); 214 ipip6_tunnel_link(sitn, t);
@@ -475,7 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
475 ipip6_tunnel_unlink(sitn, tunnel); 475 ipip6_tunnel_unlink(sitn, tunnel);
476 ipip6_tunnel_del_prl(tunnel, NULL); 476 ipip6_tunnel_del_prl(tunnel, NULL);
477 } 477 }
478 ip_tunnel_dst_reset_all(tunnel); 478 dst_cache_reset(&tunnel->dst_cache);
479 dev_put(dev); 479 dev_put(dev);
480} 480}
481 481
@@ -681,14 +681,16 @@ static int ipip6_rcv(struct sk_buff *skb)
681 skb->mac_header = skb->network_header; 681 skb->mac_header = skb->network_header;
682 skb_reset_network_header(skb); 682 skb_reset_network_header(skb);
683 IPCB(skb)->flags = 0; 683 IPCB(skb)->flags = 0;
684 skb->protocol = htons(ETH_P_IPV6); 684 skb->dev = tunnel->dev;
685 685
686 if (packet_is_spoofed(skb, iph, tunnel)) { 686 if (packet_is_spoofed(skb, iph, tunnel)) {
687 tunnel->dev->stats.rx_errors++; 687 tunnel->dev->stats.rx_errors++;
688 goto out; 688 goto out;
689 } 689 }
690 690
691 __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); 691 if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6),
692 !net_eq(tunnel->net, dev_net(tunnel->dev))))
693 goto out;
692 694
693 err = IP_ECN_decapsulate(iph, skb); 695 err = IP_ECN_decapsulate(iph, skb);
694 if (unlikely(err)) { 696 if (unlikely(err)) {
@@ -740,7 +742,7 @@ static int ipip_rcv(struct sk_buff *skb)
740 742
741 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 743 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
742 goto drop; 744 goto drop;
743 if (iptunnel_pull_header(skb, 0, tpi.proto)) 745 if (iptunnel_pull_header(skb, 0, tpi.proto, false))
744 goto drop; 746 goto drop;
745 return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error); 747 return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
746 } 748 }
@@ -911,7 +913,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
911 goto tx_error; 913 goto tx_error;
912 } 914 }
913 915
914 skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); 916 skb = iptunnel_handle_offloads(skb, SKB_GSO_SIT);
915 if (IS_ERR(skb)) { 917 if (IS_ERR(skb)) {
916 ip_rt_put(rt); 918 ip_rt_put(rt);
917 goto out; 919 goto out;
@@ -1000,7 +1002,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
1000 struct ip_tunnel *tunnel = netdev_priv(dev); 1002 struct ip_tunnel *tunnel = netdev_priv(dev);
1001 const struct iphdr *tiph = &tunnel->parms.iph; 1003 const struct iphdr *tiph = &tunnel->parms.iph;
1002 1004
1003 skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); 1005 skb = iptunnel_handle_offloads(skb, SKB_GSO_IPIP);
1004 if (IS_ERR(skb)) 1006 if (IS_ERR(skb))
1005 goto out; 1007 goto out;
1006 1008
@@ -1093,7 +1095,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
1093 t->parms.link = p->link; 1095 t->parms.link = p->link;
1094 ipip6_tunnel_bind_dev(t->dev); 1096 ipip6_tunnel_bind_dev(t->dev);
1095 } 1097 }
1096 ip_tunnel_dst_reset_all(t); 1098 dst_cache_reset(&t->dst_cache);
1097 netdev_state_change(t->dev); 1099 netdev_state_change(t->dev);
1098} 1100}
1099 1101
@@ -1124,7 +1126,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
1124 t->ip6rd.relay_prefix = relay_prefix; 1126 t->ip6rd.relay_prefix = relay_prefix;
1125 t->ip6rd.prefixlen = ip6rd->prefixlen; 1127 t->ip6rd.prefixlen = ip6rd->prefixlen;
1126 t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; 1128 t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
1127 ip_tunnel_dst_reset_all(t); 1129 dst_cache_reset(&t->dst_cache);
1128 netdev_state_change(t->dev); 1130 netdev_state_change(t->dev);
1129 return 0; 1131 return 0;
1130} 1132}
@@ -1278,7 +1280,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1278 err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); 1280 err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
1279 break; 1281 break;
1280 } 1282 }
1281 ip_tunnel_dst_reset_all(t); 1283 dst_cache_reset(&t->dst_cache);
1282 netdev_state_change(dev); 1284 netdev_state_change(dev);
1283 break; 1285 break;
1284 1286
@@ -1339,7 +1341,7 @@ static void ipip6_dev_free(struct net_device *dev)
1339{ 1341{
1340 struct ip_tunnel *tunnel = netdev_priv(dev); 1342 struct ip_tunnel *tunnel = netdev_priv(dev);
1341 1343
1342 free_percpu(tunnel->dst_cache); 1344 dst_cache_destroy(&tunnel->dst_cache);
1343 free_percpu(dev->tstats); 1345 free_percpu(dev->tstats);
1344 free_netdev(dev); 1346 free_netdev(dev);
1345} 1347}
@@ -1372,6 +1374,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
1372static int ipip6_tunnel_init(struct net_device *dev) 1374static int ipip6_tunnel_init(struct net_device *dev)
1373{ 1375{
1374 struct ip_tunnel *tunnel = netdev_priv(dev); 1376 struct ip_tunnel *tunnel = netdev_priv(dev);
1377 int err;
1375 1378
1376 tunnel->dev = dev; 1379 tunnel->dev = dev;
1377 tunnel->net = dev_net(dev); 1380 tunnel->net = dev_net(dev);
@@ -1382,10 +1385,10 @@ static int ipip6_tunnel_init(struct net_device *dev)
1382 if (!dev->tstats) 1385 if (!dev->tstats)
1383 return -ENOMEM; 1386 return -ENOMEM;
1384 1387
1385 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); 1388 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1386 if (!tunnel->dst_cache) { 1389 if (err) {
1387 free_percpu(dev->tstats); 1390 free_percpu(dev->tstats);
1388 return -ENOMEM; 1391 return err;
1389 } 1392 }
1390 1393
1391 return 0; 1394 return 0;
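
The sit.c hunks above convert the tunnel code from the open-coded per-cpu ip_tunnel_dst cache to the dst_cache API (dst_cache_init, dst_cache_reset, dst_cache_destroy). The lookup side of that API is not shown in this diff; a rough sketch of how a transmit path would typically use it, assuming the helpers declared in include/net/dst_cache.h and an illustrative function name my_tunnel_route:

/*
 * Hedged sketch, not part of this diff: typical fast-path use of the
 * per-tunnel dst_cache that the hunks above initialise and reset.
 */
#include <linux/err.h>
#include <net/dst_cache.h>
#include <net/ip_tunnels.h>
#include <net/route.h>

static struct rtable *my_tunnel_route(struct ip_tunnel *tunnel,
				      struct flowi4 *fl4)
{
	struct rtable *rt;

	/* Fast path: reuse the cached route and source address if valid */
	rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4->saddr);
	if (rt)
		return rt;

	/* Slow path: full route lookup, then cache the result */
	rt = ip_route_output_key(tunnel->net, fl4);
	if (IS_ERR(rt))
		return NULL;

	dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4->saddr);
	return rt;
}
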
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 2906ef20795e..aab91fa86c5e 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -41,8 +41,7 @@ static __u16 const msstab[] = {
41 9000 - 60, 41 9000 - 60,
42}; 42};
43 43
44static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], 44static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch);
45 ipv6_cookie_scratch);
46 45
47static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr, 46static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
48 __be16 sport, __be16 dport, u32 count, int c) 47 __be16 sport, __be16 dport, u32 count, int c)
@@ -148,7 +147,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
148 struct dst_entry *dst; 147 struct dst_entry *dst;
149 __u8 rcv_wscale; 148 __u8 rcv_wscale;
150 149
151 if (!sysctl_tcp_syncookies || !th->ack || th->rst) 150 if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
152 goto out; 151 goto out;
153 152
154 if (tcp_synq_no_recent_overflow(sk)) 153 if (tcp_synq_no_recent_overflow(sk))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 006396e31cb0..711d209f9124 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -66,7 +66,7 @@
66#include <linux/proc_fs.h> 66#include <linux/proc_fs.h>
67#include <linux/seq_file.h> 67#include <linux/seq_file.h>
68 68
69#include <linux/crypto.h> 69#include <crypto/hash.h>
70#include <linux/scatterlist.h> 70#include <linux/scatterlist.h>
71 71
72static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); 72static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
@@ -327,6 +327,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
327 struct tcp_sock *tp; 327 struct tcp_sock *tp;
328 __u32 seq, snd_una; 328 __u32 seq, snd_una;
329 struct sock *sk; 329 struct sock *sk;
330 bool fatal;
330 int err; 331 int err;
331 332
332 sk = __inet6_lookup_established(net, &tcp_hashinfo, 333 sk = __inet6_lookup_established(net, &tcp_hashinfo,
@@ -345,8 +346,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
345 return; 346 return;
346 } 347 }
347 seq = ntohl(th->seq); 348 seq = ntohl(th->seq);
349 fatal = icmpv6_err_convert(type, code, &err);
348 if (sk->sk_state == TCP_NEW_SYN_RECV) 350 if (sk->sk_state == TCP_NEW_SYN_RECV)
349 return tcp_req_err(sk, seq); 351 return tcp_req_err(sk, seq, fatal);
350 352
351 bh_lock_sock(sk); 353 bh_lock_sock(sk);
352 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) 354 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
@@ -400,7 +402,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
400 goto out; 402 goto out;
401 } 403 }
402 404
403 icmpv6_err_convert(type, code, &err);
404 405
405 /* Might be for an request_sock */ 406 /* Might be for an request_sock */
406 switch (sk->sk_state) { 407 switch (sk->sk_state) {
@@ -540,7 +541,8 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
540 bp->len = cpu_to_be32(nbytes); 541 bp->len = cpu_to_be32(nbytes);
541 542
542 sg_init_one(&sg, bp, sizeof(*bp)); 543 sg_init_one(&sg, bp, sizeof(*bp));
543 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); 544 ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp));
545 return crypto_ahash_update(hp->md5_req);
544} 546}
545 547
546static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, 548static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
@@ -548,14 +550,14 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
548 const struct tcphdr *th) 550 const struct tcphdr *th)
549{ 551{
550 struct tcp_md5sig_pool *hp; 552 struct tcp_md5sig_pool *hp;
551 struct hash_desc *desc; 553 struct ahash_request *req;
552 554
553 hp = tcp_get_md5sig_pool(); 555 hp = tcp_get_md5sig_pool();
554 if (!hp) 556 if (!hp)
555 goto clear_hash_noput; 557 goto clear_hash_noput;
556 desc = &hp->md5_desc; 558 req = hp->md5_req;
557 559
558 if (crypto_hash_init(desc)) 560 if (crypto_ahash_init(req))
559 goto clear_hash; 561 goto clear_hash;
560 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) 562 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
561 goto clear_hash; 563 goto clear_hash;
@@ -563,7 +565,8 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
563 goto clear_hash; 565 goto clear_hash;
564 if (tcp_md5_hash_key(hp, key)) 566 if (tcp_md5_hash_key(hp, key))
565 goto clear_hash; 567 goto clear_hash;
566 if (crypto_hash_final(desc, md5_hash)) 568 ahash_request_set_crypt(req, NULL, md5_hash, 0);
569 if (crypto_ahash_final(req))
567 goto clear_hash; 570 goto clear_hash;
568 571
569 tcp_put_md5sig_pool(); 572 tcp_put_md5sig_pool();
@@ -583,7 +586,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
583{ 586{
584 const struct in6_addr *saddr, *daddr; 587 const struct in6_addr *saddr, *daddr;
585 struct tcp_md5sig_pool *hp; 588 struct tcp_md5sig_pool *hp;
586 struct hash_desc *desc; 589 struct ahash_request *req;
587 const struct tcphdr *th = tcp_hdr(skb); 590 const struct tcphdr *th = tcp_hdr(skb);
588 591
589 if (sk) { /* valid for establish/request sockets */ 592 if (sk) { /* valid for establish/request sockets */
@@ -598,9 +601,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
598 hp = tcp_get_md5sig_pool(); 601 hp = tcp_get_md5sig_pool();
599 if (!hp) 602 if (!hp)
600 goto clear_hash_noput; 603 goto clear_hash_noput;
601 desc = &hp->md5_desc; 604 req = hp->md5_req;
602 605
603 if (crypto_hash_init(desc)) 606 if (crypto_ahash_init(req))
604 goto clear_hash; 607 goto clear_hash;
605 608
606 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) 609 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
@@ -611,7 +614,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
611 goto clear_hash; 614 goto clear_hash;
612 if (tcp_md5_hash_key(hp, key)) 615 if (tcp_md5_hash_key(hp, key))
613 goto clear_hash; 616 goto clear_hash;
614 if (crypto_hash_final(desc, md5_hash)) 617 ahash_request_set_crypt(req, NULL, md5_hash, 0);
618 if (crypto_ahash_final(req))
615 goto clear_hash; 619 goto clear_hash;
616 620
617 tcp_put_md5sig_pool(); 621 tcp_put_md5sig_pool();
@@ -866,7 +870,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
866 * no RST generated if md5 hash doesn't match. 870 * no RST generated if md5 hash doesn't match.
867 */ 871 */
868 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), 872 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
869 &tcp_hashinfo, &ipv6h->saddr, 873 &tcp_hashinfo, NULL, 0,
874 &ipv6h->saddr,
870 th->source, &ipv6h->daddr, 875 th->source, &ipv6h->daddr,
871 ntohs(th->source), tcp_v6_iif(skb)); 876 ntohs(th->source), tcp_v6_iif(skb));
872 if (!sk1) 877 if (!sk1)
@@ -1375,8 +1380,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
1375 hdr = ipv6_hdr(skb); 1380 hdr = ipv6_hdr(skb);
1376 1381
1377lookup: 1382lookup:
1378 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, 1383 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1379 inet6_iif(skb)); 1384 th->source, th->dest, inet6_iif(skb));
1380 if (!sk) 1385 if (!sk)
1381 goto no_tcp_socket; 1386 goto no_tcp_socket;
1382 1387
@@ -1386,7 +1391,7 @@ process:
1386 1391
1387 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1392 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1388 struct request_sock *req = inet_reqsk(sk); 1393 struct request_sock *req = inet_reqsk(sk);
1389 struct sock *nsk = NULL; 1394 struct sock *nsk;
1390 1395
1391 sk = req->rsk_listener; 1396 sk = req->rsk_listener;
1392 tcp_v6_fill_cb(skb, hdr, th); 1397 tcp_v6_fill_cb(skb, hdr, th);
@@ -1394,24 +1399,24 @@ process:
1394 reqsk_put(req); 1399 reqsk_put(req);
1395 goto discard_it; 1400 goto discard_it;
1396 } 1401 }
1397 if (likely(sk->sk_state == TCP_LISTEN)) { 1402 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1398 nsk = tcp_check_req(sk, skb, req, false);
1399 } else {
1400 inet_csk_reqsk_queue_drop_and_put(sk, req); 1403 inet_csk_reqsk_queue_drop_and_put(sk, req);
1401 goto lookup; 1404 goto lookup;
1402 } 1405 }
1406 sock_hold(sk);
1407 nsk = tcp_check_req(sk, skb, req, false);
1403 if (!nsk) { 1408 if (!nsk) {
1404 reqsk_put(req); 1409 reqsk_put(req);
1405 goto discard_it; 1410 goto discard_and_relse;
1406 } 1411 }
1407 if (nsk == sk) { 1412 if (nsk == sk) {
1408 sock_hold(sk);
1409 reqsk_put(req); 1413 reqsk_put(req);
1410 tcp_v6_restore_cb(skb); 1414 tcp_v6_restore_cb(skb);
1411 } else if (tcp_child_process(sk, nsk, skb)) { 1415 } else if (tcp_child_process(sk, nsk, skb)) {
1412 tcp_v6_send_reset(nsk, skb); 1416 tcp_v6_send_reset(nsk, skb);
1413 goto discard_it; 1417 goto discard_and_relse;
1414 } else { 1418 } else {
1419 sock_put(sk);
1415 return 0; 1420 return 0;
1416 } 1421 }
1417 } 1422 }
@@ -1441,7 +1446,7 @@ process:
1441 sk_incoming_cpu_update(sk); 1446 sk_incoming_cpu_update(sk);
1442 1447
1443 bh_lock_sock_nested(sk); 1448 bh_lock_sock_nested(sk);
1444 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); 1449 tcp_segs_in(tcp_sk(sk), skb);
1445 ret = 0; 1450 ret = 0;
1446 if (!sock_owned_by_user(sk)) { 1451 if (!sock_owned_by_user(sk)) {
1447 if (!tcp_prequeue(sk, skb)) 1452 if (!tcp_prequeue(sk, skb))
@@ -1500,6 +1505,7 @@ do_time_wait:
1500 struct sock *sk2; 1505 struct sock *sk2;
1501 1506
1502 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, 1507 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1508 skb, __tcp_hdrlen(th),
1503 &ipv6_hdr(skb)->saddr, th->source, 1509 &ipv6_hdr(skb)->saddr, th->source,
1504 &ipv6_hdr(skb)->daddr, 1510 &ipv6_hdr(skb)->daddr,
1505 ntohs(th->dest), tcp_v6_iif(skb)); 1511 ntohs(th->dest), tcp_v6_iif(skb));
@@ -1865,7 +1871,7 @@ struct proto tcpv6_prot = {
1865 .sendpage = tcp_sendpage, 1871 .sendpage = tcp_sendpage,
1866 .backlog_rcv = tcp_v6_do_rcv, 1872 .backlog_rcv = tcp_v6_do_rcv,
1867 .release_cb = tcp_release_cb, 1873 .release_cb = tcp_release_cb,
1868 .hash = inet_hash, 1874 .hash = inet6_hash,
1869 .unhash = inet_unhash, 1875 .unhash = inet_unhash,
1870 .get_port = inet_csk_get_port, 1876 .get_port = inet_csk_get_port,
1871 .enter_memory_pressure = tcp_enter_memory_pressure, 1877 .enter_memory_pressure = tcp_enter_memory_pressure,
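
The tcp_ipv6.c hunks above switch the TCP MD5 signature code from the old crypto_hash/hash_desc interface to crypto_ahash with a per-pool ahash_request (hp->md5_req). The allocation of that request lives in the shared md5sig pool code and is outside this diff; a hedged sketch of the tfm/request pairing it relies on, assuming the ahash API from crypto/hash.h:

/*
 * Hedged sketch, not part of this diff: allocating a synchronous MD5
 * ahash transform and a request to go with it, as the md5_req usage
 * above assumes has been done elsewhere.
 */
#include <crypto/hash.h>
#include <linux/err.h>

static struct ahash_request *md5_req_alloc(void)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;

	/* Ask for an implementation that can be used synchronously */
	tfm = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return NULL;

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return NULL;
	}

	/* Synchronous use: no completion callback */
	ahash_request_set_callback(req, 0, NULL, NULL);
	return req;
}
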
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5d2c2afffe7b..fd25e447a5fa 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -37,6 +37,7 @@
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <asm/uaccess.h> 38#include <asm/uaccess.h>
39 39
40#include <net/addrconf.h>
40#include <net/ndisc.h> 41#include <net/ndisc.h>
41#include <net/protocol.h> 42#include <net/protocol.h>
42#include <net/transp_v6.h> 43#include <net/transp_v6.h>
@@ -77,49 +78,6 @@ static u32 udp6_ehashfn(const struct net *net,
77 udp_ipv6_hash_secret + net_hash_mix(net)); 78 udp_ipv6_hash_secret + net_hash_mix(net));
78} 79}
79 80
80/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
81 * only, and any IPv4 addresses if not IPv6 only
82 * match_wildcard == false: addresses must be exactly the same, i.e.
83 * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
84 * and 0.0.0.0 equals to 0.0.0.0 only
85 */
86int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
87 bool match_wildcard)
88{
89 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
90 int sk2_ipv6only = inet_v6_ipv6only(sk2);
91 int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
92 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
93
94 /* if both are mapped, treat as IPv4 */
95 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
96 if (!sk2_ipv6only) {
97 if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
98 return 1;
99 if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
100 return match_wildcard;
101 }
102 return 0;
103 }
104
105 if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
106 return 1;
107
108 if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
109 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
110 return 1;
111
112 if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
113 !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
114 return 1;
115
116 if (sk2_rcv_saddr6 &&
117 ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
118 return 1;
119
120 return 0;
121}
122
123static u32 udp6_portaddr_hash(const struct net *net, 81static u32 udp6_portaddr_hash(const struct net *net,
124 const struct in6_addr *addr6, 82 const struct in6_addr *addr6,
125 unsigned int port) 83 unsigned int port)
@@ -257,6 +215,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
257 struct sock *sk, *result; 215 struct sock *sk, *result;
258 struct hlist_nulls_node *node; 216 struct hlist_nulls_node *node;
259 int score, badness, matches = 0, reuseport = 0; 217 int score, badness, matches = 0, reuseport = 0;
218 bool select_ok = true;
260 u32 hash = 0; 219 u32 hash = 0;
261 220
262begin: 221begin:
@@ -270,14 +229,18 @@ begin:
270 badness = score; 229 badness = score;
271 reuseport = sk->sk_reuseport; 230 reuseport = sk->sk_reuseport;
272 if (reuseport) { 231 if (reuseport) {
273 struct sock *sk2;
274 hash = udp6_ehashfn(net, daddr, hnum, 232 hash = udp6_ehashfn(net, daddr, hnum,
275 saddr, sport); 233 saddr, sport);
276 sk2 = reuseport_select_sock(sk, hash, skb, 234 if (select_ok) {
277 sizeof(struct udphdr)); 235 struct sock *sk2;
278 if (sk2) { 236
279 result = sk2; 237 sk2 = reuseport_select_sock(sk, hash, skb,
280 goto found; 238 sizeof(struct udphdr));
239 if (sk2) {
240 result = sk2;
241 select_ok = false;
242 goto found;
243 }
281 } 244 }
282 matches = 1; 245 matches = 1;
283 } 246 }
@@ -321,6 +284,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
321 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); 284 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
322 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; 285 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
323 int score, badness, matches = 0, reuseport = 0; 286 int score, badness, matches = 0, reuseport = 0;
287 bool select_ok = true;
324 u32 hash = 0; 288 u32 hash = 0;
325 289
326 rcu_read_lock(); 290 rcu_read_lock();
@@ -358,14 +322,18 @@ begin:
358 badness = score; 322 badness = score;
359 reuseport = sk->sk_reuseport; 323 reuseport = sk->sk_reuseport;
360 if (reuseport) { 324 if (reuseport) {
361 struct sock *sk2;
362 hash = udp6_ehashfn(net, daddr, hnum, 325 hash = udp6_ehashfn(net, daddr, hnum,
363 saddr, sport); 326 saddr, sport);
364 sk2 = reuseport_select_sock(sk, hash, skb, 327 if (select_ok) {
328 struct sock *sk2;
329
330 sk2 = reuseport_select_sock(sk, hash, skb,
365 sizeof(struct udphdr)); 331 sizeof(struct udphdr));
366 if (sk2) { 332 if (sk2) {
367 result = sk2; 333 result = sk2;
368 goto found; 334 select_ok = false;
335 goto found;
336 }
369 } 337 }
370 matches = 1; 338 matches = 1;
371 } 339 }
@@ -580,6 +548,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
580 const struct in6_addr *daddr = &hdr->daddr; 548 const struct in6_addr *daddr = &hdr->daddr;
581 struct udphdr *uh = (struct udphdr *)(skb->data+offset); 549 struct udphdr *uh = (struct udphdr *)(skb->data+offset);
582 struct sock *sk; 550 struct sock *sk;
551 int harderr;
583 int err; 552 int err;
584 struct net *net = dev_net(skb->dev); 553 struct net *net = dev_net(skb->dev);
585 554
@@ -591,26 +560,27 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
591 return; 560 return;
592 } 561 }
593 562
563 harderr = icmpv6_err_convert(type, code, &err);
564 np = inet6_sk(sk);
565
594 if (type == ICMPV6_PKT_TOOBIG) { 566 if (type == ICMPV6_PKT_TOOBIG) {
595 if (!ip6_sk_accept_pmtu(sk)) 567 if (!ip6_sk_accept_pmtu(sk))
596 goto out; 568 goto out;
597 ip6_sk_update_pmtu(skb, sk, info); 569 ip6_sk_update_pmtu(skb, sk, info);
570 if (np->pmtudisc != IPV6_PMTUDISC_DONT)
571 harderr = 1;
598 } 572 }
599 if (type == NDISC_REDIRECT) { 573 if (type == NDISC_REDIRECT) {
600 ip6_sk_redirect(skb, sk); 574 ip6_sk_redirect(skb, sk);
601 goto out; 575 goto out;
602 } 576 }
603 577
604 np = inet6_sk(sk); 578 if (!np->recverr) {
605 579 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
606 if (!icmpv6_err_convert(type, code, &err) && !np->recverr) 580 goto out;
607 goto out; 581 } else {
608
609 if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
610 goto out;
611
612 if (np->recverr)
613 ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); 582 ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
583 }
614 584
615 sk->sk_err = err; 585 sk->sk_err = err;
616 sk->sk_error_report(sk); 586 sk->sk_error_report(sk);
@@ -952,11 +922,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
952 ret = udpv6_queue_rcv_skb(sk, skb); 922 ret = udpv6_queue_rcv_skb(sk, skb);
953 sock_put(sk); 923 sock_put(sk);
954 924
955 /* a return value > 0 means to resubmit the input, but 925 /* a return value > 0 means to resubmit the input */
956 * it wants the return to be -protocol, or 0
957 */
958 if (ret > 0) 926 if (ret > 0)
959 return -ret; 927 return ret;
960 928
961 return 0; 929 return 0;
962 } 930 }
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 7441e1e63893..2b0fbe6929e8 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -81,12 +81,18 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
81 csum = skb_checksum(skb, 0, skb->len, 0); 81 csum = skb_checksum(skb, 0, skb->len, 0);
82 uh->check = udp_v6_check(skb->len, &ipv6h->saddr, 82 uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
83 &ipv6h->daddr, csum); 83 &ipv6h->daddr, csum);
84
85 if (uh->check == 0) 84 if (uh->check == 0)
86 uh->check = CSUM_MANGLED_0; 85 uh->check = CSUM_MANGLED_0;
87 86
88 skb->ip_summed = CHECKSUM_NONE; 87 skb->ip_summed = CHECKSUM_NONE;
89 88
89 /* If there is no outer header we can fake a checksum offload
90 * due to the fact that we have already done the checksum in
91 * software prior to segmenting the frame.
92 */
93 if (!skb->encap_hdr_csum)
94 features |= NETIF_F_HW_CSUM;
95
90 /* Check if there is enough headroom to insert fragment header. */ 96 /* Check if there is enough headroom to insert fragment header. */
91 tnl_hlen = skb_tnl_header_len(skb); 97 tnl_hlen = skb_tnl_header_len(skb);
92 if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { 98 if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 3c4caa60c926..5728e76ca6d5 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -134,11 +134,10 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
134 return -1; 134 return -1;
135 } 135 }
136 skb_put(skb, count); 136 skb_put(skb, count);
137 pr_debug("%s(), skb->len=%d\n", __func__, skb->len);
137 138
138 spin_unlock_irqrestore(&self->spinlock, flags); 139 spin_unlock_irqrestore(&self->spinlock, flags);
139 140
140 pr_debug("%s(), skb->len=%d\n", __func__ , skb->len);
141
142 if (flush) { 141 if (flush) {
143 /* ircomm_tty_do_softint will take care of the rest */ 142 /* ircomm_tty_do_softint will take care of the rest */
144 schedule_work(&self->tqueue); 143 schedule_work(&self->tqueue);
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index a4237707f79d..da126ee6d218 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -287,14 +287,14 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
287 287
288 if (filp->f_flags & O_NONBLOCK) { 288 if (filp->f_flags & O_NONBLOCK) {
289 /* nonblock mode is set */ 289 /* nonblock mode is set */
290 if (tty->termios.c_cflag & CBAUD) 290 if (C_BAUD(tty))
291 tty_port_raise_dtr_rts(port); 291 tty_port_raise_dtr_rts(port);
292 port->flags |= ASYNC_NORMAL_ACTIVE; 292 port->flags |= ASYNC_NORMAL_ACTIVE;
293 pr_debug("%s(), O_NONBLOCK requested!\n", __func__); 293 pr_debug("%s(), O_NONBLOCK requested!\n", __func__);
294 return 0; 294 return 0;
295 } 295 }
296 296
297 if (tty->termios.c_cflag & CLOCAL) { 297 if (C_CLOCAL(tty)) {
298 pr_debug("%s(), doing CLOCAL!\n", __func__); 298 pr_debug("%s(), doing CLOCAL!\n", __func__);
299 do_clocal = 1; 299 do_clocal = 1;
300 } 300 }
@@ -806,7 +806,7 @@ static void ircomm_tty_throttle(struct tty_struct *tty)
806 ircomm_tty_send_xchar(tty, STOP_CHAR(tty)); 806 ircomm_tty_send_xchar(tty, STOP_CHAR(tty));
807 807
808 /* Hardware flow control? */ 808 /* Hardware flow control? */
809 if (tty->termios.c_cflag & CRTSCTS) { 809 if (C_CRTSCTS(tty)) {
810 self->settings.dte &= ~IRCOMM_RTS; 810 self->settings.dte &= ~IRCOMM_RTS;
811 self->settings.dte |= IRCOMM_DELTA_RTS; 811 self->settings.dte |= IRCOMM_DELTA_RTS;
812 812
@@ -831,12 +831,11 @@ static void ircomm_tty_unthrottle(struct tty_struct *tty)
831 IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); 831 IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
832 832
833 /* Using software flow control? */ 833 /* Using software flow control? */
834 if (I_IXOFF(tty)) { 834 if (I_IXOFF(tty))
835 ircomm_tty_send_xchar(tty, START_CHAR(tty)); 835 ircomm_tty_send_xchar(tty, START_CHAR(tty));
836 }
837 836
838 /* Using hardware flow control? */ 837 /* Using hardware flow control? */
839 if (tty->termios.c_cflag & CRTSCTS) { 838 if (C_CRTSCTS(tty)) {
840 self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS); 839 self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS);
841 840
842 ircomm_param_request(self, IRCOMM_DTE, TRUE); 841 ircomm_param_request(self, IRCOMM_DTE, TRUE);
@@ -1268,10 +1267,6 @@ static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m)
1268 seq_printf(m, "%cASYNC_LOW_LATENCY", sep); 1267 seq_printf(m, "%cASYNC_LOW_LATENCY", sep);
1269 sep = '|'; 1268 sep = '|';
1270 } 1269 }
1271 if (self->port.flags & ASYNC_CLOSING) {
1272 seq_printf(m, "%cASYNC_CLOSING", sep);
1273 sep = '|';
1274 }
1275 if (self->port.flags & ASYNC_NORMAL_ACTIVE) { 1270 if (self->port.flags & ASYNC_NORMAL_ACTIVE) {
1276 seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep); 1271 seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep);
1277 sep = '|'; 1272 sep = '|';
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
index 75ccdbd0728e..d3687aaa23de 100644
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ b/net/irda/ircomm/ircomm_tty_ioctl.c
@@ -158,26 +158,21 @@ void ircomm_tty_set_termios(struct tty_struct *tty,
158 ircomm_tty_change_speed(self, tty); 158 ircomm_tty_change_speed(self, tty);
159 159
160 /* Handle transition to B0 status */ 160 /* Handle transition to B0 status */
161 if ((old_termios->c_cflag & CBAUD) && 161 if ((old_termios->c_cflag & CBAUD) && !(cflag & CBAUD)) {
162 !(cflag & CBAUD)) {
163 self->settings.dte &= ~(IRCOMM_DTR|IRCOMM_RTS); 162 self->settings.dte &= ~(IRCOMM_DTR|IRCOMM_RTS);
164 ircomm_param_request(self, IRCOMM_DTE, TRUE); 163 ircomm_param_request(self, IRCOMM_DTE, TRUE);
165 } 164 }
166 165
167 /* Handle transition away from B0 status */ 166 /* Handle transition away from B0 status */
168 if (!(old_termios->c_cflag & CBAUD) && 167 if (!(old_termios->c_cflag & CBAUD) && (cflag & CBAUD)) {
169 (cflag & CBAUD)) {
170 self->settings.dte |= IRCOMM_DTR; 168 self->settings.dte |= IRCOMM_DTR;
171 if (!(tty->termios.c_cflag & CRTSCTS) || 169 if (!C_CRTSCTS(tty) || !test_bit(TTY_THROTTLED, &tty->flags))
172 !test_bit(TTY_THROTTLED, &tty->flags)) {
173 self->settings.dte |= IRCOMM_RTS; 170 self->settings.dte |= IRCOMM_RTS;
174 }
175 ircomm_param_request(self, IRCOMM_DTE, TRUE); 171 ircomm_param_request(self, IRCOMM_DTE, TRUE);
176 } 172 }
177 173
178 /* Handle turning off CRTSCTS */ 174 /* Handle turning off CRTSCTS */
179 if ((old_termios->c_cflag & CRTSCTS) && 175 if ((old_termios->c_cflag & CRTSCTS) && !C_CRTSCTS(tty))
180 !(tty->termios.c_cflag & CRTSCTS))
181 { 176 {
182 tty->hw_stopped = 0; 177 tty->hw_stopped = 0;
183 ircomm_tty_start(tty); 178 ircomm_tty_start(tty);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ef50a94d3eb7..fc3598a922b0 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -708,6 +708,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
708 if (!addr || addr->sa_family != AF_IUCV) 708 if (!addr || addr->sa_family != AF_IUCV)
709 return -EINVAL; 709 return -EINVAL;
710 710
711 if (addr_len < sizeof(struct sockaddr_iucv))
712 return -EINVAL;
713
711 lock_sock(sk); 714 lock_sock(sk);
712 if (sk->sk_state != IUCV_OPEN) { 715 if (sk->sk_state != IUCV_OPEN) {
713 err = -EBADFD; 716 err = -EBADFD;
diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig
new file mode 100644
index 000000000000..5db94d940ecc
--- /dev/null
+++ b/net/kcm/Kconfig
@@ -0,0 +1,10 @@
1
2config AF_KCM
3 tristate "KCM sockets"
4 depends on INET
5 select BPF_SYSCALL
6 ---help---
7	  KCM (Kernel Connection Multiplexor) sockets provide a method
8	  for multiplexing messages of a message-based application
9	  protocol over kernel connections (e.g. TCP connections).
10
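
The Kconfig help above introduces KCM sockets; as a rough illustration that is not part of this patch, a userspace program would typically create an AF_KCM socket and attach an already-connected TCP socket together with a length-parsing BPF program, assuming the UAPI from include/uapi/linux/kcm.h:

/*
 * Hedged sketch, not part of this patch: attach a connected TCP socket
 * (tcp_fd) and a message-length-parsing BPF program (bpf_fd) to a new
 * KCM socket. AF_KCM, KCMPROTO_CONNECTED, SIOCKCMATTACH and
 * struct kcm_attach are assumed to come from linux/kcm.h.
 */
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kcm.h>

static int kcm_attach_tcp(int tcp_fd, int bpf_fd)
{
	struct kcm_attach attach = {
		.fd	= tcp_fd,	/* connected TCP transport socket */
		.bpf_fd	= bpf_fd,	/* BPF prog returning message length */
	};
	int kcm_fd;

	kcm_fd = socket(AF_KCM, SOCK_DGRAM, KCMPROTO_CONNECTED);
	if (kcm_fd < 0)
		return -1;

	if (ioctl(kcm_fd, SIOCKCMATTACH, &attach) < 0) {
		close(kcm_fd);
		return -1;
	}

	/* send()/recv() on kcm_fd now operate on whole framed messages */
	return kcm_fd;
}
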
diff --git a/net/kcm/Makefile b/net/kcm/Makefile
new file mode 100644
index 000000000000..71256133e677
--- /dev/null
+++ b/net/kcm/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_AF_KCM) += kcm.o
2
3kcm-y := kcmsock.o kcmproc.o
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
new file mode 100644
index 000000000000..738008726cc6
--- /dev/null
+++ b/net/kcm/kcmproc.c
@@ -0,0 +1,426 @@
1#include <linux/in.h>
2#include <linux/inet.h>
3#include <linux/list.h>
4#include <linux/module.h>
5#include <linux/net.h>
6#include <linux/proc_fs.h>
7#include <linux/rculist.h>
8#include <linux/seq_file.h>
9#include <linux/socket.h>
10#include <net/inet_sock.h>
11#include <net/kcm.h>
12#include <net/net_namespace.h>
13#include <net/netns/generic.h>
14#include <net/tcp.h>
15
16#ifdef CONFIG_PROC_FS
17struct kcm_seq_muxinfo {
18 char *name;
19 const struct file_operations *seq_fops;
20 const struct seq_operations seq_ops;
21};
22
23static struct kcm_mux *kcm_get_first(struct seq_file *seq)
24{
25 struct net *net = seq_file_net(seq);
26 struct kcm_net *knet = net_generic(net, kcm_net_id);
27
28 return list_first_or_null_rcu(&knet->mux_list,
29 struct kcm_mux, kcm_mux_list);
30}
31
32static struct kcm_mux *kcm_get_next(struct kcm_mux *mux)
33{
34 struct kcm_net *knet = mux->knet;
35
36 return list_next_or_null_rcu(&knet->mux_list, &mux->kcm_mux_list,
37 struct kcm_mux, kcm_mux_list);
38}
39
40static struct kcm_mux *kcm_get_idx(struct seq_file *seq, loff_t pos)
41{
42 struct net *net = seq_file_net(seq);
43 struct kcm_net *knet = net_generic(net, kcm_net_id);
44 struct kcm_mux *m;
45
46 list_for_each_entry_rcu(m, &knet->mux_list, kcm_mux_list) {
47 if (!pos)
48 return m;
49 --pos;
50 }
51 return NULL;
52}
53
54static void *kcm_seq_next(struct seq_file *seq, void *v, loff_t *pos)
55{
56 void *p;
57
58 if (v == SEQ_START_TOKEN)
59 p = kcm_get_first(seq);
60 else
61 p = kcm_get_next(v);
62 ++*pos;
63 return p;
64}
65
66static void *kcm_seq_start(struct seq_file *seq, loff_t *pos)
67 __acquires(rcu)
68{
69 rcu_read_lock();
70
71 if (!*pos)
72 return SEQ_START_TOKEN;
73 else
74 return kcm_get_idx(seq, *pos - 1);
75}
76
77static void kcm_seq_stop(struct seq_file *seq, void *v)
78 __releases(rcu)
79{
80 rcu_read_unlock();
81}
82
83struct kcm_proc_mux_state {
84 struct seq_net_private p;
85 int idx;
86};
87
88static int kcm_seq_open(struct inode *inode, struct file *file)
89{
90 struct kcm_seq_muxinfo *muxinfo = PDE_DATA(inode);
91 int err;
92
93 err = seq_open_net(inode, file, &muxinfo->seq_ops,
94 sizeof(struct kcm_proc_mux_state));
95 if (err < 0)
96 return err;
97 return err;
98}
99
100static void kcm_format_mux_header(struct seq_file *seq)
101{
102 struct net *net = seq_file_net(seq);
103 struct kcm_net *knet = net_generic(net, kcm_net_id);
104
105 seq_printf(seq,
106 "*** KCM statistics (%d MUX) ****\n",
107 knet->count);
108
109 seq_printf(seq,
110 "%-14s %-10s %-16s %-10s %-16s %-8s %-8s %-8s %-8s %s",
111 "Object",
112 "RX-Msgs",
113 "RX-Bytes",
114 "TX-Msgs",
115 "TX-Bytes",
116 "Recv-Q",
117 "Rmem",
118 "Send-Q",
119 "Smem",
120 "Status");
121
122 /* XXX: pdsts header stuff here */
123 seq_puts(seq, "\n");
124}
125
126static void kcm_format_sock(struct kcm_sock *kcm, struct seq_file *seq,
127 int i, int *len)
128{
129 seq_printf(seq,
130 " kcm-%-7u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8s ",
131 kcm->index,
132 kcm->stats.rx_msgs,
133 kcm->stats.rx_bytes,
134 kcm->stats.tx_msgs,
135 kcm->stats.tx_bytes,
136 kcm->sk.sk_receive_queue.qlen,
137 sk_rmem_alloc_get(&kcm->sk),
138 kcm->sk.sk_write_queue.qlen,
139 "-");
140
141 if (kcm->tx_psock)
142 seq_printf(seq, "Psck-%u ", kcm->tx_psock->index);
143
144 if (kcm->tx_wait)
145 seq_puts(seq, "TxWait ");
146
147 if (kcm->tx_wait_more)
148 seq_puts(seq, "WMore ");
149
150 if (kcm->rx_wait)
151 seq_puts(seq, "RxWait ");
152
153 seq_puts(seq, "\n");
154}
155
156static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
157 int i, int *len)
158{
159 seq_printf(seq,
160 " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
161 psock->index,
162 psock->stats.rx_msgs,
163 psock->stats.rx_bytes,
164 psock->stats.tx_msgs,
165 psock->stats.tx_bytes,
166 psock->sk->sk_receive_queue.qlen,
167 atomic_read(&psock->sk->sk_rmem_alloc),
168 psock->sk->sk_write_queue.qlen,
169 atomic_read(&psock->sk->sk_wmem_alloc));
170
171 if (psock->done)
172 seq_puts(seq, "Done ");
173
174 if (psock->tx_stopped)
175 seq_puts(seq, "TxStop ");
176
177 if (psock->rx_stopped)
178 seq_puts(seq, "RxStop ");
179
180 if (psock->tx_kcm)
181 seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
182
183 if (psock->ready_rx_msg)
184 seq_puts(seq, "RdyRx ");
185
186 seq_puts(seq, "\n");
187}
188
189static void
190kcm_format_mux(struct kcm_mux *mux, loff_t idx, struct seq_file *seq)
191{
192 int i, len;
193 struct kcm_sock *kcm;
194 struct kcm_psock *psock;
195
196 /* mux information */
197 seq_printf(seq,
198 "%-6s%-8s %-10llu %-16llu %-10llu %-16llu %-8s %-8s %-8s %-8s ",
199 "mux", "",
200 mux->stats.rx_msgs,
201 mux->stats.rx_bytes,
202 mux->stats.tx_msgs,
203 mux->stats.tx_bytes,
204 "-", "-", "-", "-");
205
206 seq_printf(seq, "KCMs: %d, Psocks %d\n",
207 mux->kcm_socks_cnt, mux->psocks_cnt);
208
209 /* kcm sock information */
210 i = 0;
211 spin_lock_bh(&mux->lock);
212 list_for_each_entry(kcm, &mux->kcm_socks, kcm_sock_list) {
213 kcm_format_sock(kcm, seq, i, &len);
214 i++;
215 }
216 i = 0;
217 list_for_each_entry(psock, &mux->psocks, psock_list) {
218 kcm_format_psock(psock, seq, i, &len);
219 i++;
220 }
221 spin_unlock_bh(&mux->lock);
222}
223
224static int kcm_seq_show(struct seq_file *seq, void *v)
225{
226 struct kcm_proc_mux_state *mux_state;
227
228 mux_state = seq->private;
229 if (v == SEQ_START_TOKEN) {
230 mux_state->idx = 0;
231 kcm_format_mux_header(seq);
232 } else {
233 kcm_format_mux(v, mux_state->idx, seq);
234 mux_state->idx++;
235 }
236 return 0;
237}
238
239static const struct file_operations kcm_seq_fops = {
240 .owner = THIS_MODULE,
241 .open = kcm_seq_open,
242 .read = seq_read,
243 .llseek = seq_lseek,
244};
245
246static struct kcm_seq_muxinfo kcm_seq_muxinfo = {
247 .name = "kcm",
248 .seq_fops = &kcm_seq_fops,
249 .seq_ops = {
250 .show = kcm_seq_show,
251 .start = kcm_seq_start,
252 .next = kcm_seq_next,
253 .stop = kcm_seq_stop,
254 }
255};
256
257static int kcm_proc_register(struct net *net, struct kcm_seq_muxinfo *muxinfo)
258{
259 struct proc_dir_entry *p;
260 int rc = 0;
261
262 p = proc_create_data(muxinfo->name, S_IRUGO, net->proc_net,
263 muxinfo->seq_fops, muxinfo);
264 if (!p)
265 rc = -ENOMEM;
266 return rc;
267}
268EXPORT_SYMBOL(kcm_proc_register);
269
270static void kcm_proc_unregister(struct net *net,
271 struct kcm_seq_muxinfo *muxinfo)
272{
273 remove_proc_entry(muxinfo->name, net->proc_net);
274}
275EXPORT_SYMBOL(kcm_proc_unregister);
276
277static int kcm_stats_seq_show(struct seq_file *seq, void *v)
278{
279 struct kcm_psock_stats psock_stats;
280 struct kcm_mux_stats mux_stats;
281 struct kcm_mux *mux;
282 struct kcm_psock *psock;
283 struct net *net = seq->private;
284 struct kcm_net *knet = net_generic(net, kcm_net_id);
285
286 memset(&mux_stats, 0, sizeof(mux_stats));
287 memset(&psock_stats, 0, sizeof(psock_stats));
288
289 mutex_lock(&knet->mutex);
290
291 aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats);
292 aggregate_psock_stats(&knet->aggregate_psock_stats,
293 &psock_stats);
294
295 list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) {
296 spin_lock_bh(&mux->lock);
297 aggregate_mux_stats(&mux->stats, &mux_stats);
298 aggregate_psock_stats(&mux->aggregate_psock_stats,
299 &psock_stats);
300 list_for_each_entry(psock, &mux->psocks, psock_list)
301 aggregate_psock_stats(&psock->stats, &psock_stats);
302 spin_unlock_bh(&mux->lock);
303 }
304
305 mutex_unlock(&knet->mutex);
306
307 seq_printf(seq,
308 "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s\n",
309 "MUX",
310 "RX-Msgs",
311 "RX-Bytes",
312 "TX-Msgs",
313 "TX-Bytes",
314 "TX-Retries",
315 "Attach",
316 "Unattach",
317 "UnattchRsvd",
318 "RX-RdyDrops");
319
320 seq_printf(seq,
321 "%-8s %-10llu %-16llu %-10llu %-16llu %-10u %-10u %-10u %-10u %-10u\n",
322 "",
323 mux_stats.rx_msgs,
324 mux_stats.rx_bytes,
325 mux_stats.tx_msgs,
326 mux_stats.tx_bytes,
327 mux_stats.tx_retries,
328 mux_stats.psock_attach,
329 mux_stats.psock_unattach_rsvd,
330 mux_stats.psock_unattach,
331 mux_stats.rx_ready_drops);
332
333 seq_printf(seq,
334 "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
335 "Psock",
336 "RX-Msgs",
337 "RX-Bytes",
338 "TX-Msgs",
339 "TX-Bytes",
340 "Reserved",
341 "Unreserved",
342 "RX-Aborts",
343 "RX-MemFail",
344 "RX-NeedMor",
345 "RX-BadLen",
346 "RX-TooBig",
347 "RX-Timeout",
348 "TX-Aborts");
349
350 seq_printf(seq,
351 "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
352 "",
353 psock_stats.rx_msgs,
354 psock_stats.rx_bytes,
355 psock_stats.tx_msgs,
356 psock_stats.tx_bytes,
357 psock_stats.reserved,
358 psock_stats.unreserved,
359 psock_stats.rx_aborts,
360 psock_stats.rx_mem_fail,
361 psock_stats.rx_need_more_hdr,
362 psock_stats.rx_bad_hdr_len,
363 psock_stats.rx_msg_too_big,
364 psock_stats.rx_msg_timeouts,
365 psock_stats.tx_aborts);
366
367 return 0;
368}
369
370static int kcm_stats_seq_open(struct inode *inode, struct file *file)
371{
372 return single_open_net(inode, file, kcm_stats_seq_show);
373}
374
375static const struct file_operations kcm_stats_seq_fops = {
376 .owner = THIS_MODULE,
377 .open = kcm_stats_seq_open,
378 .read = seq_read,
379 .llseek = seq_lseek,
380 .release = single_release_net,
381};
382
383static int kcm_proc_init_net(struct net *net)
384{
385 int err;
386
387 if (!proc_create("kcm_stats", S_IRUGO, net->proc_net,
388 &kcm_stats_seq_fops)) {
389 err = -ENOMEM;
390 goto out_kcm_stats;
391 }
392
393 err = kcm_proc_register(net, &kcm_seq_muxinfo);
394 if (err)
395 goto out_kcm;
396
397 return 0;
398
399out_kcm:
400 remove_proc_entry("kcm_stats", net->proc_net);
401out_kcm_stats:
402 return err;
403}
404
405static void kcm_proc_exit_net(struct net *net)
406{
407 kcm_proc_unregister(net, &kcm_seq_muxinfo);
408 remove_proc_entry("kcm_stats", net->proc_net);
409}
410
411static struct pernet_operations kcm_net_ops = {
412 .init = kcm_proc_init_net,
413 .exit = kcm_proc_exit_net,
414};
415
416int __init kcm_proc_init(void)
417{
418 return register_pernet_subsys(&kcm_net_ops);
419}
420
421void __exit kcm_proc_exit(void)
422{
423 unregister_pernet_subsys(&kcm_net_ops);
424}
425
426#endif /* CONFIG_PROC_FS */
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
new file mode 100644
index 000000000000..40662d73204f
--- /dev/null
+++ b/net/kcm/kcmsock.c
@@ -0,0 +1,2409 @@
1#include <linux/bpf.h>
2#include <linux/errno.h>
3#include <linux/errqueue.h>
4#include <linux/file.h>
5#include <linux/in.h>
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/net.h>
9#include <linux/netdevice.h>
10#include <linux/poll.h>
11#include <linux/rculist.h>
12#include <linux/skbuff.h>
13#include <linux/socket.h>
14#include <linux/uaccess.h>
15#include <linux/workqueue.h>
16#include <net/kcm.h>
17#include <net/netns/generic.h>
18#include <net/sock.h>
19#include <net/tcp.h>
20#include <uapi/linux/kcm.h>
21
22unsigned int kcm_net_id;
23
24static struct kmem_cache *kcm_psockp __read_mostly;
25static struct kmem_cache *kcm_muxp __read_mostly;
26static struct workqueue_struct *kcm_wq;
27
28static inline struct kcm_sock *kcm_sk(const struct sock *sk)
29{
30 return (struct kcm_sock *)sk;
31}
32
33static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
34{
35 return (struct kcm_tx_msg *)skb->cb;
36}
37
38static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb)
39{
40 return (struct kcm_rx_msg *)((void *)skb->cb +
41 offsetof(struct qdisc_skb_cb, data));
42}
43
44static void report_csk_error(struct sock *csk, int err)
45{
46 csk->sk_err = EPIPE;
47 csk->sk_error_report(csk);
48}
49
50/* Callback lock held */
51static void kcm_abort_rx_psock(struct kcm_psock *psock, int err,
52 struct sk_buff *skb)
53{
54 struct sock *csk = psock->sk;
55
56 /* Unrecoverable error in receive */
57
58 del_timer(&psock->rx_msg_timer);
59
60 if (psock->rx_stopped)
61 return;
62
63 psock->rx_stopped = 1;
64 KCM_STATS_INCR(psock->stats.rx_aborts);
65
66 /* Report an error on the lower socket */
67 report_csk_error(csk, err);
68}
69
70static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
71 bool wakeup_kcm)
72{
73 struct sock *csk = psock->sk;
74 struct kcm_mux *mux = psock->mux;
75
76 /* Unrecoverable error in transmit */
77
78 spin_lock_bh(&mux->lock);
79
80 if (psock->tx_stopped) {
81 spin_unlock_bh(&mux->lock);
82 return;
83 }
84
85 psock->tx_stopped = 1;
86 KCM_STATS_INCR(psock->stats.tx_aborts);
87
88 if (!psock->tx_kcm) {
89 /* Take off psocks_avail list */
90 list_del(&psock->psock_avail_list);
91 } else if (wakeup_kcm) {
92 /* In this case psock is being aborted while outside of
93 * write_msgs and psock is reserved. Schedule tx_work
94 * to handle the failure there. Need to commit tx_stopped
95 * before queuing work.
96 */
97 smp_mb();
98
99 queue_work(kcm_wq, &psock->tx_kcm->tx_work);
100 }
101
102 spin_unlock_bh(&mux->lock);
103
104 /* Report error on lower socket */
105 report_csk_error(csk, err);
106}
107
108/* RX mux lock held. */
109static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
110 struct kcm_psock *psock)
111{
112 KCM_STATS_ADD(mux->stats.rx_bytes,
113 psock->stats.rx_bytes - psock->saved_rx_bytes);
114 mux->stats.rx_msgs +=
115 psock->stats.rx_msgs - psock->saved_rx_msgs;
116 psock->saved_rx_msgs = psock->stats.rx_msgs;
117 psock->saved_rx_bytes = psock->stats.rx_bytes;
118}
119
120static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
121 struct kcm_psock *psock)
122{
123 KCM_STATS_ADD(mux->stats.tx_bytes,
124 psock->stats.tx_bytes - psock->saved_tx_bytes);
125 mux->stats.tx_msgs +=
126 psock->stats.tx_msgs - psock->saved_tx_msgs;
127 psock->saved_tx_msgs = psock->stats.tx_msgs;
128 psock->saved_tx_bytes = psock->stats.tx_bytes;
129}
130
131static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
132
133/* KCM is ready to receive messages on its queue-- either the KCM is new or
134 * has become unblocked after being blocked on a full socket buffer. Queue any
135 * pending ready messages on a psock. RX mux lock held.
136 */
137static void kcm_rcv_ready(struct kcm_sock *kcm)
138{
139 struct kcm_mux *mux = kcm->mux;
140 struct kcm_psock *psock;
141 struct sk_buff *skb;
142
143 if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled))
144 return;
145
146 while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) {
147 if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
148 /* Assuming buffer limit has been reached */
149 skb_queue_head(&mux->rx_hold_queue, skb);
150 WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
151 return;
152 }
153 }
154
155 while (!list_empty(&mux->psocks_ready)) {
156 psock = list_first_entry(&mux->psocks_ready, struct kcm_psock,
157 psock_ready_list);
158
159 if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) {
160 /* Assuming buffer limit has been reached */
161 WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
162 return;
163 }
164
165 /* Consumed the ready message on the psock. Schedule rx_work to
166 * get more messages.
167 */
168 list_del(&psock->psock_ready_list);
169 psock->ready_rx_msg = NULL;
170
171 /* Commit clearing of ready_rx_msg for queuing work */
172 smp_mb();
173
174 queue_work(kcm_wq, &psock->rx_work);
175 }
176
177 /* Buffer limit is okay now, add to ready list */
178 list_add_tail(&kcm->wait_rx_list,
179 &kcm->mux->kcm_rx_waiters);
180 kcm->rx_wait = true;
181}
182
183static void kcm_rfree(struct sk_buff *skb)
184{
185 struct sock *sk = skb->sk;
186 struct kcm_sock *kcm = kcm_sk(sk);
187 struct kcm_mux *mux = kcm->mux;
188 unsigned int len = skb->truesize;
189
190 sk_mem_uncharge(sk, len);
191 atomic_sub(len, &sk->sk_rmem_alloc);
192
193 /* For reading rx_wait and rx_psock without holding lock */
194 smp_mb__after_atomic();
195
196 if (!kcm->rx_wait && !kcm->rx_psock &&
197 sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
198 spin_lock_bh(&mux->rx_lock);
199 kcm_rcv_ready(kcm);
200 spin_unlock_bh(&mux->rx_lock);
201 }
202}
203
204static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
205{
206 struct sk_buff_head *list = &sk->sk_receive_queue;
207
208 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
209 return -ENOMEM;
210
211 if (!sk_rmem_schedule(sk, skb, skb->truesize))
212 return -ENOBUFS;
213
214 skb->dev = NULL;
215
216 skb_orphan(skb);
217 skb->sk = sk;
218 skb->destructor = kcm_rfree;
219 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
220 sk_mem_charge(sk, skb->truesize);
221
222 skb_queue_tail(list, skb);
223
224 if (!sock_flag(sk, SOCK_DEAD))
225 sk->sk_data_ready(sk);
226
227 return 0;
228}
229
230/* Requeue received messages for a kcm socket to other kcm sockets. This is
231 * called when a kcm socket is receive disabled.
232 * RX mux lock held.
233 */
234static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head)
235{
236 struct sk_buff *skb;
237 struct kcm_sock *kcm;
238
239 while ((skb = __skb_dequeue(head))) {
240 /* Reset destructor to avoid calling kcm_rcv_ready */
241 skb->destructor = sock_rfree;
242 skb_orphan(skb);
243try_again:
244 if (list_empty(&mux->kcm_rx_waiters)) {
245 skb_queue_tail(&mux->rx_hold_queue, skb);
246 continue;
247 }
248
249 kcm = list_first_entry(&mux->kcm_rx_waiters,
250 struct kcm_sock, wait_rx_list);
251
252 if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
253 /* Should mean socket buffer full */
254 list_del(&kcm->wait_rx_list);
255 kcm->rx_wait = false;
256
257 /* Commit rx_wait to read in kcm_free */
258 smp_wmb();
259
260 goto try_again;
261 }
262 }
263}
264
265/* Lower sock lock held */
266static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
267 struct sk_buff *head)
268{
269 struct kcm_mux *mux = psock->mux;
270 struct kcm_sock *kcm;
271
272 WARN_ON(psock->ready_rx_msg);
273
274 if (psock->rx_kcm)
275 return psock->rx_kcm;
276
277 spin_lock_bh(&mux->rx_lock);
278
279 if (psock->rx_kcm) {
280 spin_unlock_bh(&mux->rx_lock);
281 return psock->rx_kcm;
282 }
283
284 kcm_update_rx_mux_stats(mux, psock);
285
286 if (list_empty(&mux->kcm_rx_waiters)) {
287 psock->ready_rx_msg = head;
288 list_add_tail(&psock->psock_ready_list,
289 &mux->psocks_ready);
290 spin_unlock_bh(&mux->rx_lock);
291 return NULL;
292 }
293
294 kcm = list_first_entry(&mux->kcm_rx_waiters,
295 struct kcm_sock, wait_rx_list);
296 list_del(&kcm->wait_rx_list);
297 kcm->rx_wait = false;
298
299 psock->rx_kcm = kcm;
300 kcm->rx_psock = psock;
301
302 spin_unlock_bh(&mux->rx_lock);
303
304 return kcm;
305}
306
307static void kcm_done(struct kcm_sock *kcm);
308
309static void kcm_done_work(struct work_struct *w)
310{
311 kcm_done(container_of(w, struct kcm_sock, done_work));
312}
313
314/* Lower sock held */
315static void unreserve_rx_kcm(struct kcm_psock *psock,
316 bool rcv_ready)
317{
318 struct kcm_sock *kcm = psock->rx_kcm;
319 struct kcm_mux *mux = psock->mux;
320
321 if (!kcm)
322 return;
323
324 spin_lock_bh(&mux->rx_lock);
325
326 psock->rx_kcm = NULL;
327 kcm->rx_psock = NULL;
328
329 /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
330 * kcm_rfree
331 */
332 smp_mb();
333
334 if (unlikely(kcm->done)) {
335 spin_unlock_bh(&mux->rx_lock);
336
337		/* Need to run kcm_done in a task since we need to acquire
338 * callback locks which may already be held here.
339 */
340 INIT_WORK(&kcm->done_work, kcm_done_work);
341 schedule_work(&kcm->done_work);
342 return;
343 }
344
345 if (unlikely(kcm->rx_disabled)) {
346 requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
347 } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) {
348 /* Check for degenerative race with rx_wait that all
349 * data was dequeued (accounted for in kcm_rfree).
350 */
351 kcm_rcv_ready(kcm);
352 }
353 spin_unlock_bh(&mux->rx_lock);
354}
355
356static void kcm_start_rx_timer(struct kcm_psock *psock)
357{
358 if (psock->sk->sk_rcvtimeo)
359 mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo);
360}
361
362/* Macro to invoke filter function. */
363#define KCM_RUN_FILTER(prog, ctx) \
364 (*prog->bpf_func)(ctx, prog->insnsi)
365
366/* Lower socket lock held */
367static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
368 unsigned int orig_offset, size_t orig_len)
369{
370 struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data;
371 struct kcm_rx_msg *rxm;
372 struct kcm_sock *kcm;
373 struct sk_buff *head, *skb;
374 size_t eaten = 0, cand_len;
375 ssize_t extra;
376 int err;
377 bool cloned_orig = false;
378
379 if (psock->ready_rx_msg)
380 return 0;
381
382 head = psock->rx_skb_head;
383 if (head) {
384 /* Message already in progress */
385
386 rxm = kcm_rx_msg(head);
387 if (unlikely(rxm->early_eaten)) {
388 /* Already some number of bytes on the receive sock
389 * data saved in rx_skb_head, just indicate they
390 * are consumed.
391 */
392 eaten = orig_len <= rxm->early_eaten ?
393 orig_len : rxm->early_eaten;
394 rxm->early_eaten -= eaten;
395
396 return eaten;
397 }
398
399 if (unlikely(orig_offset)) {
400 /* Getting data with a non-zero offset when a message is
401 * in progress is not expected. If it does happen, we
402 * need to clone and pull since we can't deal with
403			 * offsets in the skbs for a message except in the head.
404 */
405 orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
406 if (!orig_skb) {
407 KCM_STATS_INCR(psock->stats.rx_mem_fail);
408 desc->error = -ENOMEM;
409 return 0;
410 }
411 if (!pskb_pull(orig_skb, orig_offset)) {
412 KCM_STATS_INCR(psock->stats.rx_mem_fail);
413 kfree_skb(orig_skb);
414 desc->error = -ENOMEM;
415 return 0;
416 }
417 cloned_orig = true;
418 orig_offset = 0;
419 }
420
421 if (!psock->rx_skb_nextp) {
422 /* We are going to append to the frags_list of head.
423 * Need to unshare the frag_list.
424 */
425 err = skb_unclone(head, GFP_ATOMIC);
426 if (err) {
427 KCM_STATS_INCR(psock->stats.rx_mem_fail);
428 desc->error = err;
429 return 0;
430 }
431
432 if (unlikely(skb_shinfo(head)->frag_list)) {
433 /* We can't append to an sk_buff that already
434 * has a frag_list. We create a new head, point
435 * the frag_list of that to the old head, and
436 * then are able to use the old head->next for
437 * appending to the message.
438 */
439 if (WARN_ON(head->next)) {
440 desc->error = -EINVAL;
441 return 0;
442 }
443
444 skb = alloc_skb(0, GFP_ATOMIC);
445 if (!skb) {
446 KCM_STATS_INCR(psock->stats.rx_mem_fail);
447 desc->error = -ENOMEM;
448 return 0;
449 }
450 skb->len = head->len;
451 skb->data_len = head->len;
452 skb->truesize = head->truesize;
453 *kcm_rx_msg(skb) = *kcm_rx_msg(head);
454 psock->rx_skb_nextp = &head->next;
455 skb_shinfo(skb)->frag_list = head;
456 psock->rx_skb_head = skb;
457 head = skb;
458 } else {
459 psock->rx_skb_nextp =
460 &skb_shinfo(head)->frag_list;
461 }
462 }
463 }
464
465 while (eaten < orig_len) {
466 /* Always clone since we will consume something */
467 skb = skb_clone(orig_skb, GFP_ATOMIC);
468 if (!skb) {
469 KCM_STATS_INCR(psock->stats.rx_mem_fail);
470 desc->error = -ENOMEM;
471 break;
472 }
473
474 cand_len = orig_len - eaten;
475
476 head = psock->rx_skb_head;
477 if (!head) {
478 head = skb;
479 psock->rx_skb_head = head;
480 /* Will set rx_skb_nextp on next packet if needed */
481 psock->rx_skb_nextp = NULL;
482 rxm = kcm_rx_msg(head);
483 memset(rxm, 0, sizeof(*rxm));
484 rxm->offset = orig_offset + eaten;
485 } else {
486 /* Unclone since we may be appending to an skb that we
487 * already share a frag_list with.
488 */
489 err = skb_unclone(skb, GFP_ATOMIC);
490 if (err) {
491 KCM_STATS_INCR(psock->stats.rx_mem_fail);
492 desc->error = err;
493 break;
494 }
495
496 rxm = kcm_rx_msg(head);
497 *psock->rx_skb_nextp = skb;
498 psock->rx_skb_nextp = &skb->next;
499 head->data_len += skb->len;
500 head->len += skb->len;
501 head->truesize += skb->truesize;
502 }
503
504 if (!rxm->full_len) {
505 ssize_t len;
506
507 len = KCM_RUN_FILTER(psock->bpf_prog, head);
508
509 if (!len) {
510 /* Need more header to determine length */
511 if (!rxm->accum_len) {
512 /* Start RX timer for new message */
513 kcm_start_rx_timer(psock);
514 }
515 rxm->accum_len += cand_len;
516 eaten += cand_len;
517 KCM_STATS_INCR(psock->stats.rx_need_more_hdr);
518 WARN_ON(eaten != orig_len);
519 break;
520 } else if (len > psock->sk->sk_rcvbuf) {
521 /* Message length exceeds maximum allowed */
522 KCM_STATS_INCR(psock->stats.rx_msg_too_big);
523 desc->error = -EMSGSIZE;
524 psock->rx_skb_head = NULL;
525 kcm_abort_rx_psock(psock, EMSGSIZE, head);
526 break;
527 } else if (len <= (ssize_t)head->len -
528 skb->len - rxm->offset) {
529 /* Length must be into new skb (and also
530 * greater than zero)
531 */
532 KCM_STATS_INCR(psock->stats.rx_bad_hdr_len);
533 desc->error = -EPROTO;
534 psock->rx_skb_head = NULL;
535 kcm_abort_rx_psock(psock, EPROTO, head);
536 break;
537 }
538
539 rxm->full_len = len;
540 }
541
542 extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len;
543
544 if (extra < 0) {
545 /* Message not complete yet. */
546 if (rxm->full_len - rxm->accum_len >
547 tcp_inq(psock->sk)) {
548				/* Don't have the whole message in the socket
549 * buffer. Set psock->rx_need_bytes to wait for
550 * the rest of the message. Also, set "early
551 * eaten" since we've already buffered the skb
552 * but don't consume yet per tcp_read_sock.
553 */
554
555 if (!rxm->accum_len) {
556 /* Start RX timer for new message */
557 kcm_start_rx_timer(psock);
558 }
559
560 psock->rx_need_bytes = rxm->full_len -
561 rxm->accum_len;
562 rxm->accum_len += cand_len;
563 rxm->early_eaten = cand_len;
564 KCM_STATS_ADD(psock->stats.rx_bytes, cand_len);
565 desc->count = 0; /* Stop reading socket */
566 break;
567 }
568 rxm->accum_len += cand_len;
569 eaten += cand_len;
570 WARN_ON(eaten != orig_len);
571 break;
572 }
573
574		/* Positive extra indicates more bytes than needed for the
575 * message
576 */
577
578 WARN_ON(extra > cand_len);
579
580 eaten += (cand_len - extra);
581
582 /* Hurray, we have a new message! */
583 del_timer(&psock->rx_msg_timer);
584 psock->rx_skb_head = NULL;
585 KCM_STATS_INCR(psock->stats.rx_msgs);
586
587try_queue:
588 kcm = reserve_rx_kcm(psock, head);
589 if (!kcm) {
590 /* Unable to reserve a KCM, message is held in psock. */
591 break;
592 }
593
594 if (kcm_queue_rcv_skb(&kcm->sk, head)) {
595 /* Should mean socket buffer full */
596 unreserve_rx_kcm(psock, false);
597 goto try_queue;
598 }
599 }
600
601 if (cloned_orig)
602 kfree_skb(orig_skb);
603
604 KCM_STATS_ADD(psock->stats.rx_bytes, eaten);
605
606 return eaten;
607}
608
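/* Illustrative sketch (not part of this file): the program invoked through
 * KCM_RUN_FILTER above is a BPF_PROG_TYPE_SOCKET_FILTER program whose return
 * value kcm_tcp_recv interprets as the total length of the current message,
 * with 0 meaning "need more header". A minimal eBPF parser for a hypothetical
 * framing that uses a 2-byte big-endian length prefix (header bytes counted
 * in the returned length) could look like this; the framing, section name and
 * helper availability are assumptions for the example, not part of KCM.
 */

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("socket")
int kcm_len_parser(struct __sk_buff *skb)
{
	__u16 len_be;

	/* Not enough bytes for the length field yet: ask for more header */
	if (bpf_skb_load_bytes(skb, 0, &len_be, sizeof(len_be)) < 0)
		return 0;

	/* Full message length = payload length + 2-byte length prefix */
	return bpf_ntohs(len_be) + sizeof(len_be);
}

char _license[] SEC("license") = "GPL";
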
609/* Called with lock held on lower socket */
610static int psock_tcp_read_sock(struct kcm_psock *psock)
611{
612 read_descriptor_t desc;
613
614 desc.arg.data = psock;
615 desc.error = 0;
616 desc.count = 1; /* give more than one skb per call */
617
618 /* sk should be locked here, so okay to do tcp_read_sock */
619 tcp_read_sock(psock->sk, &desc, kcm_tcp_recv);
620
621 unreserve_rx_kcm(psock, true);
622
623 return desc.error;
624}
625
626/* Lower sock lock held */
627static void psock_tcp_data_ready(struct sock *sk)
628{
629 struct kcm_psock *psock;
630
631 read_lock_bh(&sk->sk_callback_lock);
632
633 psock = (struct kcm_psock *)sk->sk_user_data;
634 if (unlikely(!psock || psock->rx_stopped))
635 goto out;
636
637 if (psock->ready_rx_msg)
638 goto out;
639
640 if (psock->rx_need_bytes) {
641 if (tcp_inq(sk) >= psock->rx_need_bytes)
642 psock->rx_need_bytes = 0;
643 else
644 goto out;
645 }
646
647 if (psock_tcp_read_sock(psock) == -ENOMEM)
648 queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
649
650out:
651 read_unlock_bh(&sk->sk_callback_lock);
652}
653
654static void do_psock_rx_work(struct kcm_psock *psock)
655{
656 read_descriptor_t rd_desc;
657 struct sock *csk = psock->sk;
658
659 /* We need the read lock to synchronize with psock_tcp_data_ready. We
660 * need the socket lock for calling tcp_read_sock.
661 */
662 lock_sock(csk);
663 read_lock_bh(&csk->sk_callback_lock);
664
665 if (unlikely(csk->sk_user_data != psock))
666 goto out;
667
668 if (unlikely(psock->rx_stopped))
669 goto out;
670
671 if (psock->ready_rx_msg)
672 goto out;
673
674 rd_desc.arg.data = psock;
675
676 if (psock_tcp_read_sock(psock) == -ENOMEM)
677 queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
678
679out:
680 read_unlock_bh(&csk->sk_callback_lock);
681 release_sock(csk);
682}
683
684static void psock_rx_work(struct work_struct *w)
685{
686 do_psock_rx_work(container_of(w, struct kcm_psock, rx_work));
687}
688
689static void psock_rx_delayed_work(struct work_struct *w)
690{
691 do_psock_rx_work(container_of(w, struct kcm_psock,
692 rx_delayed_work.work));
693}
694
695static void psock_tcp_state_change(struct sock *sk)
696{
697 /* TCP only does a POLLIN for a half close. Do a POLLHUP here
698	 * since the application will normally not poll with POLLIN
699 * on the TCP sockets.
700 */
701
702 report_csk_error(sk, EPIPE);
703}
704
705static void psock_tcp_write_space(struct sock *sk)
706{
707 struct kcm_psock *psock;
708 struct kcm_mux *mux;
709 struct kcm_sock *kcm;
710
711 read_lock_bh(&sk->sk_callback_lock);
712
713 psock = (struct kcm_psock *)sk->sk_user_data;
714 if (unlikely(!psock))
715 goto out;
716
717 mux = psock->mux;
718
719 spin_lock_bh(&mux->lock);
720
721	/* Check if the socket is reserved; if so, someone is waiting to send. */
722 kcm = psock->tx_kcm;
723 if (kcm)
724 queue_work(kcm_wq, &kcm->tx_work);
725
726 spin_unlock_bh(&mux->lock);
727out:
728 read_unlock_bh(&sk->sk_callback_lock);
729}
730
731static void unreserve_psock(struct kcm_sock *kcm);
732
733/* kcm sock is locked. */
734static struct kcm_psock *reserve_psock(struct kcm_sock *kcm)
735{
736 struct kcm_mux *mux = kcm->mux;
737 struct kcm_psock *psock;
738
739 psock = kcm->tx_psock;
740
741 smp_rmb(); /* Must read tx_psock before tx_wait */
742
743 if (psock) {
744 WARN_ON(kcm->tx_wait);
745 if (unlikely(psock->tx_stopped))
746 unreserve_psock(kcm);
747 else
748 return kcm->tx_psock;
749 }
750
751 spin_lock_bh(&mux->lock);
752
753	/* Check again under lock to see if a psock was reserved for this
754	 * kcm via psock_now_avail.
755 */
756 psock = kcm->tx_psock;
757 if (unlikely(psock)) {
758 WARN_ON(kcm->tx_wait);
759 spin_unlock_bh(&mux->lock);
760 return kcm->tx_psock;
761 }
762
763 if (!list_empty(&mux->psocks_avail)) {
764 psock = list_first_entry(&mux->psocks_avail,
765 struct kcm_psock,
766 psock_avail_list);
767 list_del(&psock->psock_avail_list);
768 if (kcm->tx_wait) {
769 list_del(&kcm->wait_psock_list);
770 kcm->tx_wait = false;
771 }
772 kcm->tx_psock = psock;
773 psock->tx_kcm = kcm;
774 KCM_STATS_INCR(psock->stats.reserved);
775 } else if (!kcm->tx_wait) {
776 list_add_tail(&kcm->wait_psock_list,
777 &mux->kcm_tx_waiters);
778 kcm->tx_wait = true;
779 }
780
781 spin_unlock_bh(&mux->lock);
782
783 return psock;
784}
785
786/* mux lock held */
787static void psock_now_avail(struct kcm_psock *psock)
788{
789 struct kcm_mux *mux = psock->mux;
790 struct kcm_sock *kcm;
791
792 if (list_empty(&mux->kcm_tx_waiters)) {
793 list_add_tail(&psock->psock_avail_list,
794 &mux->psocks_avail);
795 } else {
796 kcm = list_first_entry(&mux->kcm_tx_waiters,
797 struct kcm_sock,
798 wait_psock_list);
799 list_del(&kcm->wait_psock_list);
800 kcm->tx_wait = false;
801 psock->tx_kcm = kcm;
802
803 /* Commit before changing tx_psock since that is read in
804 * reserve_psock before queuing work.
805 */
806 smp_mb();
807
808 kcm->tx_psock = psock;
809 KCM_STATS_INCR(psock->stats.reserved);
810 queue_work(kcm_wq, &kcm->tx_work);
811 }
812}
813
814/* kcm sock is locked. */
815static void unreserve_psock(struct kcm_sock *kcm)
816{
817 struct kcm_psock *psock;
818 struct kcm_mux *mux = kcm->mux;
819
820 spin_lock_bh(&mux->lock);
821
822 psock = kcm->tx_psock;
823
824 if (WARN_ON(!psock)) {
825 spin_unlock_bh(&mux->lock);
826 return;
827 }
828
829 smp_rmb(); /* Read tx_psock before tx_wait */
830
831 kcm_update_tx_mux_stats(mux, psock);
832
833 WARN_ON(kcm->tx_wait);
834
835 kcm->tx_psock = NULL;
836 psock->tx_kcm = NULL;
837 KCM_STATS_INCR(psock->stats.unreserved);
838
839 if (unlikely(psock->tx_stopped)) {
840 if (psock->done) {
841 /* Deferred free */
842 list_del(&psock->psock_list);
843 mux->psocks_cnt--;
844 sock_put(psock->sk);
845 fput(psock->sk->sk_socket->file);
846 kmem_cache_free(kcm_psockp, psock);
847 }
848
849 /* Don't put back on available list */
850
851 spin_unlock_bh(&mux->lock);
852
853 return;
854 }
855
856 psock_now_avail(psock);
857
858 spin_unlock_bh(&mux->lock);
859}
860
861static void kcm_report_tx_retry(struct kcm_sock *kcm)
862{
863 struct kcm_mux *mux = kcm->mux;
864
865 spin_lock_bh(&mux->lock);
866 KCM_STATS_INCR(mux->stats.tx_retries);
867 spin_unlock_bh(&mux->lock);
868}
869
870/* Write any messages ready on the kcm socket. Called with kcm sock lock
871 * held. Return bytes actually sent or error.
872 */
873static int kcm_write_msgs(struct kcm_sock *kcm)
874{
875 struct sock *sk = &kcm->sk;
876 struct kcm_psock *psock;
877 struct sk_buff *skb, *head;
878 struct kcm_tx_msg *txm;
879 unsigned short fragidx, frag_offset;
880 unsigned int sent, total_sent = 0;
881 int ret = 0;
882
883 kcm->tx_wait_more = false;
884 psock = kcm->tx_psock;
885 if (unlikely(psock && psock->tx_stopped)) {
886 /* A reserved psock was aborted asynchronously. Unreserve
887 * it and we'll retry the message.
888 */
889 unreserve_psock(kcm);
890 kcm_report_tx_retry(kcm);
891 if (skb_queue_empty(&sk->sk_write_queue))
892 return 0;
893
894 kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0;
895
896 } else if (skb_queue_empty(&sk->sk_write_queue)) {
897 return 0;
898 }
899
900 head = skb_peek(&sk->sk_write_queue);
901 txm = kcm_tx_msg(head);
902
903 if (txm->sent) {
904 /* Send of first skbuff in queue already in progress */
905 if (WARN_ON(!psock)) {
906 ret = -EINVAL;
907 goto out;
908 }
909 sent = txm->sent;
910 frag_offset = txm->frag_offset;
911 fragidx = txm->fragidx;
912 skb = txm->frag_skb;
913
914 goto do_frag;
915 }
916
917try_again:
918 psock = reserve_psock(kcm);
919 if (!psock)
920 goto out;
921
922 do {
923 skb = head;
924 txm = kcm_tx_msg(head);
925 sent = 0;
926
927do_frag_list:
928 if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
929 ret = -EINVAL;
930 goto out;
931 }
932
933 for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags;
934 fragidx++) {
935 skb_frag_t *frag;
936
937 frag_offset = 0;
938do_frag:
939 frag = &skb_shinfo(skb)->frags[fragidx];
940 if (WARN_ON(!frag->size)) {
941 ret = -EINVAL;
942 goto out;
943 }
944
945 ret = kernel_sendpage(psock->sk->sk_socket,
946 frag->page.p,
947 frag->page_offset + frag_offset,
948 frag->size - frag_offset,
949 MSG_DONTWAIT);
950 if (ret <= 0) {
951 if (ret == -EAGAIN) {
952 /* Save state to try again when there's
953 * write space on the socket
954 */
955 txm->sent = sent;
956 txm->frag_offset = frag_offset;
957 txm->fragidx = fragidx;
958 txm->frag_skb = skb;
959
960 ret = 0;
961 goto out;
962 }
963
964 /* Hard failure in sending message, abort this
965 * psock since it has lost framing
966				 * synchronization and retry sending the
967 * message from the beginning.
968 */
969 kcm_abort_tx_psock(psock, ret ? -ret : EPIPE,
970 true);
971 unreserve_psock(kcm);
972
973 txm->sent = 0;
974 kcm_report_tx_retry(kcm);
975 ret = 0;
976
977 goto try_again;
978 }
979
980 sent += ret;
981 frag_offset += ret;
982 KCM_STATS_ADD(psock->stats.tx_bytes, ret);
983 if (frag_offset < frag->size) {
984 /* Not finished with this frag */
985 goto do_frag;
986 }
987 }
988
989 if (skb == head) {
990 if (skb_has_frag_list(skb)) {
991 skb = skb_shinfo(skb)->frag_list;
992 goto do_frag_list;
993 }
994 } else if (skb->next) {
995 skb = skb->next;
996 goto do_frag_list;
997 }
998
999 /* Successfully sent the whole packet, account for it. */
1000 skb_dequeue(&sk->sk_write_queue);
1001 kfree_skb(head);
1002 sk->sk_wmem_queued -= sent;
1003 total_sent += sent;
1004 KCM_STATS_INCR(psock->stats.tx_msgs);
1005 } while ((head = skb_peek(&sk->sk_write_queue)));
1006out:
1007 if (!head) {
1008 /* Done with all queued messages. */
1009 WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
1010 unreserve_psock(kcm);
1011 }
1012
1013 /* Check if write space is available */
1014 sk->sk_write_space(sk);
1015
1016 return total_sent ? : ret;
1017}
1018
1019static void kcm_tx_work(struct work_struct *w)
1020{
1021 struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work);
1022 struct sock *sk = &kcm->sk;
1023 int err;
1024
1025 lock_sock(sk);
1026
1027 /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx
1028 * aborts
1029 */
1030 err = kcm_write_msgs(kcm);
1031 if (err < 0) {
1032 /* Hard failure in write, report error on KCM socket */
1033 pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err);
1034 report_csk_error(&kcm->sk, -err);
1035 goto out;
1036 }
1037
1038 /* Primarily for SOCK_SEQPACKET sockets */
1039 if (likely(sk->sk_socket) &&
1040 test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
1041 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1042 sk->sk_write_space(sk);
1043 }
1044
1045out:
1046 release_sock(sk);
1047}
1048
1049static void kcm_push(struct kcm_sock *kcm)
1050{
1051 if (kcm->tx_wait_more)
1052 kcm_write_msgs(kcm);
1053}
1054
1055static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
1056 int offset, size_t size, int flags)
1057
1058{
1059 struct sock *sk = sock->sk;
1060 struct kcm_sock *kcm = kcm_sk(sk);
1061 struct sk_buff *skb = NULL, *head = NULL;
1062 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1063 bool eor;
1064 int err = 0;
1065 int i;
1066
1067 if (flags & MSG_SENDPAGE_NOTLAST)
1068 flags |= MSG_MORE;
1069
1070 /* No MSG_EOR from splice, only look at MSG_MORE */
1071 eor = !(flags & MSG_MORE);
1072
1073 lock_sock(sk);
1074
1075 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1076
1077 err = -EPIPE;
1078 if (sk->sk_err)
1079 goto out_error;
1080
1081 if (kcm->seq_skb) {
1082 /* Previously opened message */
1083 head = kcm->seq_skb;
1084 skb = kcm_tx_msg(head)->last_skb;
1085 i = skb_shinfo(skb)->nr_frags;
1086
1087 if (skb_can_coalesce(skb, i, page, offset)) {
1088 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
1089 skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
1090 goto coalesced;
1091 }
1092
1093 if (i >= MAX_SKB_FRAGS) {
1094 struct sk_buff *tskb;
1095
1096 tskb = alloc_skb(0, sk->sk_allocation);
1097 while (!tskb) {
1098 kcm_push(kcm);
1099 err = sk_stream_wait_memory(sk, &timeo);
1100 if (err)
1101 goto out_error;
1102 }
1103
1104 if (head == skb)
1105 skb_shinfo(head)->frag_list = tskb;
1106 else
1107 skb->next = tskb;
1108
1109 skb = tskb;
1110 skb->ip_summed = CHECKSUM_UNNECESSARY;
1111 i = 0;
1112 }
1113 } else {
1114 /* Call the sk_stream functions to manage the sndbuf mem. */
1115 if (!sk_stream_memory_free(sk)) {
1116 kcm_push(kcm);
1117 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1118 err = sk_stream_wait_memory(sk, &timeo);
1119 if (err)
1120 goto out_error;
1121 }
1122
1123 head = alloc_skb(0, sk->sk_allocation);
1124 while (!head) {
1125 kcm_push(kcm);
1126 err = sk_stream_wait_memory(sk, &timeo);
1127 if (err)
1128 goto out_error;
1129 }
1130
1131 skb = head;
1132 i = 0;
1133 }
1134
1135 get_page(page);
1136 skb_fill_page_desc(skb, i, page, offset, size);
1137 skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
1138
1139coalesced:
1140 skb->len += size;
1141 skb->data_len += size;
1142 skb->truesize += size;
1143 sk->sk_wmem_queued += size;
1144 sk_mem_charge(sk, size);
1145
1146 if (head != skb) {
1147 head->len += size;
1148 head->data_len += size;
1149 head->truesize += size;
1150 }
1151
1152 if (eor) {
1153 bool not_busy = skb_queue_empty(&sk->sk_write_queue);
1154
1155 /* Message complete, queue it on send buffer */
1156 __skb_queue_tail(&sk->sk_write_queue, head);
1157 kcm->seq_skb = NULL;
1158 KCM_STATS_INCR(kcm->stats.tx_msgs);
1159
1160 if (flags & MSG_BATCH) {
1161 kcm->tx_wait_more = true;
1162 } else if (kcm->tx_wait_more || not_busy) {
1163 err = kcm_write_msgs(kcm);
1164 if (err < 0) {
1165 /* We got a hard error in write_msgs but have
1166 * already queued this message. Report an error
1167 * in the socket, but don't affect return value
1168 * from sendmsg
1169 */
1170 pr_warn("KCM: Hard failure on kcm_write_msgs\n");
1171 report_csk_error(&kcm->sk, -err);
1172 }
1173 }
1174 } else {
1175 /* Message not complete, save state */
1176 kcm->seq_skb = head;
1177 kcm_tx_msg(head)->last_skb = skb;
1178 }
1179
1180 KCM_STATS_ADD(kcm->stats.tx_bytes, size);
1181
1182 release_sock(sk);
1183 return size;
1184
1185out_error:
1186 kcm_push(kcm);
1187
1188 err = sk_stream_error(sk, flags, err);
1189
1190 /* make sure we wake any epoll edge trigger waiter */
1191 if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
1192 sk->sk_write_space(sk);
1193
1194 release_sock(sk);
1195 return err;
1196}
1197
1198static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1199{
1200 struct sock *sk = sock->sk;
1201 struct kcm_sock *kcm = kcm_sk(sk);
1202 struct sk_buff *skb = NULL, *head = NULL;
1203 size_t copy, copied = 0;
1204 long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1205 int eor = (sock->type == SOCK_DGRAM) ?
1206 !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR);
1207 int err = -EPIPE;
1208
1209 lock_sock(sk);
1210
1211 /* Per tcp_sendmsg this should be in poll */
1212 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1213
1214 if (sk->sk_err)
1215 goto out_error;
1216
1217 if (kcm->seq_skb) {
1218 /* Previously opened message */
1219 head = kcm->seq_skb;
1220 skb = kcm_tx_msg(head)->last_skb;
1221 goto start;
1222 }
1223
1224 /* Call the sk_stream functions to manage the sndbuf mem. */
1225 if (!sk_stream_memory_free(sk)) {
1226 kcm_push(kcm);
1227 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1228 err = sk_stream_wait_memory(sk, &timeo);
1229 if (err)
1230 goto out_error;
1231 }
1232
1233 /* New message, alloc head skb */
1234 head = alloc_skb(0, sk->sk_allocation);
1235 while (!head) {
1236 kcm_push(kcm);
1237 err = sk_stream_wait_memory(sk, &timeo);
1238 if (err)
1239 goto out_error;
1240
1241 head = alloc_skb(0, sk->sk_allocation);
1242 }
1243
1244 skb = head;
1245
1246 /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
1247 * csum_and_copy_from_iter from skb_do_copy_data_nocache.
1248 */
1249 skb->ip_summed = CHECKSUM_UNNECESSARY;
1250
1251start:
1252 while (msg_data_left(msg)) {
1253 bool merge = true;
1254 int i = skb_shinfo(skb)->nr_frags;
1255 struct page_frag *pfrag = sk_page_frag(sk);
1256
1257 if (!sk_page_frag_refill(sk, pfrag))
1258 goto wait_for_memory;
1259
1260 if (!skb_can_coalesce(skb, i, pfrag->page,
1261 pfrag->offset)) {
1262 if (i == MAX_SKB_FRAGS) {
1263 struct sk_buff *tskb;
1264
1265 tskb = alloc_skb(0, sk->sk_allocation);
1266 if (!tskb)
1267 goto wait_for_memory;
1268
1269 if (head == skb)
1270 skb_shinfo(head)->frag_list = tskb;
1271 else
1272 skb->next = tskb;
1273
1274 skb = tskb;
1275 skb->ip_summed = CHECKSUM_UNNECESSARY;
1276 continue;
1277 }
1278 merge = false;
1279 }
1280
1281 copy = min_t(int, msg_data_left(msg),
1282 pfrag->size - pfrag->offset);
1283
1284 if (!sk_wmem_schedule(sk, copy))
1285 goto wait_for_memory;
1286
1287 err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
1288 pfrag->page,
1289 pfrag->offset,
1290 copy);
1291 if (err)
1292 goto out_error;
1293
1294 /* Update the skb. */
1295 if (merge) {
1296 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1297 } else {
1298 skb_fill_page_desc(skb, i, pfrag->page,
1299 pfrag->offset, copy);
1300 get_page(pfrag->page);
1301 }
1302
1303 pfrag->offset += copy;
1304 copied += copy;
1305 if (head != skb) {
1306 head->len += copy;
1307 head->data_len += copy;
1308 }
1309
1310 continue;
1311
1312wait_for_memory:
1313 kcm_push(kcm);
1314 err = sk_stream_wait_memory(sk, &timeo);
1315 if (err)
1316 goto out_error;
1317 }
1318
1319 if (eor) {
1320 bool not_busy = skb_queue_empty(&sk->sk_write_queue);
1321
1322 /* Message complete, queue it on send buffer */
1323 __skb_queue_tail(&sk->sk_write_queue, head);
1324 kcm->seq_skb = NULL;
1325 KCM_STATS_INCR(kcm->stats.tx_msgs);
1326
1327 if (msg->msg_flags & MSG_BATCH) {
1328 kcm->tx_wait_more = true;
1329 } else if (kcm->tx_wait_more || not_busy) {
1330 err = kcm_write_msgs(kcm);
1331 if (err < 0) {
1332 /* We got a hard error in write_msgs but have
1333 * already queued this message. Report an error
1334 * in the socket, but don't affect return value
1335 * from sendmsg
1336 */
1337 pr_warn("KCM: Hard failure on kcm_write_msgs\n");
1338 report_csk_error(&kcm->sk, -err);
1339 }
1340 }
1341 } else {
1342 /* Message not complete, save state */
1343partial_message:
1344 kcm->seq_skb = head;
1345 kcm_tx_msg(head)->last_skb = skb;
1346 }
1347
1348 KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
1349
1350 release_sock(sk);
1351 return copied;
1352
1353out_error:
1354 kcm_push(kcm);
1355
1356 if (copied && sock->type == SOCK_SEQPACKET) {
1357 /* Wrote some bytes before encountering an
1358 * error, return partial success.
1359 */
1360 goto partial_message;
1361 }
1362
1363 if (head != kcm->seq_skb)
1364 kfree_skb(head);
1365
1366 err = sk_stream_error(sk, msg->msg_flags, err);
1367
1368 /* make sure we wake any epoll edge trigger waiter */
1369 if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
1370 sk->sk_write_space(sk);
1371
1372 release_sock(sk);
1373 return err;
1374}
1375
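/* Illustrative userspace sketch (not part of this file): how a sender
 * completes a message given the eor logic in kcm_sendmsg above. On a
 * SOCK_SEQPACKET KCM socket a message is completed by MSG_EOR; on SOCK_DGRAM
 * it is completed by the absence of MSG_MORE. The descriptor name kcm_fd and
 * the buffer are assumptions for the example.
 */

#include <sys/socket.h>
#include <sys/uio.h>

static ssize_t kcm_send_one_msg(int kcm_fd, const void *buf, size_t len)
{
	struct iovec iov = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};

	/* SOCK_SEQPACKET KCM socket: MSG_EOR marks the end of the message */
	return sendmsg(kcm_fd, &msg, MSG_EOR);
}
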
1376static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
1377 long timeo, int *err)
1378{
1379 struct sk_buff *skb;
1380
1381 while (!(skb = skb_peek(&sk->sk_receive_queue))) {
1382 if (sk->sk_err) {
1383 *err = sock_error(sk);
1384 return NULL;
1385 }
1386
1387 if (sock_flag(sk, SOCK_DONE))
1388 return NULL;
1389
1390 if ((flags & MSG_DONTWAIT) || !timeo) {
1391 *err = -EAGAIN;
1392 return NULL;
1393 }
1394
1395 sk_wait_data(sk, &timeo, NULL);
1396
1397 /* Handle signals */
1398 if (signal_pending(current)) {
1399 *err = sock_intr_errno(timeo);
1400 return NULL;
1401 }
1402 }
1403
1404 return skb;
1405}
1406
1407static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
1408 size_t len, int flags)
1409{
1410 struct sock *sk = sock->sk;
1411 struct kcm_sock *kcm = kcm_sk(sk);
1412 int err = 0;
1413 long timeo;
1414 struct kcm_rx_msg *rxm;
1415 int copied = 0;
1416 struct sk_buff *skb;
1417
1418 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1419
1420 lock_sock(sk);
1421
1422 skb = kcm_wait_data(sk, flags, timeo, &err);
1423 if (!skb)
1424 goto out;
1425
1426 /* Okay, have a message on the receive queue */
1427
1428 rxm = kcm_rx_msg(skb);
1429
1430 if (len > rxm->full_len)
1431 len = rxm->full_len;
1432
1433 err = skb_copy_datagram_msg(skb, rxm->offset, msg, len);
1434 if (err < 0)
1435 goto out;
1436
1437 copied = len;
1438 if (likely(!(flags & MSG_PEEK))) {
1439 KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
1440 if (copied < rxm->full_len) {
1441 if (sock->type == SOCK_DGRAM) {
1442 /* Truncated message */
1443 msg->msg_flags |= MSG_TRUNC;
1444 goto msg_finished;
1445 }
1446 rxm->offset += copied;
1447 rxm->full_len -= copied;
1448 } else {
1449msg_finished:
1450 /* Finished with message */
1451 msg->msg_flags |= MSG_EOR;
1452 KCM_STATS_INCR(kcm->stats.rx_msgs);
1453 skb_unlink(skb, &sk->sk_receive_queue);
1454 kfree_skb(skb);
1455 }
1456 }
1457
1458out:
1459 release_sock(sk);
1460
1461 return copied ? : err;
1462}
1463
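/* Illustrative userspace sketch (not part of this file): reading one message
 * and checking the flags set by kcm_recvmsg above. MSG_EOR means the message
 * was fully consumed; on SOCK_DGRAM a short buffer yields MSG_TRUNC instead.
 * The buffer handling and descriptor name are assumptions for the example.
 */

#include <stdio.h>
#include <sys/socket.h>
#include <sys/uio.h>

static ssize_t kcm_recv_one_msg(int kcm_fd, void *buf, size_t len)
{
	struct iovec iov = {
		.iov_base = buf,
		.iov_len = len,
	};
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
	};
	ssize_t n;

	n = recvmsg(kcm_fd, &msg, 0);
	if (n < 0)
		return n;

	if (msg.msg_flags & MSG_TRUNC)
		fprintf(stderr, "message truncated to %zd bytes\n", n);
	else if (!(msg.msg_flags & MSG_EOR))
		fprintf(stderr, "partial read, message continues\n");

	return n;
}
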
1464static ssize_t kcm_sock_splice(struct sock *sk,
1465 struct pipe_inode_info *pipe,
1466 struct splice_pipe_desc *spd)
1467{
1468 int ret;
1469
1470 release_sock(sk);
1471 ret = splice_to_pipe(pipe, spd);
1472 lock_sock(sk);
1473
1474 return ret;
1475}
1476
1477static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
1478 struct pipe_inode_info *pipe, size_t len,
1479 unsigned int flags)
1480{
1481 struct sock *sk = sock->sk;
1482 struct kcm_sock *kcm = kcm_sk(sk);
1483 long timeo;
1484 struct kcm_rx_msg *rxm;
1485 int err = 0;
1486 size_t copied;
1487 struct sk_buff *skb;
1488
1489	/* Only support splice for SOCK_SEQPACKET */
1490
1491 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1492
1493 lock_sock(sk);
1494
1495 skb = kcm_wait_data(sk, flags, timeo, &err);
1496 if (!skb)
1497 goto err_out;
1498
1499 /* Okay, have a message on the receive queue */
1500
1501 rxm = kcm_rx_msg(skb);
1502
1503 if (len > rxm->full_len)
1504 len = rxm->full_len;
1505
1506 copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags,
1507 kcm_sock_splice);
1508 if (copied < 0) {
1509 err = copied;
1510 goto err_out;
1511 }
1512
1513 KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
1514
1515 rxm->offset += copied;
1516 rxm->full_len -= copied;
1517
1518 /* We have no way to return MSG_EOR. If all the bytes have been
1519 * read we still leave the message in the receive socket buffer.
1520 * A subsequent recvmsg needs to be done to return MSG_EOR and
1521 * finish reading the message.
1522 */
1523
1524 release_sock(sk);
1525
1526 return copied;
1527
1528err_out:
1529 release_sock(sk);
1530
1531 return err;
1532}
1533
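/* Illustrative userspace sketch (not part of this file): splicing message
 * bytes from a KCM socket into a pipe, per kcm_splice_read above. As the
 * comment in that function notes, MSG_EOR cannot be signalled through splice,
 * so a final recvmsg is still needed to consume the end of the message. The
 * pipe setup and descriptor names are assumptions for the example.
 */

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static ssize_t kcm_splice_some(int kcm_fd, int pipe_wr_fd, size_t len)
{
	/* Moves up to len bytes of the current message into the pipe */
	return splice(kcm_fd, NULL, pipe_wr_fd, NULL, len, 0);
}
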
1534/* kcm sock lock held */
1535static void kcm_recv_disable(struct kcm_sock *kcm)
1536{
1537 struct kcm_mux *mux = kcm->mux;
1538
1539 if (kcm->rx_disabled)
1540 return;
1541
1542 spin_lock_bh(&mux->rx_lock);
1543
1544 kcm->rx_disabled = 1;
1545
1546 /* If a psock is reserved we'll do cleanup in unreserve */
1547 if (!kcm->rx_psock) {
1548 if (kcm->rx_wait) {
1549 list_del(&kcm->wait_rx_list);
1550 kcm->rx_wait = false;
1551 }
1552
1553 requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
1554 }
1555
1556 spin_unlock_bh(&mux->rx_lock);
1557}
1558
1559/* kcm sock lock held */
1560static void kcm_recv_enable(struct kcm_sock *kcm)
1561{
1562 struct kcm_mux *mux = kcm->mux;
1563
1564 if (!kcm->rx_disabled)
1565 return;
1566
1567 spin_lock_bh(&mux->rx_lock);
1568
1569 kcm->rx_disabled = 0;
1570 kcm_rcv_ready(kcm);
1571
1572 spin_unlock_bh(&mux->rx_lock);
1573}
1574
1575static int kcm_setsockopt(struct socket *sock, int level, int optname,
1576 char __user *optval, unsigned int optlen)
1577{
1578 struct kcm_sock *kcm = kcm_sk(sock->sk);
1579 int val, valbool;
1580 int err = 0;
1581
1582 if (level != SOL_KCM)
1583 return -ENOPROTOOPT;
1584
1585 if (optlen < sizeof(int))
1586 return -EINVAL;
1587
1588 if (get_user(val, (int __user *)optval))
1589 return -EINVAL;
1590
1591 valbool = val ? 1 : 0;
1592
1593 switch (optname) {
1594 case KCM_RECV_DISABLE:
1595 lock_sock(&kcm->sk);
1596 if (valbool)
1597 kcm_recv_disable(kcm);
1598 else
1599 kcm_recv_enable(kcm);
1600 release_sock(&kcm->sk);
1601 break;
1602 default:
1603 err = -ENOPROTOOPT;
1604 }
1605
1606 return err;
1607}
1608
1609static int kcm_getsockopt(struct socket *sock, int level, int optname,
1610 char __user *optval, int __user *optlen)
1611{
1612 struct kcm_sock *kcm = kcm_sk(sock->sk);
1613 int val, len;
1614
1615 if (level != SOL_KCM)
1616 return -ENOPROTOOPT;
1617
1618 if (get_user(len, optlen))
1619 return -EFAULT;
1620
1621 len = min_t(unsigned int, len, sizeof(int));
1622 if (len < 0)
1623 return -EINVAL;
1624
1625 switch (optname) {
1626 case KCM_RECV_DISABLE:
1627 val = kcm->rx_disabled;
1628 break;
1629 default:
1630 return -ENOPROTOOPT;
1631 }
1632
1633 if (put_user(len, optlen))
1634 return -EFAULT;
1635 if (copy_to_user(optval, &val, len))
1636 return -EFAULT;
1637 return 0;
1638}
1639
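/* Illustrative userspace sketch (not part of this file): toggling receive on
 * a KCM socket with the KCM_RECV_DISABLE option handled by kcm_setsockopt and
 * kcm_getsockopt above. KCM_RECV_DISABLE comes from <linux/kcm.h>; the
 * SOL_KCM fallback value mirrors the kernel's socket level numbering and the
 * descriptor name is an assumption for the example.
 */

#include <sys/socket.h>
#include <linux/kcm.h>

#ifndef SOL_KCM
#define SOL_KCM 281
#endif

static int kcm_set_recv_disable(int kcm_fd, int disable)
{
	/* Non-zero disables delivery of messages to this KCM socket;
	 * pending messages are requeued to other sockets on the mux.
	 */
	return setsockopt(kcm_fd, SOL_KCM, KCM_RECV_DISABLE,
			  &disable, sizeof(disable));
}
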
1640static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
1641{
1642 struct kcm_sock *tkcm;
1643 struct list_head *head;
1644 int index = 0;
1645
1646 /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
1647 * we set sk_state, otherwise epoll_wait always returns right away with
1648 * POLLHUP
1649 */
1650 kcm->sk.sk_state = TCP_ESTABLISHED;
1651
1652 /* Add to mux's kcm sockets list */
1653 kcm->mux = mux;
1654 spin_lock_bh(&mux->lock);
1655
1656 head = &mux->kcm_socks;
1657 list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) {
1658 if (tkcm->index != index)
1659 break;
1660 head = &tkcm->kcm_sock_list;
1661 index++;
1662 }
1663
1664 list_add(&kcm->kcm_sock_list, head);
1665 kcm->index = index;
1666
1667 mux->kcm_socks_cnt++;
1668 spin_unlock_bh(&mux->lock);
1669
1670 INIT_WORK(&kcm->tx_work, kcm_tx_work);
1671
1672 spin_lock_bh(&mux->rx_lock);
1673 kcm_rcv_ready(kcm);
1674 spin_unlock_bh(&mux->rx_lock);
1675}
1676
1677static void kcm_rx_msg_timeout(unsigned long arg)
1678{
1679 struct kcm_psock *psock = (struct kcm_psock *)arg;
1680
1681 /* Message assembly timed out */
1682 KCM_STATS_INCR(psock->stats.rx_msg_timeouts);
1683 kcm_abort_rx_psock(psock, ETIMEDOUT, NULL);
1684}
1685
1686static int kcm_attach(struct socket *sock, struct socket *csock,
1687 struct bpf_prog *prog)
1688{
1689 struct kcm_sock *kcm = kcm_sk(sock->sk);
1690 struct kcm_mux *mux = kcm->mux;
1691 struct sock *csk;
1692 struct kcm_psock *psock = NULL, *tpsock;
1693 struct list_head *head;
1694 int index = 0;
1695
1696 if (csock->ops->family != PF_INET &&
1697 csock->ops->family != PF_INET6)
1698 return -EINVAL;
1699
1700 csk = csock->sk;
1701 if (!csk)
1702 return -EINVAL;
1703
1704 /* Only support TCP for now */
1705 if (csk->sk_protocol != IPPROTO_TCP)
1706 return -EINVAL;
1707
1708 psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
1709 if (!psock)
1710 return -ENOMEM;
1711
1712 psock->mux = mux;
1713 psock->sk = csk;
1714 psock->bpf_prog = prog;
1715
1716 setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout,
1717 (unsigned long)psock);
1718
1719 INIT_WORK(&psock->rx_work, psock_rx_work);
1720 INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work);
1721
1722 sock_hold(csk);
1723
1724 write_lock_bh(&csk->sk_callback_lock);
1725 psock->save_data_ready = csk->sk_data_ready;
1726 psock->save_write_space = csk->sk_write_space;
1727 psock->save_state_change = csk->sk_state_change;
1728 csk->sk_user_data = psock;
1729 csk->sk_data_ready = psock_tcp_data_ready;
1730 csk->sk_write_space = psock_tcp_write_space;
1731 csk->sk_state_change = psock_tcp_state_change;
1732 write_unlock_bh(&csk->sk_callback_lock);
1733
1734 /* Finished initialization, now add the psock to the MUX. */
1735 spin_lock_bh(&mux->lock);
1736 head = &mux->psocks;
1737 list_for_each_entry(tpsock, &mux->psocks, psock_list) {
1738 if (tpsock->index != index)
1739 break;
1740 head = &tpsock->psock_list;
1741 index++;
1742 }
1743
1744 list_add(&psock->psock_list, head);
1745 psock->index = index;
1746
1747 KCM_STATS_INCR(mux->stats.psock_attach);
1748 mux->psocks_cnt++;
1749 psock_now_avail(psock);
1750 spin_unlock_bh(&mux->lock);
1751
1752 /* Schedule RX work in case there are already bytes queued */
1753 queue_work(kcm_wq, &psock->rx_work);
1754
1755 return 0;
1756}
1757
1758static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
1759{
1760 struct socket *csock;
1761 struct bpf_prog *prog;
1762 int err;
1763
1764 csock = sockfd_lookup(info->fd, &err);
1765 if (!csock)
1766 return -ENOENT;
1767
1768 prog = bpf_prog_get(info->bpf_fd);
1769 if (IS_ERR(prog)) {
1770 err = PTR_ERR(prog);
1771 goto out;
1772 }
1773
1774 if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
1775 bpf_prog_put(prog);
1776 err = -EINVAL;
1777 goto out;
1778 }
1779
1780 err = kcm_attach(sock, csock, prog);
1781 if (err) {
1782 bpf_prog_put(prog);
1783 goto out;
1784 }
1785
1786 /* Keep reference on file also */
1787
1788 return 0;
1789out:
1790 fput(csock->file);
1791 return err;
1792}
1793
1794static void kcm_unattach(struct kcm_psock *psock)
1795{
1796 struct sock *csk = psock->sk;
1797 struct kcm_mux *mux = psock->mux;
1798
1799 /* Stop getting callbacks from TCP socket. After this there should
1800 * be no way to reserve a kcm for this psock.
1801 */
1802 write_lock_bh(&csk->sk_callback_lock);
1803 csk->sk_user_data = NULL;
1804 csk->sk_data_ready = psock->save_data_ready;
1805 csk->sk_write_space = psock->save_write_space;
1806 csk->sk_state_change = psock->save_state_change;
1807 psock->rx_stopped = 1;
1808
1809 if (WARN_ON(psock->rx_kcm)) {
1810 write_unlock_bh(&csk->sk_callback_lock);
1811 return;
1812 }
1813
1814 spin_lock_bh(&mux->rx_lock);
1815
1816 /* Stop receiver activities. After this point psock should not be
1817 * able to get onto ready list either through callbacks or work.
1818 */
1819 if (psock->ready_rx_msg) {
1820 list_del(&psock->psock_ready_list);
1821 kfree_skb(psock->ready_rx_msg);
1822 psock->ready_rx_msg = NULL;
1823 KCM_STATS_INCR(mux->stats.rx_ready_drops);
1824 }
1825
1826 spin_unlock_bh(&mux->rx_lock);
1827
1828 write_unlock_bh(&csk->sk_callback_lock);
1829
1830 del_timer_sync(&psock->rx_msg_timer);
1831 cancel_work_sync(&psock->rx_work);
1832 cancel_delayed_work_sync(&psock->rx_delayed_work);
1833
1834 bpf_prog_put(psock->bpf_prog);
1835
1836 kfree_skb(psock->rx_skb_head);
1837 psock->rx_skb_head = NULL;
1838
1839 spin_lock_bh(&mux->lock);
1840
1841 aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
1842
1843 KCM_STATS_INCR(mux->stats.psock_unattach);
1844
1845 if (psock->tx_kcm) {
1846 /* psock was reserved. Just mark it finished and we will clean
1847		 * up in the kcm paths; we need the kcm lock, which cannot be
1848 * acquired here.
1849 */
1850 KCM_STATS_INCR(mux->stats.psock_unattach_rsvd);
1851 spin_unlock_bh(&mux->lock);
1852
1853 /* We are unattaching a socket that is reserved. Abort the
1854 * socket since we may be out of sync in sending on it. We need
1855 * to do this without the mux lock.
1856 */
1857 kcm_abort_tx_psock(psock, EPIPE, false);
1858
1859 spin_lock_bh(&mux->lock);
1860 if (!psock->tx_kcm) {
1861			/* psock was unreserved in the window where the mux lock was dropped */
1862 goto no_reserved;
1863 }
1864 psock->done = 1;
1865
1866 /* Commit done before queuing work to process it */
1867 smp_mb();
1868
1869 /* Queue tx work to make sure psock->done is handled */
1870 queue_work(kcm_wq, &psock->tx_kcm->tx_work);
1871 spin_unlock_bh(&mux->lock);
1872 } else {
1873no_reserved:
1874 if (!psock->tx_stopped)
1875 list_del(&psock->psock_avail_list);
1876 list_del(&psock->psock_list);
1877 mux->psocks_cnt--;
1878 spin_unlock_bh(&mux->lock);
1879
1880 sock_put(csk);
1881 fput(csk->sk_socket->file);
1882 kmem_cache_free(kcm_psockp, psock);
1883 }
1884}
1885
1886static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
1887{
1888 struct kcm_sock *kcm = kcm_sk(sock->sk);
1889 struct kcm_mux *mux = kcm->mux;
1890 struct kcm_psock *psock;
1891 struct socket *csock;
1892 struct sock *csk;
1893 int err;
1894
1895 csock = sockfd_lookup(info->fd, &err);
1896 if (!csock)
1897 return -ENOENT;
1898
1899 csk = csock->sk;
1900 if (!csk) {
1901 err = -EINVAL;
1902 goto out;
1903 }
1904
1905 err = -ENOENT;
1906
1907 spin_lock_bh(&mux->lock);
1908
1909 list_for_each_entry(psock, &mux->psocks, psock_list) {
1910 if (psock->sk != csk)
1911 continue;
1912
1913 /* Found the matching psock */
1914
1915 if (psock->unattaching || WARN_ON(psock->done)) {
1916 err = -EALREADY;
1917 break;
1918 }
1919
1920 psock->unattaching = 1;
1921
1922 spin_unlock_bh(&mux->lock);
1923
1924 kcm_unattach(psock);
1925
1926 err = 0;
1927 goto out;
1928 }
1929
1930 spin_unlock_bh(&mux->lock);
1931
1932out:
1933 fput(csock->file);
1934 return err;
1935}
1936
1937static struct proto kcm_proto = {
1938 .name = "KCM",
1939 .owner = THIS_MODULE,
1940 .obj_size = sizeof(struct kcm_sock),
1941};
1942
1943/* Clone a kcm socket. */
1944static int kcm_clone(struct socket *osock, struct kcm_clone *info,
1945 struct socket **newsockp)
1946{
1947 struct socket *newsock;
1948 struct sock *newsk;
1949 struct file *newfile;
1950 int err, newfd;
1951
1952 err = -ENFILE;
1953 newsock = sock_alloc();
1954 if (!newsock)
1955 goto out;
1956
1957 newsock->type = osock->type;
1958 newsock->ops = osock->ops;
1959
1960 __module_get(newsock->ops->owner);
1961
1962 newfd = get_unused_fd_flags(0);
1963 if (unlikely(newfd < 0)) {
1964 err = newfd;
1965 goto out_fd_fail;
1966 }
1967
1968 newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
1969 if (unlikely(IS_ERR(newfile))) {
1970 err = PTR_ERR(newfile);
1971 goto out_sock_alloc_fail;
1972 }
1973
1974 newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
1975 &kcm_proto, true);
1976 if (!newsk) {
1977 err = -ENOMEM;
1978 goto out_sk_alloc_fail;
1979 }
1980
1981 sock_init_data(newsock, newsk);
1982 init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
1983
1984 fd_install(newfd, newfile);
1985 *newsockp = newsock;
1986 info->fd = newfd;
1987
1988 return 0;
1989
1990out_sk_alloc_fail:
1991 fput(newfile);
1992out_sock_alloc_fail:
1993 put_unused_fd(newfd);
1994out_fd_fail:
1995 sock_release(newsock);
1996out:
1997 return err;
1998}
1999
2000static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2001{
2002 int err;
2003
2004 switch (cmd) {
2005 case SIOCKCMATTACH: {
2006 struct kcm_attach info;
2007
2008 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
2009 err = -EFAULT;
2010
2011 err = kcm_attach_ioctl(sock, &info);
2012
2013 break;
2014 }
2015 case SIOCKCMUNATTACH: {
2016 struct kcm_unattach info;
2017
2018 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
2019 err = -EFAULT;
2020
2021 err = kcm_unattach_ioctl(sock, &info);
2022
2023 break;
2024 }
2025 case SIOCKCMCLONE: {
2026 struct kcm_clone info;
2027 struct socket *newsock = NULL;
2028
2029 if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
2030 err = -EFAULT;
2031
2032 err = kcm_clone(sock, &info, &newsock);
2033
2034 if (!err) {
2035 if (copy_to_user((void __user *)arg, &info,
2036 sizeof(info))) {
2037 err = -EFAULT;
2038 sock_release(newsock);
2039 }
2040 }
2041
2042 break;
2043 }
2044 default:
2045 err = -ENOIOCTLCMD;
2046 break;
2047 }
2048
2049 return err;
2050}
2051
2052static void free_mux(struct rcu_head *rcu)
2053{
2054 struct kcm_mux *mux = container_of(rcu,
2055 struct kcm_mux, rcu);
2056
2057 kmem_cache_free(kcm_muxp, mux);
2058}
2059
2060static void release_mux(struct kcm_mux *mux)
2061{
2062 struct kcm_net *knet = mux->knet;
2063 struct kcm_psock *psock, *tmp_psock;
2064
2065 /* Release psocks */
2066 list_for_each_entry_safe(psock, tmp_psock,
2067 &mux->psocks, psock_list) {
2068 if (!WARN_ON(psock->unattaching))
2069 kcm_unattach(psock);
2070 }
2071
2072 if (WARN_ON(mux->psocks_cnt))
2073 return;
2074
2075 __skb_queue_purge(&mux->rx_hold_queue);
2076
2077 mutex_lock(&knet->mutex);
2078 aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
2079 aggregate_psock_stats(&mux->aggregate_psock_stats,
2080 &knet->aggregate_psock_stats);
2081 list_del_rcu(&mux->kcm_mux_list);
2082 knet->count--;
2083 mutex_unlock(&knet->mutex);
2084
2085 call_rcu(&mux->rcu, free_mux);
2086}
2087
2088static void kcm_done(struct kcm_sock *kcm)
2089{
2090 struct kcm_mux *mux = kcm->mux;
2091 struct sock *sk = &kcm->sk;
2092 int socks_cnt;
2093
2094 spin_lock_bh(&mux->rx_lock);
2095 if (kcm->rx_psock) {
2096 /* Cleanup in unreserve_rx_kcm */
2097 WARN_ON(kcm->done);
2098 kcm->rx_disabled = 1;
2099 kcm->done = 1;
2100 spin_unlock_bh(&mux->rx_lock);
2101 return;
2102 }
2103
2104 if (kcm->rx_wait) {
2105 list_del(&kcm->wait_rx_list);
2106 kcm->rx_wait = false;
2107 }
2108 /* Move any pending receive messages to other kcm sockets */
2109 requeue_rx_msgs(mux, &sk->sk_receive_queue);
2110
2111 spin_unlock_bh(&mux->rx_lock);
2112
2113 if (WARN_ON(sk_rmem_alloc_get(sk)))
2114 return;
2115
2116 /* Detach from MUX */
2117 spin_lock_bh(&mux->lock);
2118
2119 list_del(&kcm->kcm_sock_list);
2120 mux->kcm_socks_cnt--;
2121 socks_cnt = mux->kcm_socks_cnt;
2122
2123 spin_unlock_bh(&mux->lock);
2124
2125 if (!socks_cnt) {
2126 /* We are done with the mux now. */
2127 release_mux(mux);
2128 }
2129
2130 WARN_ON(kcm->rx_wait);
2131
2132 sock_put(&kcm->sk);
2133}
2134
2135/* Called by kcm_release to close a KCM socket.
2136 * If this is the last KCM socket on the MUX, destroy the MUX.
2137 */
2138static int kcm_release(struct socket *sock)
2139{
2140 struct sock *sk = sock->sk;
2141 struct kcm_sock *kcm;
2142 struct kcm_mux *mux;
2143 struct kcm_psock *psock;
2144
2145 if (!sk)
2146 return 0;
2147
2148 kcm = kcm_sk(sk);
2149 mux = kcm->mux;
2150
2151 sock_orphan(sk);
2152 kfree_skb(kcm->seq_skb);
2153
2154 lock_sock(sk);
2155 /* Purge queue under lock to avoid race condition with tx_work trying
2156 * to act when queue is nonempty. If tx_work runs after this point
2157 * it will just return.
2158 */
2159 __skb_queue_purge(&sk->sk_write_queue);
2160 release_sock(sk);
2161
2162 spin_lock_bh(&mux->lock);
2163 if (kcm->tx_wait) {
2164		/* Take off the tx_wait list; after this point there should be no way
2165 * that a psock will be assigned to this kcm.
2166 */
2167 list_del(&kcm->wait_psock_list);
2168 kcm->tx_wait = false;
2169 }
2170 spin_unlock_bh(&mux->lock);
2171
2172 /* Cancel work. After this point there should be no outside references
2173 * to the kcm socket.
2174 */
2175 cancel_work_sync(&kcm->tx_work);
2176
2177 lock_sock(sk);
2178 psock = kcm->tx_psock;
2179 if (psock) {
2180 /* A psock was reserved, so we need to kill it since it
2181 * may already have some bytes queued from a message. We
2182 * need to do this after removing kcm from tx_wait list.
2183 */
2184 kcm_abort_tx_psock(psock, EPIPE, false);
2185 unreserve_psock(kcm);
2186 }
2187 release_sock(sk);
2188
2189 WARN_ON(kcm->tx_wait);
2190 WARN_ON(kcm->tx_psock);
2191
2192 sock->sk = NULL;
2193
2194 kcm_done(kcm);
2195
2196 return 0;
2197}
2198
2199static const struct proto_ops kcm_dgram_ops = {
2200 .family = PF_KCM,
2201 .owner = THIS_MODULE,
2202 .release = kcm_release,
2203 .bind = sock_no_bind,
2204 .connect = sock_no_connect,
2205 .socketpair = sock_no_socketpair,
2206 .accept = sock_no_accept,
2207 .getname = sock_no_getname,
2208 .poll = datagram_poll,
2209 .ioctl = kcm_ioctl,
2210 .listen = sock_no_listen,
2211 .shutdown = sock_no_shutdown,
2212 .setsockopt = kcm_setsockopt,
2213 .getsockopt = kcm_getsockopt,
2214 .sendmsg = kcm_sendmsg,
2215 .recvmsg = kcm_recvmsg,
2216 .mmap = sock_no_mmap,
2217 .sendpage = kcm_sendpage,
2218};
2219
2220static const struct proto_ops kcm_seqpacket_ops = {
2221 .family = PF_KCM,
2222 .owner = THIS_MODULE,
2223 .release = kcm_release,
2224 .bind = sock_no_bind,
2225 .connect = sock_no_connect,
2226 .socketpair = sock_no_socketpair,
2227 .accept = sock_no_accept,
2228 .getname = sock_no_getname,
2229 .poll = datagram_poll,
2230 .ioctl = kcm_ioctl,
2231 .listen = sock_no_listen,
2232 .shutdown = sock_no_shutdown,
2233 .setsockopt = kcm_setsockopt,
2234 .getsockopt = kcm_getsockopt,
2235 .sendmsg = kcm_sendmsg,
2236 .recvmsg = kcm_recvmsg,
2237 .mmap = sock_no_mmap,
2238 .sendpage = kcm_sendpage,
2239 .splice_read = kcm_splice_read,
2240};
2241
2242/* Create proto operation for kcm sockets */
2243static int kcm_create(struct net *net, struct socket *sock,
2244 int protocol, int kern)
2245{
2246 struct kcm_net *knet = net_generic(net, kcm_net_id);
2247 struct sock *sk;
2248 struct kcm_mux *mux;
2249
2250 switch (sock->type) {
2251 case SOCK_DGRAM:
2252 sock->ops = &kcm_dgram_ops;
2253 break;
2254 case SOCK_SEQPACKET:
2255 sock->ops = &kcm_seqpacket_ops;
2256 break;
2257 default:
2258 return -ESOCKTNOSUPPORT;
2259 }
2260
2261 if (protocol != KCMPROTO_CONNECTED)
2262 return -EPROTONOSUPPORT;
2263
2264 sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern);
2265 if (!sk)
2266 return -ENOMEM;
2267
2268 /* Allocate a kcm mux, shared between KCM sockets */
2269 mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL);
2270 if (!mux) {
2271 sk_free(sk);
2272 return -ENOMEM;
2273 }
2274
2275 spin_lock_init(&mux->lock);
2276 spin_lock_init(&mux->rx_lock);
2277 INIT_LIST_HEAD(&mux->kcm_socks);
2278 INIT_LIST_HEAD(&mux->kcm_rx_waiters);
2279 INIT_LIST_HEAD(&mux->kcm_tx_waiters);
2280
2281 INIT_LIST_HEAD(&mux->psocks);
2282 INIT_LIST_HEAD(&mux->psocks_ready);
2283 INIT_LIST_HEAD(&mux->psocks_avail);
2284
2285 mux->knet = knet;
2286
2287 /* Add new MUX to list */
2288 mutex_lock(&knet->mutex);
2289 list_add_rcu(&mux->kcm_mux_list, &knet->mux_list);
2290 knet->count++;
2291 mutex_unlock(&knet->mutex);
2292
2293 skb_queue_head_init(&mux->rx_hold_queue);
2294
2295 /* Init KCM socket */
2296 sock_init_data(sock, sk);
2297 init_kcm_sock(kcm_sk(sk), mux);
2298
2299 return 0;
2300}
2301
2302static struct net_proto_family kcm_family_ops = {
2303 .family = PF_KCM,
2304 .create = kcm_create,
2305 .owner = THIS_MODULE,
2306};
2307
2308static __net_init int kcm_init_net(struct net *net)
2309{
2310 struct kcm_net *knet = net_generic(net, kcm_net_id);
2311
2312 INIT_LIST_HEAD_RCU(&knet->mux_list);
2313 mutex_init(&knet->mutex);
2314
2315 return 0;
2316}
2317
2318static __net_exit void kcm_exit_net(struct net *net)
2319{
2320 struct kcm_net *knet = net_generic(net, kcm_net_id);
2321
2322 /* All KCM sockets should be closed at this point, which should mean
2323 * that all multiplexors and psocks have been destroyed.
2324 */
2325 WARN_ON(!list_empty(&knet->mux_list));
2326}
2327
2328static struct pernet_operations kcm_net_ops = {
2329 .init = kcm_init_net,
2330 .exit = kcm_exit_net,
2331 .id = &kcm_net_id,
2332 .size = sizeof(struct kcm_net),
2333};
2334
2335static int __init kcm_init(void)
2336{
2337 int err = -ENOMEM;
2338
2339 kcm_muxp = kmem_cache_create("kcm_mux_cache",
2340 sizeof(struct kcm_mux), 0,
2341 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2342 if (!kcm_muxp)
2343 goto fail;
2344
2345 kcm_psockp = kmem_cache_create("kcm_psock_cache",
2346 sizeof(struct kcm_psock), 0,
2347 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2348 if (!kcm_psockp)
2349 goto fail;
2350
2351 kcm_wq = create_singlethread_workqueue("kkcmd");
2352 if (!kcm_wq)
2353 goto fail;
2354
2355 err = proto_register(&kcm_proto, 1);
2356 if (err)
2357 goto fail;
2358
2359 err = sock_register(&kcm_family_ops);
2360 if (err)
2361 goto sock_register_fail;
2362
2363 err = register_pernet_device(&kcm_net_ops);
2364 if (err)
2365 goto net_ops_fail;
2366
2367 err = kcm_proc_init();
2368 if (err)
2369 goto proc_init_fail;
2370
2371 return 0;
2372
2373proc_init_fail:
2374 unregister_pernet_device(&kcm_net_ops);
2375
2376net_ops_fail:
2377 sock_unregister(PF_KCM);
2378
2379sock_register_fail:
2380 proto_unregister(&kcm_proto);
2381
2382fail:
2383 kmem_cache_destroy(kcm_muxp);
2384 kmem_cache_destroy(kcm_psockp);
2385
2386 if (kcm_wq)
2387 destroy_workqueue(kcm_wq);
2388
2389 return err;
2390}
2391
2392static void __exit kcm_exit(void)
2393{
2394 kcm_proc_exit();
2395 unregister_pernet_device(&kcm_net_ops);
2396 sock_unregister(PF_KCM);
2397 proto_unregister(&kcm_proto);
2398 destroy_workqueue(kcm_wq);
2399
2400 kmem_cache_destroy(kcm_muxp);
2401 kmem_cache_destroy(kcm_psockp);
2402}
2403
2404module_init(kcm_init);
2405module_exit(kcm_exit);
2406
2407MODULE_LICENSE("GPL");
2408MODULE_ALIAS_NETPROTO(PF_KCM);
2409
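/* Illustrative userspace sketch (not part of the kernel sources above):
 * end-to-end use of the ioctls implemented by kcm_ioctl, kcm_attach and
 * kcm_clone. A KCM socket is created, a connected TCP socket plus a loaded
 * BPF_PROG_TYPE_SOCKET_FILTER program are attached to its mux with
 * SIOCKCMATTACH, and SIOCKCMCLONE creates a second KCM socket on the same
 * mux. tcp_fd and bpf_fd are assumed to already exist; the AF_KCM fallback
 * value mirrors the kernel's definition.
 */

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/sockios.h>
#include <linux/kcm.h>

#ifndef AF_KCM
#define AF_KCM 41
#endif

static int kcm_setup(int tcp_fd, int bpf_fd, int *cloned_fd)
{
	struct kcm_attach attach = {
		.fd = tcp_fd,
		.bpf_fd = bpf_fd,
	};
	struct kcm_clone clone = { 0 };
	int kcm_fd;

	kcm_fd = socket(AF_KCM, SOCK_SEQPACKET, KCMPROTO_CONNECTED);
	if (kcm_fd < 0)
		return -1;

	/* Bind the TCP socket and its parser program to this socket's mux */
	if (ioctl(kcm_fd, SIOCKCMATTACH, &attach) < 0)
		goto err;

	/* Another KCM socket on the same mux, e.g. for a second thread */
	if (ioctl(kcm_fd, SIOCKCMCLONE, &clone) < 0)
		goto err;

	*cloned_fd = clone.fd;
	return kcm_fd;

err:
	close(kcm_fd);
	return -1;
}
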
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index a2c8747d2936..6b54ff3ff4cb 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -25,6 +25,7 @@
 #include <net/udp.h>
 #include <net/inet_common.h>
 #include <net/inet_hashtables.h>
+#include <net/inet6_hashtables.h>
 #include <net/tcp_states.h>
 #include <net/protocol.h>
 #include <net/xfrm.h>
@@ -718,7 +719,7 @@ static struct proto l2tp_ip6_prot = {
 	.sendmsg	   = l2tp_ip6_sendmsg,
 	.recvmsg	   = l2tp_ip6_recvmsg,
 	.backlog_rcv	   = l2tp_ip6_backlog_recv,
-	.hash		   = inet_hash,
+	.hash		   = inet6_hash,
 	.unhash		   = inet_unhash,
 	.obj_size	   = sizeof(struct l2tp_ip6_sock),
 #ifdef CONFIG_COMPAT
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index f93c5be612a7..2caaa84ce92d 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct genl_family *family,
 	ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
 				  NLM_F_ACK, tunnel, cmd);
 
-	if (ret >= 0)
-		return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+	if (ret >= 0) {
+		ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+		/* We don't care if no one is listening */
+		if (ret == -ESRCH)
+			ret = 0;
+		return ret;
+	}
 
 	nlmsg_free(msg);
 
@@ -147,8 +152,13 @@ static int l2tp_session_notify(struct genl_family *family,
 	ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
 				  NLM_F_ACK, session, cmd);
 
-	if (ret >= 0)
-		return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+	if (ret >= 0) {
+		ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC);
+		/* We don't care if no one is listening */
+		if (ret == -ESRCH)
+			ret = 0;
+		return ret;
+	}
 
 	nlmsg_free(msg);
 
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 8e5ead366e7f..e925037fa0df 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -17,7 +17,7 @@
  * @dev: targeted interface
  */
 
-int l3mdev_master_ifindex_rcu(struct net_device *dev)
+int l3mdev_master_ifindex_rcu(const struct net_device *dev)
 {
 	int ifindex = 0;
 
@@ -28,8 +28,15 @@ int l3mdev_master_ifindex_rcu(struct net_device *dev)
 		ifindex = dev->ifindex;
 	} else if (netif_is_l3_slave(dev)) {
 		struct net_device *master;
+		struct net_device *_dev = (struct net_device *)dev;
 
-		master = netdev_master_upper_dev_get_rcu(dev);
+		/* netdev_master_upper_dev_get_rcu calls
+		 * list_first_or_null_rcu to walk the upper dev list.
+		 * list_first_or_null_rcu does not handle a const arg. We aren't
+		 * making changes, just want the master device from that list so
+		 * typecast to remove the const
+		 */
+		master = netdev_master_upper_dev_get_rcu(_dev);
 		if (master)
 			ifindex = master->ifindex;
 	}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8dab4e569571..b3c52e3f689a 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -38,7 +38,7 @@ static u16 llc_ui_sap_link_no_max[256];
 static struct sockaddr_llc llc_ui_addrnull;
 static const struct proto_ops llc_ui_ops;
 
-static int llc_ui_wait_for_conn(struct sock *sk, long timeout);
+static long llc_ui_wait_for_conn(struct sock *sk, long timeout);
 static int llc_ui_wait_for_disc(struct sock *sk, long timeout);
 static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout);
 
@@ -551,7 +551,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
 	return rc;
 }
 
-static int llc_ui_wait_for_conn(struct sock *sk, long timeout)
+static long llc_ui_wait_for_conn(struct sock *sk, long timeout)
 {
 	DEFINE_WAIT(wait);
 
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 10ad4ac1fa0b..3a8f881b22f1 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -7,6 +7,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
+ * Copyright(c) 2015 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -61,16 +62,25 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 {
 	struct ieee80211_local *local = sta->local;
 	struct tid_ampdu_rx *tid_rx;
+	struct ieee80211_ampdu_params params = {
+		.sta = &sta->sta,
+		.action = IEEE80211_AMPDU_RX_STOP,
+		.tid = tid,
+		.amsdu = false,
+		.timeout = 0,
+		.ssn = 0,
+	};
 
 	lockdep_assert_held(&sta->ampdu_mlme.mtx);
 
 	tid_rx = rcu_dereference_protected(sta->ampdu_mlme.tid_rx[tid],
 					lockdep_is_held(&sta->ampdu_mlme.mtx));
 
-	if (!tid_rx)
+	if (!test_bit(tid, sta->ampdu_mlme.agg_session_valid))
 		return;
 
 	RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL);
+	__clear_bit(tid, sta->ampdu_mlme.agg_session_valid);
 
 	ht_dbg(sta->sdata,
 	       "Rx BA session stop requested for %pM tid %u %s reason: %d\n",
@@ -78,8 +88,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 	       initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator",
 	       (int)reason);
 
-	if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP,
-			     &sta->sta, tid, NULL, 0, false))
+	if (drv_ampdu_action(local, sta->sdata, &params))
 		sdata_info(sta->sdata,
 			   "HW problem - can not stop rx aggregation for %pM tid %d\n",
 			   sta->sta.addr, tid);
@@ -89,6 +98,13 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 		ieee80211_send_delba(sta->sdata, sta->sta.addr,
 				     tid, WLAN_BACK_RECIPIENT, reason);
 
+	/*
+	 * return here in case tid_rx is not assigned - which will happen if
+	 * IEEE80211_HW_SUPPORTS_REORDERING_BUFFER is set.
+	 */
+	if (!tid_rx)
+		return;
+
 	del_timer_sync(&tid_rx->session_timer);
 
 	/* make sure ieee80211_sta_reorder_release() doesn't re-arm the timer */
@@ -237,6 +253,15 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 {
 	struct ieee80211_local *local = sta->sdata->local;
 	struct tid_ampdu_rx *tid_agg_rx;
+	struct ieee80211_ampdu_params params = {
+		.sta = &sta->sta,
+		.action = IEEE80211_AMPDU_RX_START,
+		.tid = tid,
+		.amsdu = false,
+		.timeout = timeout,
+		.ssn = start_seq_num,
+	};
+
 	int i, ret = -EOPNOTSUPP;
 	u16 status = WLAN_STATUS_REQUEST_DECLINED;
 
@@ -275,11 +300,12 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 	/* make sure the size doesn't exceed the maximum supported by the hw */
 	if (buf_size > local->hw.max_rx_aggregation_subframes)
 		buf_size = local->hw.max_rx_aggregation_subframes;
+	params.buf_size = buf_size;
 
 	/* examine state machine */
 	mutex_lock(&sta->ampdu_mlme.mtx);
 
-	if (sta->ampdu_mlme.tid_rx[tid]) {
+	if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
 		ht_dbg_ratelimited(sta->sdata,
 				   "unexpected AddBA Req from %pM on tid %u\n",
 				   sta->sta.addr, tid);
@@ -290,8 +316,18 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
290 false); 316 false);
291 } 317 }
292 318
319 if (ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER)) {
320 ret = drv_ampdu_action(local, sta->sdata, &params);
321 ht_dbg(sta->sdata,
322 "Rx A-MPDU request on %pM tid %d result %d\n",
323 sta->sta.addr, tid, ret);
324 if (!ret)
325 status = WLAN_STATUS_SUCCESS;
326 goto end;
327 }
328
293 /* prepare A-MPDU MLME for Rx aggregation */ 329 /* prepare A-MPDU MLME for Rx aggregation */
294 tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL); 330 tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL);
295 if (!tid_agg_rx) 331 if (!tid_agg_rx)
296 goto end; 332 goto end;
297 333
@@ -322,8 +358,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
322 for (i = 0; i < buf_size; i++) 358 for (i = 0; i < buf_size; i++)
323 __skb_queue_head_init(&tid_agg_rx->reorder_buf[i]); 359 __skb_queue_head_init(&tid_agg_rx->reorder_buf[i]);
324 360
325 ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, 361 ret = drv_ampdu_action(local, sta->sdata, &params);
326 &sta->sta, tid, &start_seq_num, 0, false);
327 ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", 362 ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n",
328 sta->sta.addr, tid, ret); 363 sta->sta.addr, tid, ret);
329 if (ret) { 364 if (ret) {
@@ -341,6 +376,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
341 tid_agg_rx->timeout = timeout; 376 tid_agg_rx->timeout = timeout;
342 tid_agg_rx->stored_mpdu_num = 0; 377 tid_agg_rx->stored_mpdu_num = 0;
343 tid_agg_rx->auto_seq = auto_seq; 378 tid_agg_rx->auto_seq = auto_seq;
379 tid_agg_rx->reorder_buf_filtered = 0;
344 status = WLAN_STATUS_SUCCESS; 380 status = WLAN_STATUS_SUCCESS;
345 381
346 /* activate it for RX */ 382 /* activate it for RX */
@@ -352,6 +388,8 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
352 } 388 }
353 389
354end: 390end:
391 if (status == WLAN_STATUS_SUCCESS)
392 __set_bit(tid, sta->ampdu_mlme.agg_session_valid);
355 mutex_unlock(&sta->ampdu_mlme.mtx); 393 mutex_unlock(&sta->ampdu_mlme.mtx);
356 394
357end_no_lock: 395end_no_lock:
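
The agg-rx.c hunks above collapse the long drv_ampdu_action() argument list into a single struct ieee80211_ampdu_params and track active RX sessions in the agg_session_valid bitmap, so a session can be valid even when the reorder buffer is offloaded to hardware and tid_rx stays NULL. Below is a minimal standalone C sketch of that pattern; the field names mirror the diff, but the types, the toy callback, and the bitmap helpers are simplified stand-ins, not the mac80211 or driver API.

/* Illustrative stand-in for the params-struct pattern; not kernel code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum ampdu_action { AMPDU_RX_START, AMPDU_RX_STOP };

struct ampdu_params {			/* mirrors the fields added in the diff */
	enum ampdu_action action;
	uint16_t tid;
	uint16_t ssn;
	uint16_t timeout;
	uint8_t buf_size;
	bool amsdu;
};

static unsigned long agg_session_valid;	/* one bit per TID, TIDs 0..15 */

/* toy driver callback: one struct argument instead of six scalars */
static int drv_ampdu_action(const struct ampdu_params *p)
{
	switch (p->action) {
	case AMPDU_RX_START:
		printf("start tid %u ssn %u buf %u\n", (unsigned)p->tid,
		       (unsigned)p->ssn, (unsigned)p->buf_size);
		return 0;
	case AMPDU_RX_STOP:
		printf("stop tid %u\n", (unsigned)p->tid);
		return 0;
	}
	return -1;
}

int main(void)
{
	struct ampdu_params start = { .action = AMPDU_RX_START, .tid = 5,
				      .ssn = 42, .buf_size = 32 };
	struct ampdu_params stop = { .action = AMPDU_RX_STOP, .tid = 5 };

	if (!drv_ampdu_action(&start))
		agg_session_valid |= 1UL << start.tid;	/* __set_bit() analogue */

	if (agg_session_valid & (1UL << stop.tid)) {	/* test_bit() analogue */
		drv_ampdu_action(&stop);
		agg_session_valid &= ~(1UL << stop.tid); /* __clear_bit() analogue */
	}
	return 0;
}
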
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index ff757181b0a8..4932e9f243a2 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -7,6 +7,7 @@
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2007-2010, Intel Corporation 9 * Copyright 2007-2010, Intel Corporation
10 * Copyright(c) 2015 Intel Deutschland GmbH
10 * 11 *
11 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 13 * it under the terms of the GNU General Public License version 2 as
@@ -295,7 +296,14 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
295{ 296{
296 struct ieee80211_local *local = sta->local; 297 struct ieee80211_local *local = sta->local;
297 struct tid_ampdu_tx *tid_tx; 298 struct tid_ampdu_tx *tid_tx;
298 enum ieee80211_ampdu_mlme_action action; 299 struct ieee80211_ampdu_params params = {
300 .sta = &sta->sta,
301 .tid = tid,
302 .buf_size = 0,
303 .amsdu = false,
304 .timeout = 0,
305 .ssn = 0,
306 };
299 int ret; 307 int ret;
300 308
301 lockdep_assert_held(&sta->ampdu_mlme.mtx); 309 lockdep_assert_held(&sta->ampdu_mlme.mtx);
@@ -304,10 +312,10 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
304 case AGG_STOP_DECLINED: 312 case AGG_STOP_DECLINED:
305 case AGG_STOP_LOCAL_REQUEST: 313 case AGG_STOP_LOCAL_REQUEST:
306 case AGG_STOP_PEER_REQUEST: 314 case AGG_STOP_PEER_REQUEST:
307 action = IEEE80211_AMPDU_TX_STOP_CONT; 315 params.action = IEEE80211_AMPDU_TX_STOP_CONT;
308 break; 316 break;
309 case AGG_STOP_DESTROY_STA: 317 case AGG_STOP_DESTROY_STA:
310 action = IEEE80211_AMPDU_TX_STOP_FLUSH; 318 params.action = IEEE80211_AMPDU_TX_STOP_FLUSH;
311 break; 319 break;
312 default: 320 default:
313 WARN_ON_ONCE(1); 321 WARN_ON_ONCE(1);
@@ -330,9 +338,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
330 spin_unlock_bh(&sta->lock); 338 spin_unlock_bh(&sta->lock);
331 if (reason != AGG_STOP_DESTROY_STA) 339 if (reason != AGG_STOP_DESTROY_STA)
332 return -EALREADY; 340 return -EALREADY;
333 ret = drv_ampdu_action(local, sta->sdata, 341 params.action = IEEE80211_AMPDU_TX_STOP_FLUSH_CONT;
334 IEEE80211_AMPDU_TX_STOP_FLUSH_CONT, 342 ret = drv_ampdu_action(local, sta->sdata, &params);
335 &sta->sta, tid, NULL, 0, false);
336 WARN_ON_ONCE(ret); 343 WARN_ON_ONCE(ret);
337 return 0; 344 return 0;
338 } 345 }
@@ -381,8 +388,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
381 WLAN_BACK_INITIATOR; 388 WLAN_BACK_INITIATOR;
382 tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST; 389 tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST;
383 390
384 ret = drv_ampdu_action(local, sta->sdata, action, 391 ret = drv_ampdu_action(local, sta->sdata, &params);
385 &sta->sta, tid, NULL, 0, false);
386 392
387 /* HW shall not deny going back to legacy */ 393 /* HW shall not deny going back to legacy */
388 if (WARN_ON(ret)) { 394 if (WARN_ON(ret)) {
@@ -445,7 +451,14 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
445 struct tid_ampdu_tx *tid_tx; 451 struct tid_ampdu_tx *tid_tx;
446 struct ieee80211_local *local = sta->local; 452 struct ieee80211_local *local = sta->local;
447 struct ieee80211_sub_if_data *sdata = sta->sdata; 453 struct ieee80211_sub_if_data *sdata = sta->sdata;
448 u16 start_seq_num; 454 struct ieee80211_ampdu_params params = {
455 .sta = &sta->sta,
456 .action = IEEE80211_AMPDU_TX_START,
457 .tid = tid,
458 .buf_size = 0,
459 .amsdu = false,
460 .timeout = 0,
461 };
449 int ret; 462 int ret;
450 463
451 tid_tx = rcu_dereference_protected_tid_tx(sta, tid); 464 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
@@ -467,10 +480,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
467 */ 480 */
468 synchronize_net(); 481 synchronize_net();
469 482
470 start_seq_num = sta->tid_seq[tid] >> 4; 483 params.ssn = sta->tid_seq[tid] >> 4;
471 484 ret = drv_ampdu_action(local, sdata, &params);
472 ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
473 &sta->sta, tid, &start_seq_num, 0, false);
474 if (ret) { 485 if (ret) {
475 ht_dbg(sdata, 486 ht_dbg(sdata,
476 "BA request denied - HW unavailable for %pM tid %d\n", 487 "BA request denied - HW unavailable for %pM tid %d\n",
@@ -499,7 +510,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
499 510
500 /* send AddBA request */ 511 /* send AddBA request */
501 ieee80211_send_addba_request(sdata, sta->sta.addr, tid, 512 ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
502 tid_tx->dialog_token, start_seq_num, 513 tid_tx->dialog_token, params.ssn,
503 IEEE80211_MAX_AMPDU_BUF, 514 IEEE80211_MAX_AMPDU_BUF,
504 tid_tx->timeout); 515 tid_tx->timeout);
505} 516}
@@ -684,18 +695,24 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
684 struct sta_info *sta, u16 tid) 695 struct sta_info *sta, u16 tid)
685{ 696{
686 struct tid_ampdu_tx *tid_tx; 697 struct tid_ampdu_tx *tid_tx;
698 struct ieee80211_ampdu_params params = {
699 .sta = &sta->sta,
700 .action = IEEE80211_AMPDU_TX_OPERATIONAL,
701 .tid = tid,
702 .timeout = 0,
703 .ssn = 0,
704 };
687 705
688 lockdep_assert_held(&sta->ampdu_mlme.mtx); 706 lockdep_assert_held(&sta->ampdu_mlme.mtx);
689 707
690 tid_tx = rcu_dereference_protected_tid_tx(sta, tid); 708 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
709 params.buf_size = tid_tx->buf_size;
710 params.amsdu = tid_tx->amsdu;
691 711
692 ht_dbg(sta->sdata, "Aggregation is on for %pM tid %d\n", 712 ht_dbg(sta->sdata, "Aggregation is on for %pM tid %d\n",
693 sta->sta.addr, tid); 713 sta->sta.addr, tid);
694 714
695 drv_ampdu_action(local, sta->sdata, 715 drv_ampdu_action(local, sta->sdata, &params);
696 IEEE80211_AMPDU_TX_OPERATIONAL,
697 &sta->sta, tid, NULL, tid_tx->buf_size,
698 tid_tx->amsdu);
699 716
700 /* 717 /*
701 * synchronize with TX path, while splicing the TX path 718 * synchronize with TX path, while splicing the TX path
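
The agg-tx.c change keeps the existing mapping from stop reason to driver action, only it now writes the result into params.action: AGG_STOP_DECLINED, AGG_STOP_LOCAL_REQUEST and AGG_STOP_PEER_REQUEST stop with TX_STOP_CONT, AGG_STOP_DESTROY_STA flushes with TX_STOP_FLUSH, and a session already being torn down when the station is destroyed is flushed with TX_STOP_FLUSH_CONT. A standalone restatement of that switch follows; the enum names are shortened copies of the ones in the diff and everything around them is invented for the example.

#include <stdio.h>

enum agg_stop_reason { AGG_STOP_DECLINED, AGG_STOP_LOCAL_REQUEST,
		       AGG_STOP_PEER_REQUEST, AGG_STOP_DESTROY_STA };

enum ampdu_tx_stop { TX_STOP_CONT, TX_STOP_FLUSH, TX_STOP_FLUSH_CONT };

/* Same switch the diff performs on 'reason'; returns -1 for unknown input
 * where the kernel code would WARN_ON_ONCE() and bail out. */
static int stop_action(enum agg_stop_reason reason, enum ampdu_tx_stop *out)
{
	switch (reason) {
	case AGG_STOP_DECLINED:
	case AGG_STOP_LOCAL_REQUEST:
	case AGG_STOP_PEER_REQUEST:
		*out = TX_STOP_CONT;
		return 0;
	case AGG_STOP_DESTROY_STA:
		*out = TX_STOP_FLUSH;
		return 0;
	}
	return -1;
}

int main(void)
{
	enum ampdu_tx_stop action;

	if (!stop_action(AGG_STOP_PEER_REQUEST, &action))
		printf("peer request -> %d (TX_STOP_CONT)\n", (int)action);
	if (!stop_action(AGG_STOP_DESTROY_STA, &action))
		printf("destroy sta  -> %d (TX_STOP_FLUSH)\n", (int)action);
	return 0;
}
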
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 166a29fe6c35..fe1704c4e8fb 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -339,8 +339,9 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
339 339
340 switch (key->conf.cipher) { 340 switch (key->conf.cipher) {
341 case WLAN_CIPHER_SUITE_TKIP: 341 case WLAN_CIPHER_SUITE_TKIP:
342 iv32 = key->u.tkip.tx.iv32; 342 pn64 = atomic64_read(&key->conf.tx_pn);
343 iv16 = key->u.tkip.tx.iv16; 343 iv32 = TKIP_PN_TO_IV32(pn64);
344 iv16 = TKIP_PN_TO_IV16(pn64);
344 345
345 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && 346 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
346 !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { 347 !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
@@ -1131,6 +1132,34 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1131 sta->sta.max_sp = params->max_sp; 1132 sta->sta.max_sp = params->max_sp;
1132 } 1133 }
1133 1134
1135 /* The sender might not have sent the last bit, consider it to be 0 */
1136 if (params->ext_capab_len >= 8) {
1137 u8 val = (params->ext_capab[7] &
1138 WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB) >> 7;
1139
1140 /* we did get all the bits, take the MSB as well */
1141 if (params->ext_capab_len >= 9) {
1142 u8 val_msb = params->ext_capab[8] &
1143 WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB;
1144 val_msb <<= 1;
1145 val |= val_msb;
1146 }
1147
1148 switch (val) {
1149 case 1:
1150 sta->sta.max_amsdu_subframes = 32;
1151 break;
1152 case 2:
1153 sta->sta.max_amsdu_subframes = 16;
1154 break;
1155 case 3:
1156 sta->sta.max_amsdu_subframes = 8;
1157 break;
1158 default:
1159 sta->sta.max_amsdu_subframes = 0;
1160 }
1161 }
1162
1134 /* 1163 /*
1135 * cfg80211 validates this (1-2007) and allows setting the AID 1164 * cfg80211 validates this (1-2007) and allows setting the AID
1136 * only when creating a new station entry 1165 * only when creating a new station entry
@@ -1160,6 +1189,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
1160 ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, 1189 ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
1161 params->ht_capa, sta); 1190 params->ht_capa, sta);
1162 1191
1192 /* VHT can override some HT caps such as the A-MSDU max length */
1163 if (params->vht_capa) 1193 if (params->vht_capa)
1164 ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, 1194 ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
1165 params->vht_capa, sta); 1195 params->vht_capa, sta);
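
The new block in sta_apply_parameters() reads the "max number of MSDUs in an A-MSDU" field from the peer's extended capabilities: the low bit sits in octet 8 and the high bit in octet 9, and the two-bit value maps to 32, 16, 8 or unlimited subframes. A small standalone decoder of that mapping is below; the 0x80 and 0x01 masks are assumptions standing in for the WLAN_EXT_CAPA8_/WLAN_EXT_CAPA9_ definitions, which are not part of this diff.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB 0x80	/* assumed: bit 7 of octet 8 */
#define EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB 0x01	/* assumed: bit 0 of octet 9 */

/* Returns the maximum number of MSDUs per A-MSDU, 0 meaning "no limit",
 * using the same value mapping as the diff (1->32, 2->16, 3->8). */
static unsigned int max_amsdu_subframes(const uint8_t *ext_capab, size_t len)
{
	uint8_t val;

	if (len < 8)
		return 0;

	/* the sender might not have sent the last bit, treat it as 0 */
	val = (ext_capab[7] & EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB) >> 7;
	if (len >= 9)
		val |= (ext_capab[8] & EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB) << 1;

	switch (val) {
	case 1: return 32;
	case 2: return 16;
	case 3: return 8;
	default: return 0;
	}
}

int main(void)
{
	uint8_t capa[9] = { 0 };

	capa[7] = 0x80;				/* LSB only -> value 1 */
	printf("%u\n", max_amsdu_subframes(capa, sizeof(capa)));	/* 32 */
	capa[8] = 0x01;				/* MSB too  -> value 3 */
	printf("%u\n", max_amsdu_subframes(capa, sizeof(capa)));	/* 8 */
	return 0;
}
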
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 1d1b9b7bdefe..283981108ca8 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -231,7 +231,7 @@ ieee80211_get_max_required_bw(struct ieee80211_sub_if_data *sdata)
231 !(sta->sdata->bss && sta->sdata->bss == sdata->bss)) 231 !(sta->sdata->bss && sta->sdata->bss == sdata->bss))
232 continue; 232 continue;
233 233
234 if (!sta->uploaded) 234 if (!sta->uploaded || !test_sta_flag(sta, WLAN_STA_ASSOC))
235 continue; 235 continue;
236 236
237 max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta)); 237 max_bw = max(max_bw, ieee80211_get_sta_bw(&sta->sta));
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 3e24d0ddb51b..4ab5c522ceee 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -126,6 +126,7 @@ static const char *hw_flag_names[] = {
126 FLAG(SUPPORTS_AMSDU_IN_AMPDU), 126 FLAG(SUPPORTS_AMSDU_IN_AMPDU),
127 FLAG(BEACON_TX_STATUS), 127 FLAG(BEACON_TX_STATUS),
128 FLAG(NEEDS_UNIQUE_STA_ADDR), 128 FLAG(NEEDS_UNIQUE_STA_ADDR),
129 FLAG(SUPPORTS_REORDERING_BUFFER),
129#undef FLAG 130#undef FLAG
130}; 131};
131 132
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 7961e7d0b61e..a2ef95f16f11 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -132,9 +132,10 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
132 len = scnprintf(buf, sizeof(buf), "\n"); 132 len = scnprintf(buf, sizeof(buf), "\n");
133 break; 133 break;
134 case WLAN_CIPHER_SUITE_TKIP: 134 case WLAN_CIPHER_SUITE_TKIP:
135 pn = atomic64_read(&key->conf.tx_pn);
135 len = scnprintf(buf, sizeof(buf), "%08x %04x\n", 136 len = scnprintf(buf, sizeof(buf), "%08x %04x\n",
136 key->u.tkip.tx.iv32, 137 TKIP_PN_TO_IV32(pn),
137 key->u.tkip.tx.iv16); 138 TKIP_PN_TO_IV16(pn));
138 break; 139 break;
139 case WLAN_CIPHER_SUITE_CCMP: 140 case WLAN_CIPHER_SUITE_CCMP:
140 case WLAN_CIPHER_SUITE_CCMP_256: 141 case WLAN_CIPHER_SUITE_CCMP_256:
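
Both cfg.c and debugfs_key.c now derive the TKIP IV pair from the single 48-bit packet-number counter in key->conf.tx_pn instead of the per-key iv32/iv16 fields that key.h removes later in this series. The TKIP_PN_TO_IV32/IV16 macro bodies are not shown in the diff; the sketch below assumes the conventional TSC split (IV16 is the low 16 bits, IV32 the next 32 bits), so treat the definitions as assumptions.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed definitions: the diff only shows these macros being used. */
#define TKIP_PN_TO_IV16(pn) ((uint16_t)((pn) & 0xffff))
#define TKIP_PN_TO_IV32(pn) ((uint32_t)(((pn) >> 16) & 0xffffffff))

int main(void)
{
	uint64_t pn = 0x0000123456789abcULL;	/* 48-bit TKIP TSC */

	printf("iv32=%08" PRIx32 " iv16=%04" PRIx16 "\n",
	       TKIP_PN_TO_IV32(pn), TKIP_PN_TO_IV16(pn));
	/* prints: iv32=12345678 iv16=9abc */
	return 0;
}
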
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index ca1fe5576103..c258f1041d33 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -284,9 +284,7 @@ int drv_switch_vif_chanctx(struct ieee80211_local *local,
284 284
285int drv_ampdu_action(struct ieee80211_local *local, 285int drv_ampdu_action(struct ieee80211_local *local,
286 struct ieee80211_sub_if_data *sdata, 286 struct ieee80211_sub_if_data *sdata,
287 enum ieee80211_ampdu_mlme_action action, 287 struct ieee80211_ampdu_params *params)
288 struct ieee80211_sta *sta, u16 tid,
289 u16 *ssn, u8 buf_size, bool amsdu)
290{ 288{
291 int ret = -EOPNOTSUPP; 289 int ret = -EOPNOTSUPP;
292 290
@@ -296,12 +294,10 @@ int drv_ampdu_action(struct ieee80211_local *local,
296 if (!check_sdata_in_driver(sdata)) 294 if (!check_sdata_in_driver(sdata))
297 return -EIO; 295 return -EIO;
298 296
299 trace_drv_ampdu_action(local, sdata, action, sta, tid, 297 trace_drv_ampdu_action(local, sdata, params);
300 ssn, buf_size, amsdu);
301 298
302 if (local->ops->ampdu_action) 299 if (local->ops->ampdu_action)
303 ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, 300 ret = local->ops->ampdu_action(&local->hw, &sdata->vif, params);
304 sta, tid, ssn, buf_size, amsdu);
305 301
306 trace_drv_return_int(local, ret); 302 trace_drv_return_int(local, ret);
307 303
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 154ce4b13406..18b0d65baff0 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -585,9 +585,7 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local)
585 585
586int drv_ampdu_action(struct ieee80211_local *local, 586int drv_ampdu_action(struct ieee80211_local *local,
587 struct ieee80211_sub_if_data *sdata, 587 struct ieee80211_sub_if_data *sdata,
588 enum ieee80211_ampdu_mlme_action action, 588 struct ieee80211_ampdu_params *params);
589 struct ieee80211_sta *sta, u16 tid,
590 u16 *ssn, u8 buf_size, bool amsdu);
591 589
592static inline int drv_get_survey(struct ieee80211_local *local, int idx, 590static inline int drv_get_survey(struct ieee80211_local *local, int idx,
593 struct survey_info *survey) 591 struct survey_info *survey)
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 7a76ce639d58..f4a528773563 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -230,6 +230,11 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
230 /* set Rx highest rate */ 230 /* set Rx highest rate */
231 ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest; 231 ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest;
232 232
233 if (ht_cap.cap & IEEE80211_HT_CAP_MAX_AMSDU)
234 sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_7935;
235 else
236 sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839;
237
233 apply: 238 apply:
234 changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); 239 changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
235 240
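
The ht.c hunk sets the peer's maximum A-MSDU length straight from the HT capability bit: 7935 octets when IEEE80211_HT_CAP_MAX_AMSDU is advertised, 3839 otherwise, with VHT allowed to raise it later (as the comment added in cfg.c notes). A trivial standalone version of the mapping; the 0x0800 bit value is taken from the 802.11 HT capability field definition, not from this diff.

#include <stdint.h>
#include <stdio.h>

#define HT_CAP_MAX_AMSDU 0x0800	/* "Maximum A-MSDU Length" capability bit */

/* Same two-way choice as the diff: 7935 with the bit set, 3839 without. */
static unsigned int ht_max_amsdu_len(uint16_t ht_cap)
{
	return (ht_cap & HT_CAP_MAX_AMSDU) ? 7935 : 3839;
}

int main(void)
{
	printf("%u\n", ht_max_amsdu_len(0x0000));	/* 3839 */
	printf("%u\n", ht_max_amsdu_len(0x0800));	/* 7935 */
	return 0;
}
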
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index f7fc0e00497f..fc3238376b39 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -7,6 +7,7 @@
7 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 7 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
8 * Copyright 2009, Johannes Berg <johannes@sipsolutions.net> 8 * Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
9 * Copyright 2013-2014 Intel Mobile Communications GmbH 9 * Copyright 2013-2014 Intel Mobile Communications GmbH
10 * Copyright(c) 2016 Intel Deutschland GmbH
10 * 11 *
11 * This program is free software; you can redistribute it and/or modify 12 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 13 * it under the terms of the GNU General Public License version 2 as
@@ -1050,9 +1051,8 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
1050 struct cfg80211_chan_def chandef; 1051 struct cfg80211_chan_def chandef;
1051 enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth; 1052 enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth;
1052 1053
1053 ieee80211_ht_oper_to_chandef(channel, 1054 cfg80211_chandef_create(&chandef, channel, NL80211_CHAN_NO_HT);
1054 elems->ht_operation, 1055 ieee80211_chandef_ht_oper(elems->ht_operation, &chandef);
1055 &chandef);
1056 1056
1057 memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie)); 1057 memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie));
1058 rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, 1058 rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
@@ -1066,9 +1066,8 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
1066 struct ieee80211_vht_cap cap_ie; 1066 struct ieee80211_vht_cap cap_ie;
1067 struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap; 1067 struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap;
1068 1068
1069 ieee80211_vht_oper_to_chandef(channel, 1069 ieee80211_chandef_vht_oper(elems->vht_operation,
1070 elems->vht_operation, 1070 &chandef);
1071 &chandef);
1072 memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie)); 1071 memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie));
1073 ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, 1072 ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
1074 &cap_ie, sta); 1073 &cap_ie, sta);
@@ -1485,14 +1484,21 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
1485 1484
1486 sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); 1485 sdata_info(sdata, "Trigger new scan to find an IBSS to join\n");
1487 1486
1488 num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy,
1489 &ifibss->chandef,
1490 channels,
1491 ARRAY_SIZE(channels));
1492 scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef); 1487 scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
1493 ieee80211_request_ibss_scan(sdata, ifibss->ssid, 1488
1494 ifibss->ssid_len, channels, num, 1489 if (ifibss->fixed_channel) {
1495 scan_width); 1490 num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy,
1491 &ifibss->chandef,
1492 channels,
1493 ARRAY_SIZE(channels));
1494 ieee80211_request_ibss_scan(sdata, ifibss->ssid,
1495 ifibss->ssid_len, channels,
1496 num, scan_width);
1497 } else {
1498 ieee80211_request_ibss_scan(sdata, ifibss->ssid,
1499 ifibss->ssid_len, NULL,
1500 0, scan_width);
1501 }
1496 } else { 1502 } else {
1497 int interval = IEEE80211_SCAN_INTERVAL; 1503 int interval = IEEE80211_SCAN_INTERVAL;
1498 1504
@@ -1733,7 +1739,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
1733 if (sdata->vif.type != NL80211_IFTYPE_ADHOC) 1739 if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
1734 continue; 1740 continue;
1735 sdata->u.ibss.last_scan_completed = jiffies; 1741 sdata->u.ibss.last_scan_completed = jiffies;
1736 ieee80211_queue_work(&local->hw, &sdata->work);
1737 } 1742 }
1738 mutex_unlock(&local->iflist_mtx); 1743 mutex_unlock(&local->iflist_mtx);
1739} 1744}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b84f6aa32c08..804575ff7af5 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -92,7 +92,7 @@ struct ieee80211_fragment_entry {
92 u16 extra_len; 92 u16 extra_len;
93 u16 last_frag; 93 u16 last_frag;
94 u8 rx_queue; 94 u8 rx_queue;
95 bool ccmp; /* Whether fragments were encrypted with CCMP */ 95 bool check_sequential_pn; /* needed for CCMP/GCMP */
96 u8 last_pn[6]; /* PN of the last fragment if CCMP was used */ 96 u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
97}; 97};
98 98
@@ -716,7 +716,6 @@ struct ieee80211_if_mesh {
716 * back to wireless media and to the local net stack. 716 * back to wireless media and to the local net stack.
717 * @IEEE80211_SDATA_DISCONNECT_RESUME: Disconnect after resume. 717 * @IEEE80211_SDATA_DISCONNECT_RESUME: Disconnect after resume.
718 * @IEEE80211_SDATA_IN_DRIVER: indicates interface was added to driver 718 * @IEEE80211_SDATA_IN_DRIVER: indicates interface was added to driver
719 * @IEEE80211_SDATA_MU_MIMO_OWNER: indicates interface owns MU-MIMO capability
720 */ 719 */
721enum ieee80211_sub_if_data_flags { 720enum ieee80211_sub_if_data_flags {
722 IEEE80211_SDATA_ALLMULTI = BIT(0), 721 IEEE80211_SDATA_ALLMULTI = BIT(0),
@@ -724,7 +723,6 @@ enum ieee80211_sub_if_data_flags {
724 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), 723 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3),
725 IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4), 724 IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4),
726 IEEE80211_SDATA_IN_DRIVER = BIT(5), 725 IEEE80211_SDATA_IN_DRIVER = BIT(5),
727 IEEE80211_SDATA_MU_MIMO_OWNER = BIT(6),
728}; 726};
729 727
730/** 728/**
@@ -804,6 +802,7 @@ enum txq_info_flags {
804struct txq_info { 802struct txq_info {
805 struct sk_buff_head queue; 803 struct sk_buff_head queue;
806 unsigned long flags; 804 unsigned long flags;
805 unsigned long byte_cnt;
807 806
808 /* keep last! */ 807 /* keep last! */
809 struct ieee80211_txq txq; 808 struct ieee80211_txq txq;
@@ -1466,7 +1465,13 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
1466{ 1465{
1467 WARN_ON_ONCE(status->flag & RX_FLAG_MACTIME_START && 1466 WARN_ON_ONCE(status->flag & RX_FLAG_MACTIME_START &&
1468 status->flag & RX_FLAG_MACTIME_END); 1467 status->flag & RX_FLAG_MACTIME_END);
1469 return status->flag & (RX_FLAG_MACTIME_START | RX_FLAG_MACTIME_END); 1468 if (status->flag & (RX_FLAG_MACTIME_START | RX_FLAG_MACTIME_END))
1469 return true;
1470 /* can't handle HT/VHT preamble yet */
1471 if (status->flag & RX_FLAG_MACTIME_PLCP_START &&
1472 !(status->flag & (RX_FLAG_HT | RX_FLAG_VHT)))
1473 return true;
1474 return false;
1470} 1475}
1471 1476
1472u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, 1477u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
@@ -1714,6 +1719,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
1714enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta); 1719enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta);
1715enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); 1720enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta);
1716void ieee80211_sta_set_rx_nss(struct sta_info *sta); 1721void ieee80211_sta_set_rx_nss(struct sta_info *sta);
1722void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
1723 struct ieee80211_mgmt *mgmt);
1717u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, 1724u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
1718 struct sta_info *sta, u8 opmode, 1725 struct sta_info *sta, u8 opmode,
1719 enum ieee80211_band band); 1726 enum ieee80211_band band);
@@ -1829,20 +1836,6 @@ static inline void ieee802_11_parse_elems(const u8 *start, size_t len,
1829 ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0); 1836 ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0);
1830} 1837}
1831 1838
1832static inline bool ieee80211_rx_reorder_ready(struct sk_buff_head *frames)
1833{
1834 struct sk_buff *tail = skb_peek_tail(frames);
1835 struct ieee80211_rx_status *status;
1836
1837 if (!tail)
1838 return false;
1839
1840 status = IEEE80211_SKB_RXCB(tail);
1841 if (status->flag & RX_FLAG_AMSDU_MORE)
1842 return false;
1843
1844 return true;
1845}
1846 1839
1847extern const int ieee802_1d_to_ac[8]; 1840extern const int ieee802_1d_to_ac[8];
1848 1841
@@ -1986,12 +1979,10 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
1986u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo); 1979u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo);
1987 1980
1988/* channel management */ 1981/* channel management */
1989void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, 1982bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
1990 const struct ieee80211_ht_operation *ht_oper, 1983 struct cfg80211_chan_def *chandef);
1991 struct cfg80211_chan_def *chandef); 1984bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
1992void ieee80211_vht_oper_to_chandef(struct ieee80211_channel *control_chan, 1985 struct cfg80211_chan_def *chandef);
1993 const struct ieee80211_vht_operation *oper,
1994 struct cfg80211_chan_def *chandef);
1995u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c); 1986u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c);
1996 1987
1997int __must_check 1988int __must_check
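
ieee80211_have_rx_timestamp() above gains a second accepted case: a PLCP-start timestamp is only usable when the frame is neither HT nor VHT, because the preamble duration for those cannot be computed yet. The predicate restated as a standalone function is below; the flag bit values are arbitrary stand-ins, only the logic mirrors the diff.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* stand-in flag bits; the real RX_FLAG_* values differ */
#define FLAG_MACTIME_START	0x01
#define FLAG_MACTIME_END	0x02
#define FLAG_MACTIME_PLCP_START	0x04
#define FLAG_HT			0x08
#define FLAG_VHT		0x10

static bool have_rx_timestamp(uint32_t flag)
{
	if (flag & (FLAG_MACTIME_START | FLAG_MACTIME_END))
		return true;
	/* can't handle HT/VHT preamble yet */
	if ((flag & FLAG_MACTIME_PLCP_START) && !(flag & (FLAG_HT | FLAG_VHT)))
		return true;
	return false;
}

int main(void)
{
	printf("%d\n", have_rx_timestamp(FLAG_MACTIME_PLCP_START));		/* 1 */
	printf("%d\n", have_rx_timestamp(FLAG_MACTIME_PLCP_START | FLAG_HT));	/* 0 */
	return 0;
}
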
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index c9e325d2e120..453b4e741780 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -977,7 +977,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
977 if (sdata->vif.txq) { 977 if (sdata->vif.txq) {
978 struct txq_info *txqi = to_txq_info(sdata->vif.txq); 978 struct txq_info *txqi = to_txq_info(sdata->vif.txq);
979 979
980 spin_lock_bh(&txqi->queue.lock);
980 ieee80211_purge_tx_queue(&local->hw, &txqi->queue); 981 ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
982 txqi->byte_cnt = 0;
983 spin_unlock_bh(&txqi->queue.lock);
984
981 atomic_set(&sdata->txqs_len[txqi->txq.ac], 0); 985 atomic_set(&sdata->txqs_len[txqi->txq.ac], 0);
982 } 986 }
983 987
@@ -1271,6 +1275,16 @@ static void ieee80211_iface_work(struct work_struct *work)
1271 } 1275 }
1272 } 1276 }
1273 mutex_unlock(&local->sta_mtx); 1277 mutex_unlock(&local->sta_mtx);
1278 } else if (ieee80211_is_action(mgmt->frame_control) &&
1279 mgmt->u.action.category == WLAN_CATEGORY_VHT) {
1280 switch (mgmt->u.action.u.vht_group_notif.action_code) {
1281 case WLAN_VHT_ACTION_GROUPID_MGMT:
1282 ieee80211_process_mu_groups(sdata, mgmt);
1283 break;
1284 default:
1285 WARN_ON(1);
1286 break;
1287 }
1274 } else if (ieee80211_is_data_qos(mgmt->frame_control)) { 1288 } else if (ieee80211_is_data_qos(mgmt->frame_control)) {
1275 struct ieee80211_hdr *hdr = (void *)mgmt; 1289 struct ieee80211_hdr *hdr = (void *)mgmt;
1276 /* 1290 /*
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 5e5bc599da4c..3df7b0392d30 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -932,50 +932,6 @@ void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid,
932} 932}
933EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_notify); 933EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_notify);
934 934
935void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf,
936 struct ieee80211_key_seq *seq)
937{
938 struct ieee80211_key *key;
939 u64 pn64;
940
941 if (WARN_ON(!(keyconf->flags & IEEE80211_KEY_FLAG_GENERATE_IV)))
942 return;
943
944 key = container_of(keyconf, struct ieee80211_key, conf);
945
946 switch (key->conf.cipher) {
947 case WLAN_CIPHER_SUITE_TKIP:
948 seq->tkip.iv32 = key->u.tkip.tx.iv32;
949 seq->tkip.iv16 = key->u.tkip.tx.iv16;
950 break;
951 case WLAN_CIPHER_SUITE_CCMP:
952 case WLAN_CIPHER_SUITE_CCMP_256:
953 case WLAN_CIPHER_SUITE_AES_CMAC:
954 case WLAN_CIPHER_SUITE_BIP_CMAC_256:
955 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
956 offsetof(typeof(*seq), aes_cmac));
957 case WLAN_CIPHER_SUITE_BIP_GMAC_128:
958 case WLAN_CIPHER_SUITE_BIP_GMAC_256:
959 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
960 offsetof(typeof(*seq), aes_gmac));
961 case WLAN_CIPHER_SUITE_GCMP:
962 case WLAN_CIPHER_SUITE_GCMP_256:
963 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
964 offsetof(typeof(*seq), gcmp));
965 pn64 = atomic64_read(&key->conf.tx_pn);
966 seq->ccmp.pn[5] = pn64;
967 seq->ccmp.pn[4] = pn64 >> 8;
968 seq->ccmp.pn[3] = pn64 >> 16;
969 seq->ccmp.pn[2] = pn64 >> 24;
970 seq->ccmp.pn[1] = pn64 >> 32;
971 seq->ccmp.pn[0] = pn64 >> 40;
972 break;
973 default:
974 WARN_ON(1);
975 }
976}
977EXPORT_SYMBOL(ieee80211_get_key_tx_seq);
978
979void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, 935void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf,
980 int tid, struct ieee80211_key_seq *seq) 936 int tid, struct ieee80211_key_seq *seq)
981{ 937{
@@ -1029,48 +985,6 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf,
1029} 985}
1030EXPORT_SYMBOL(ieee80211_get_key_rx_seq); 986EXPORT_SYMBOL(ieee80211_get_key_rx_seq);
1031 987
1032void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf,
1033 struct ieee80211_key_seq *seq)
1034{
1035 struct ieee80211_key *key;
1036 u64 pn64;
1037
1038 key = container_of(keyconf, struct ieee80211_key, conf);
1039
1040 switch (key->conf.cipher) {
1041 case WLAN_CIPHER_SUITE_TKIP:
1042 key->u.tkip.tx.iv32 = seq->tkip.iv32;
1043 key->u.tkip.tx.iv16 = seq->tkip.iv16;
1044 break;
1045 case WLAN_CIPHER_SUITE_CCMP:
1046 case WLAN_CIPHER_SUITE_CCMP_256:
1047 case WLAN_CIPHER_SUITE_AES_CMAC:
1048 case WLAN_CIPHER_SUITE_BIP_CMAC_256:
1049 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
1050 offsetof(typeof(*seq), aes_cmac));
1051 case WLAN_CIPHER_SUITE_BIP_GMAC_128:
1052 case WLAN_CIPHER_SUITE_BIP_GMAC_256:
1053 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
1054 offsetof(typeof(*seq), aes_gmac));
1055 case WLAN_CIPHER_SUITE_GCMP:
1056 case WLAN_CIPHER_SUITE_GCMP_256:
1057 BUILD_BUG_ON(offsetof(typeof(*seq), ccmp) !=
1058 offsetof(typeof(*seq), gcmp));
1059 pn64 = (u64)seq->ccmp.pn[5] |
1060 ((u64)seq->ccmp.pn[4] << 8) |
1061 ((u64)seq->ccmp.pn[3] << 16) |
1062 ((u64)seq->ccmp.pn[2] << 24) |
1063 ((u64)seq->ccmp.pn[1] << 32) |
1064 ((u64)seq->ccmp.pn[0] << 40);
1065 atomic64_set(&key->conf.tx_pn, pn64);
1066 break;
1067 default:
1068 WARN_ON(1);
1069 break;
1070 }
1071}
1072EXPORT_SYMBOL_GPL(ieee80211_set_key_tx_seq);
1073
1074void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, 988void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf,
1075 int tid, struct ieee80211_key_seq *seq) 989 int tid, struct ieee80211_key_seq *seq)
1076{ 990{
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 9951ef06323e..4aa20cef0859 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -44,13 +44,17 @@ enum ieee80211_internal_tkip_state {
44}; 44};
45 45
46struct tkip_ctx { 46struct tkip_ctx {
47 u32 iv32; /* current iv32 */
48 u16 iv16; /* current iv16 */
49 u16 p1k[5]; /* p1k cache */ 47 u16 p1k[5]; /* p1k cache */
50 u32 p1k_iv32; /* iv32 for which p1k computed */ 48 u32 p1k_iv32; /* iv32 for which p1k computed */
51 enum ieee80211_internal_tkip_state state; 49 enum ieee80211_internal_tkip_state state;
52}; 50};
53 51
52struct tkip_ctx_rx {
53 struct tkip_ctx ctx;
54 u32 iv32; /* current iv32 */
55 u16 iv16; /* current iv16 */
56};
57
54struct ieee80211_key { 58struct ieee80211_key {
55 struct ieee80211_local *local; 59 struct ieee80211_local *local;
56 struct ieee80211_sub_if_data *sdata; 60 struct ieee80211_sub_if_data *sdata;
@@ -71,7 +75,7 @@ struct ieee80211_key {
71 struct tkip_ctx tx; 75 struct tkip_ctx tx;
72 76
73 /* last received RSC */ 77 /* last received RSC */
74 struct tkip_ctx rx[IEEE80211_NUM_TIDS]; 78 struct tkip_ctx_rx rx[IEEE80211_NUM_TIDS];
75 79
76 /* number of mic failures */ 80 /* number of mic failures */
77 u32 mic_failures; 81 u32 mic_failures;
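
The key.h change moves the live iv32/iv16 counters out of struct tkip_ctx: the TX side now derives them from the atomic tx_pn (see the cfg.c and debugfs_key.c hunks), while the RX side keeps the last received IVs per TID in the new wrapper struct tkip_ctx_rx. A minimal standalone rendering of the resulting layout follows; only the two struct shapes come from the diff, the surrounding key type and values are invented for the example.

#include <stdint.h>
#include <stdio.h>

#define NUM_TIDS 16

enum tkip_state { TKIP_STATE_NOT_INIT, TKIP_STATE_PHASE1_DONE };

struct tkip_ctx {		/* shared part: the phase-1 key cache */
	uint16_t p1k[5];
	uint32_t p1k_iv32;
	enum tkip_state state;
};

struct tkip_ctx_rx {		/* RX keeps the last received IVs per TID */
	struct tkip_ctx ctx;
	uint32_t iv32;
	uint16_t iv16;
};

struct tkip_key {		/* simplified stand-in for ieee80211_key */
	struct tkip_ctx tx;			/* no iv32/iv16 here anymore */
	struct tkip_ctx_rx rx[NUM_TIDS];
};

int main(void)
{
	static struct tkip_key key;	/* zero-initialized */

	key.rx[0].iv32 = 7;
	key.rx[0].iv16 = 1;
	printf("tid 0 rsc: %u/%u\n", (unsigned)key.rx[0].iv32,
	       (unsigned)key.rx[0].iv16);
	return 0;
}
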
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 6bcf0faa4a89..8190bf27ebff 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -248,6 +248,7 @@ static void ieee80211_restart_work(struct work_struct *work)
248 248
249 /* wait for scan work complete */ 249 /* wait for scan work complete */
250 flush_workqueue(local->workqueue); 250 flush_workqueue(local->workqueue);
251 flush_work(&local->sched_scan_stopped_work);
251 252
252 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), 253 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
253 "%s called with hardware scan in progress\n", __func__); 254 "%s called with hardware scan in progress\n", __func__);
@@ -256,6 +257,11 @@ static void ieee80211_restart_work(struct work_struct *work)
256 list_for_each_entry(sdata, &local->interfaces, list) 257 list_for_each_entry(sdata, &local->interfaces, list)
257 flush_delayed_work(&sdata->dec_tailroom_needed_wk); 258 flush_delayed_work(&sdata->dec_tailroom_needed_wk);
258 ieee80211_scan_cancel(local); 259 ieee80211_scan_cancel(local);
260
261 /* make sure any new ROC will consider local->in_reconfig */
262 flush_delayed_work(&local->roc_work);
263 flush_work(&local->hw_roc_done);
264
259 ieee80211_reconfig(local); 265 ieee80211_reconfig(local);
260 rtnl_unlock(); 266 rtnl_unlock();
261} 267}
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index fa28500f28fd..d32cefcb63b0 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -91,11 +91,10 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
91 if (sdata->vif.bss_conf.basic_rates != basic_rates) 91 if (sdata->vif.bss_conf.basic_rates != basic_rates)
92 return false; 92 return false;
93 93
94 ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan, 94 cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chandef.chan,
95 ie->ht_operation, &sta_chan_def); 95 NL80211_CHAN_NO_HT);
96 96 ieee80211_chandef_ht_oper(ie->ht_operation, &sta_chan_def);
97 ieee80211_vht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan, 97 ieee80211_chandef_vht_oper(ie->vht_operation, &sta_chan_def);
98 ie->vht_operation, &sta_chan_def);
99 98
100 if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef, 99 if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef,
101 &sta_chan_def)) 100 &sta_chan_def))
@@ -1370,17 +1369,6 @@ out:
1370 sdata_unlock(sdata); 1369 sdata_unlock(sdata);
1371} 1370}
1372 1371
1373void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
1374{
1375 struct ieee80211_sub_if_data *sdata;
1376
1377 rcu_read_lock();
1378 list_for_each_entry_rcu(sdata, &local->interfaces, list)
1379 if (ieee80211_vif_is_mesh(&sdata->vif) &&
1380 ieee80211_sdata_running(sdata))
1381 ieee80211_queue_work(&local->hw, &sdata->work);
1382 rcu_read_unlock();
1383}
1384 1372
1385void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) 1373void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
1386{ 1374{
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index a1596344c3ba..87c017a3b1ce 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -137,8 +137,6 @@ struct mesh_path {
137 * @copy_node: function to copy nodes of the table 137 * @copy_node: function to copy nodes of the table
138 * @size_order: determines size of the table, there will be 2^size_order hash 138 * @size_order: determines size of the table, there will be 2^size_order hash
139 * buckets 139 * buckets
140 * @mean_chain_len: maximum average length for the hash buckets' list, if it is
141 * reached, the table will grow
142 * @known_gates: list of known mesh gates and their mpaths by the station. The 140 * @known_gates: list of known mesh gates and their mpaths by the station. The
143 * gate's mpath may or may not be resolved and active. 141 * gate's mpath may or may not be resolved and active.
144 * 142 *
@@ -154,7 +152,6 @@ struct mesh_table {
154 void (*free_node) (struct hlist_node *p, bool free_leafs); 152 void (*free_node) (struct hlist_node *p, bool free_leafs);
155 int (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl); 153 int (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl);
156 int size_order; 154 int size_order;
157 int mean_chain_len;
158 struct hlist_head *known_gates; 155 struct hlist_head *known_gates;
159 spinlock_t gates_lock; 156 spinlock_t gates_lock;
160 157
@@ -362,14 +359,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
362 return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP; 359 return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP;
363} 360}
364 361
365void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
366
367void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); 362void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
368void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata); 363void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
369void ieee80211s_stop(void); 364void ieee80211s_stop(void);
370#else 365#else
371static inline void
372ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
373static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) 366static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
374{ return false; } 367{ return false; }
375static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) 368static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index c6be0b4f4058..5b6aec1a0630 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -205,9 +205,9 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
205 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 205 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
206 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 206 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
207 207
208 skb_set_mac_header(skb, 0); 208 skb_reset_mac_header(skb);
209 skb_set_network_header(skb, 0); 209 skb_reset_network_header(skb);
210 skb_set_transport_header(skb, 0); 210 skb_reset_transport_header(skb);
211 211
212 /* Send all internal mgmt frames on VO. Accordingly set TID to 7. */ 212 /* Send all internal mgmt frames on VO. Accordingly set TID to 7. */
213 skb_set_queue_mapping(skb, IEEE80211_AC_VO); 213 skb_set_queue_mapping(skb, IEEE80211_AC_VO);
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index dadf8dc6f1cf..2ba7aa56b11c 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -55,16 +55,21 @@ int mpp_paths_generation;
55static DEFINE_RWLOCK(pathtbl_resize_lock); 55static DEFINE_RWLOCK(pathtbl_resize_lock);
56 56
57 57
58static inline struct mesh_table *resize_dereference_paths(
59 struct mesh_table __rcu *table)
60{
61 return rcu_dereference_protected(table,
62 lockdep_is_held(&pathtbl_resize_lock));
63}
64
58static inline struct mesh_table *resize_dereference_mesh_paths(void) 65static inline struct mesh_table *resize_dereference_mesh_paths(void)
59{ 66{
60 return rcu_dereference_protected(mesh_paths, 67 return resize_dereference_paths(mesh_paths);
61 lockdep_is_held(&pathtbl_resize_lock));
62} 68}
63 69
64static inline struct mesh_table *resize_dereference_mpp_paths(void) 70static inline struct mesh_table *resize_dereference_mpp_paths(void)
65{ 71{
66 return rcu_dereference_protected(mpp_paths, 72 return resize_dereference_paths(mpp_paths);
67 lockdep_is_held(&pathtbl_resize_lock));
68} 73}
69 74
70/* 75/*
@@ -160,11 +165,10 @@ static int mesh_table_grow(struct mesh_table *oldtbl,
160 int i; 165 int i;
161 166
162 if (atomic_read(&oldtbl->entries) 167 if (atomic_read(&oldtbl->entries)
163 < oldtbl->mean_chain_len * (oldtbl->hash_mask + 1)) 168 < MEAN_CHAIN_LEN * (oldtbl->hash_mask + 1))
164 return -EAGAIN; 169 return -EAGAIN;
165 170
166 newtbl->free_node = oldtbl->free_node; 171 newtbl->free_node = oldtbl->free_node;
167 newtbl->mean_chain_len = oldtbl->mean_chain_len;
168 newtbl->copy_node = oldtbl->copy_node; 172 newtbl->copy_node = oldtbl->copy_node;
169 newtbl->known_gates = oldtbl->known_gates; 173 newtbl->known_gates = oldtbl->known_gates;
170 atomic_set(&newtbl->entries, atomic_read(&oldtbl->entries)); 174 atomic_set(&newtbl->entries, atomic_read(&oldtbl->entries));
@@ -585,7 +589,7 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
585 589
586 hlist_add_head_rcu(&new_node->list, bucket); 590 hlist_add_head_rcu(&new_node->list, bucket);
587 if (atomic_inc_return(&tbl->entries) >= 591 if (atomic_inc_return(&tbl->entries) >=
588 tbl->mean_chain_len * (tbl->hash_mask + 1)) 592 MEAN_CHAIN_LEN * (tbl->hash_mask + 1))
589 grow = 1; 593 grow = 1;
590 594
591 mesh_paths_generation++; 595 mesh_paths_generation++;
@@ -714,7 +718,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
714 718
715 hlist_add_head_rcu(&new_node->list, bucket); 719 hlist_add_head_rcu(&new_node->list, bucket);
716 if (atomic_inc_return(&tbl->entries) >= 720 if (atomic_inc_return(&tbl->entries) >=
717 tbl->mean_chain_len * (tbl->hash_mask + 1)) 721 MEAN_CHAIN_LEN * (tbl->hash_mask + 1))
718 grow = 1; 722 grow = 1;
719 723
720 spin_unlock(&tbl->hashwlock[hash_idx]); 724 spin_unlock(&tbl->hashwlock[hash_idx]);
@@ -835,6 +839,29 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
835 rcu_read_unlock(); 839 rcu_read_unlock();
836} 840}
837 841
842static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
843 const u8 *proxy)
844{
845 struct mesh_table *tbl;
846 struct mesh_path *mpp;
847 struct mpath_node *node;
848 int i;
849
850 rcu_read_lock();
851 read_lock_bh(&pathtbl_resize_lock);
852 tbl = resize_dereference_mpp_paths();
853 for_each_mesh_entry(tbl, node, i) {
854 mpp = node->mpath;
855 if (ether_addr_equal(mpp->mpp, proxy)) {
856 spin_lock(&tbl->hashwlock[i]);
857 __mesh_path_del(tbl, node);
858 spin_unlock(&tbl->hashwlock[i]);
859 }
860 }
861 read_unlock_bh(&pathtbl_resize_lock);
862 rcu_read_unlock();
863}
864
838static void table_flush_by_iface(struct mesh_table *tbl, 865static void table_flush_by_iface(struct mesh_table *tbl,
839 struct ieee80211_sub_if_data *sdata) 866 struct ieee80211_sub_if_data *sdata)
840{ 867{
@@ -876,14 +903,17 @@ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
876} 903}
877 904
878/** 905/**
879 * mesh_path_del - delete a mesh path from the table 906 * table_path_del - delete a path from the mesh or mpp table
880 * 907 *
881 * @addr: dst address (ETH_ALEN length) 908 * @tbl: mesh or mpp path table
882 * @sdata: local subif 909 * @sdata: local subif
910 * @addr: dst address (ETH_ALEN length)
883 * 911 *
884 * Returns: 0 if successful 912 * Returns: 0 if successful
885 */ 913 */
886int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) 914static int table_path_del(struct mesh_table __rcu *rcu_tbl,
915 struct ieee80211_sub_if_data *sdata,
916 const u8 *addr)
887{ 917{
888 struct mesh_table *tbl; 918 struct mesh_table *tbl;
889 struct mesh_path *mpath; 919 struct mesh_path *mpath;
@@ -892,8 +922,7 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
892 int hash_idx; 922 int hash_idx;
893 int err = 0; 923 int err = 0;
894 924
895 read_lock_bh(&pathtbl_resize_lock); 925 tbl = resize_dereference_paths(rcu_tbl);
896 tbl = resize_dereference_mesh_paths();
897 hash_idx = mesh_table_hash(addr, sdata, tbl); 926 hash_idx = mesh_table_hash(addr, sdata, tbl);
898 bucket = &tbl->hash_buckets[hash_idx]; 927 bucket = &tbl->hash_buckets[hash_idx];
899 928
@@ -909,9 +938,50 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
909 938
910 err = -ENXIO; 939 err = -ENXIO;
911enddel: 940enddel:
912 mesh_paths_generation++;
913 spin_unlock(&tbl->hashwlock[hash_idx]); 941 spin_unlock(&tbl->hashwlock[hash_idx]);
942 return err;
943}
944
945/**
946 * mesh_path_del - delete a mesh path from the table
947 *
948 * @addr: dst address (ETH_ALEN length)
949 * @sdata: local subif
950 *
951 * Returns: 0 if successful
952 */
953int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
954{
955 int err = 0;
956
957 /* flush relevant mpp entries first */
958 mpp_flush_by_proxy(sdata, addr);
959
960 read_lock_bh(&pathtbl_resize_lock);
961 err = table_path_del(mesh_paths, sdata, addr);
962 mesh_paths_generation++;
914 read_unlock_bh(&pathtbl_resize_lock); 963 read_unlock_bh(&pathtbl_resize_lock);
964
965 return err;
966}
967
968/**
969 * mpp_path_del - delete a mesh proxy path from the table
970 *
971 * @addr: addr address (ETH_ALEN length)
972 * @sdata: local subif
973 *
974 * Returns: 0 if successful
975 */
976static int mpp_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
977{
978 int err = 0;
979
980 read_lock_bh(&pathtbl_resize_lock);
981 err = table_path_del(mpp_paths, sdata, addr);
982 mpp_paths_generation++;
983 read_unlock_bh(&pathtbl_resize_lock);
984
915 return err; 985 return err;
916} 986}
917 987
@@ -1076,7 +1146,6 @@ int mesh_pathtbl_init(void)
1076 return -ENOMEM; 1146 return -ENOMEM;
1077 tbl_path->free_node = &mesh_path_node_free; 1147 tbl_path->free_node = &mesh_path_node_free;
1078 tbl_path->copy_node = &mesh_path_node_copy; 1148 tbl_path->copy_node = &mesh_path_node_copy;
1079 tbl_path->mean_chain_len = MEAN_CHAIN_LEN;
1080 tbl_path->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); 1149 tbl_path->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC);
1081 if (!tbl_path->known_gates) { 1150 if (!tbl_path->known_gates) {
1082 ret = -ENOMEM; 1151 ret = -ENOMEM;
@@ -1092,7 +1161,6 @@ int mesh_pathtbl_init(void)
1092 } 1161 }
1093 tbl_mpp->free_node = &mesh_path_node_free; 1162 tbl_mpp->free_node = &mesh_path_node_free;
1094 tbl_mpp->copy_node = &mesh_path_node_copy; 1163 tbl_mpp->copy_node = &mesh_path_node_copy;
1095 tbl_mpp->mean_chain_len = MEAN_CHAIN_LEN;
1096 tbl_mpp->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); 1164 tbl_mpp->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC);
1097 if (!tbl_mpp->known_gates) { 1165 if (!tbl_mpp->known_gates) {
1098 ret = -ENOMEM; 1166 ret = -ENOMEM;
@@ -1131,6 +1199,17 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
1131 time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) 1199 time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
1132 mesh_path_del(mpath->sdata, mpath->dst); 1200 mesh_path_del(mpath->sdata, mpath->dst);
1133 } 1201 }
1202
1203 tbl = rcu_dereference(mpp_paths);
1204 for_each_mesh_entry(tbl, node, i) {
1205 if (node->mpath->sdata != sdata)
1206 continue;
1207 mpath = node->mpath;
1208 if ((!(mpath->flags & MESH_PATH_FIXED)) &&
1209 time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE))
1210 mpp_path_del(mpath->sdata, mpath->dst);
1211 }
1212
1134 rcu_read_unlock(); 1213 rcu_read_unlock();
1135} 1214}
1136 1215
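
With mean_chain_len dropped from struct mesh_table, the grow condition is expressed directly against the MEAN_CHAIN_LEN constant: the table grows once the entry count reaches MEAN_CHAIN_LEN times the bucket count (hash_mask + 1). A toy standalone illustration of that threshold is below; the value 2 for MEAN_CHAIN_LEN is an assumption, since the diff only references the name.

#include <stdbool.h>
#include <stdio.h>

#define MEAN_CHAIN_LEN 2	/* assumed value; the diff only shows the name */

struct mesh_table {
	unsigned int hash_mask;	/* buckets = hash_mask + 1 (power of two) */
	unsigned int entries;
};

/* Same test mesh_path_add()/mesh_table_grow() perform against the count. */
static bool needs_grow(const struct mesh_table *tbl)
{
	return tbl->entries >= MEAN_CHAIN_LEN * (tbl->hash_mask + 1);
}

int main(void)
{
	struct mesh_table tbl = { .hash_mask = 7, .entries = 0 };	/* 8 buckets */

	for (tbl.entries = 14; tbl.entries <= 16; tbl.entries++)
		printf("entries=%u grow=%d\n", tbl.entries, needs_grow(&tbl));
	/* grows once entries reach 16 = 2 * 8 */
	return 0;
}
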
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index bd3d55eb21d4..a07e93c21c9e 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -976,6 +976,10 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
976 mpl_dbg(sdata, "Mesh plink error: no more free plinks\n"); 976 mpl_dbg(sdata, "Mesh plink error: no more free plinks\n");
977 goto out; 977 goto out;
978 } 978 }
979
980 /* new matching peer */
981 event = OPN_ACPT;
982 goto out;
979 } else { 983 } else {
980 if (!test_sta_flag(sta, WLAN_STA_AUTH)) { 984 if (!test_sta_flag(sta, WLAN_STA_AUTH)) {
981 mpl_dbg(sdata, "Mesh plink: Action frame from non-authed peer\n"); 985 mpl_dbg(sdata, "Mesh plink: Action frame from non-authed peer\n");
@@ -985,12 +989,6 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
985 goto out; 989 goto out;
986 } 990 }
987 991
988 /* new matching peer */
989 if (!sta) {
990 event = OPN_ACPT;
991 goto out;
992 }
993
994 switch (ftype) { 992 switch (ftype) {
995 case WLAN_SP_MESH_PEERING_OPEN: 993 case WLAN_SP_MESH_PEERING_OPEN:
996 if (!matches_local) 994 if (!matches_local)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1c342e2592c4..281b8d6e5109 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -6,7 +6,7 @@
6 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 6 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
7 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 7 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
8 * Copyright 2013-2014 Intel Mobile Communications GmbH 8 * Copyright 2013-2014 Intel Mobile Communications GmbH
9 * Copyright (C) 2015 Intel Deutschland GmbH 9 * Copyright (C) 2015 - 2016 Intel Deutschland GmbH
10 * 10 *
11 * This program is free software; you can redistribute it and/or modify 11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 12 * it under the terms of the GNU General Public License version 2 as
@@ -196,16 +196,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
196 196
197 /* check 40 MHz support, if we have it */ 197 /* check 40 MHz support, if we have it */
198 if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) { 198 if (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) {
199 switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { 199 ieee80211_chandef_ht_oper(ht_oper, chandef);
200 case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
201 chandef->width = NL80211_CHAN_WIDTH_40;
202 chandef->center_freq1 += 10;
203 break;
204 case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
205 chandef->width = NL80211_CHAN_WIDTH_40;
206 chandef->center_freq1 -= 10;
207 break;
208 }
209 } else { 200 } else {
210 /* 40 MHz (and 80 MHz) must be supported for VHT */ 201 /* 40 MHz (and 80 MHz) must be supported for VHT */
211 ret = IEEE80211_STA_DISABLE_VHT; 202 ret = IEEE80211_STA_DISABLE_VHT;
@@ -219,35 +210,11 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
219 goto out; 210 goto out;
220 } 211 }
221 212
222 vht_chandef.chan = channel; 213 vht_chandef = *chandef;
223 vht_chandef.center_freq1 = 214 if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) {
224 ieee80211_channel_to_frequency(vht_oper->center_freq_seg1_idx,
225 channel->band);
226 vht_chandef.center_freq2 = 0;
227
228 switch (vht_oper->chan_width) {
229 case IEEE80211_VHT_CHANWIDTH_USE_HT:
230 vht_chandef.width = chandef->width;
231 vht_chandef.center_freq1 = chandef->center_freq1;
232 break;
233 case IEEE80211_VHT_CHANWIDTH_80MHZ:
234 vht_chandef.width = NL80211_CHAN_WIDTH_80;
235 break;
236 case IEEE80211_VHT_CHANWIDTH_160MHZ:
237 vht_chandef.width = NL80211_CHAN_WIDTH_160;
238 break;
239 case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
240 vht_chandef.width = NL80211_CHAN_WIDTH_80P80;
241 vht_chandef.center_freq2 =
242 ieee80211_channel_to_frequency(
243 vht_oper->center_freq_seg2_idx,
244 channel->band);
245 break;
246 default:
247 if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) 215 if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
248 sdata_info(sdata, 216 sdata_info(sdata,
249 "AP VHT operation IE has invalid channel width (%d), disable VHT\n", 217 "AP VHT information is invalid, disable VHT\n");
250 vht_oper->chan_width);
251 ret = IEEE80211_STA_DISABLE_VHT; 218 ret = IEEE80211_STA_DISABLE_VHT;
252 goto out; 219 goto out;
253 } 220 }
@@ -592,7 +559,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
592 struct ieee80211_sub_if_data *other; 559 struct ieee80211_sub_if_data *other;
593 560
594 list_for_each_entry_rcu(other, &local->interfaces, list) { 561 list_for_each_entry_rcu(other, &local->interfaces, list) {
595 if (other->flags & IEEE80211_SDATA_MU_MIMO_OWNER) { 562 if (other->vif.mu_mimo_owner) {
596 disable_mu_mimo = true; 563 disable_mu_mimo = true;
597 break; 564 break;
598 } 565 }
@@ -600,7 +567,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
600 if (disable_mu_mimo) 567 if (disable_mu_mimo)
601 cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; 568 cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
602 else 569 else
603 sdata->flags |= IEEE80211_SDATA_MU_MIMO_OWNER; 570 sdata->vif.mu_mimo_owner = true;
604 } 571 }
605 572
606 mask = IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK; 573 mask = IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK;
@@ -1638,8 +1605,7 @@ void ieee80211_dynamic_ps_timer(unsigned long data)
1638 1605
1639void ieee80211_dfs_cac_timer_work(struct work_struct *work) 1606void ieee80211_dfs_cac_timer_work(struct work_struct *work)
1640{ 1607{
1641 struct delayed_work *delayed_work = 1608 struct delayed_work *delayed_work = to_delayed_work(work);
1642 container_of(work, struct delayed_work, work);
1643 struct ieee80211_sub_if_data *sdata = 1609 struct ieee80211_sub_if_data *sdata =
1644 container_of(delayed_work, struct ieee80211_sub_if_data, 1610 container_of(delayed_work, struct ieee80211_sub_if_data,
1645 dfs_cac_timer_work); 1611 dfs_cac_timer_work);
@@ -2079,7 +2045,14 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
2079 memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask)); 2045 memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask));
2080 memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa)); 2046 memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa));
2081 memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask)); 2047 memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask));
2082 sdata->flags &= ~IEEE80211_SDATA_MU_MIMO_OWNER; 2048
2049 /* reset MU-MIMO ownership and group data */
2050 memset(sdata->vif.bss_conf.mu_group.membership, 0,
2051 sizeof(sdata->vif.bss_conf.mu_group.membership));
2052 memset(sdata->vif.bss_conf.mu_group.position, 0,
2053 sizeof(sdata->vif.bss_conf.mu_group.position));
2054 changed |= BSS_CHANGED_MU_GROUPS;
2055 sdata->vif.mu_mimo_owner = false;
2083 2056
2084 sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; 2057 sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
2085 2058
@@ -2536,7 +2509,8 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
2536 eth_zero_addr(sdata->u.mgd.bssid); 2509 eth_zero_addr(sdata->u.mgd.bssid);
2537 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); 2510 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
2538 sdata->u.mgd.flags = 0; 2511 sdata->u.mgd.flags = 0;
2539 sdata->flags &= ~IEEE80211_SDATA_MU_MIMO_OWNER; 2512 sdata->vif.mu_mimo_owner = false;
2513
2540 mutex_lock(&sdata->local->mtx); 2514 mutex_lock(&sdata->local->mtx);
2541 ieee80211_vif_release_channel(sdata); 2515 ieee80211_vif_release_channel(sdata);
2542 mutex_unlock(&sdata->local->mtx); 2516 mutex_unlock(&sdata->local->mtx);
@@ -3571,6 +3545,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
3571 elems.ht_cap_elem, elems.ht_operation, 3545 elems.ht_cap_elem, elems.ht_operation,
3572 elems.vht_operation, bssid, &changed)) { 3546 elems.vht_operation, bssid, &changed)) {
3573 mutex_unlock(&local->sta_mtx); 3547 mutex_unlock(&local->sta_mtx);
3548 sdata_info(sdata,
3549 "failed to follow AP %pM bandwidth change, disconnect\n",
3550 bssid);
3574 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, 3551 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
3575 WLAN_REASON_DEAUTH_LEAVING, 3552 WLAN_REASON_DEAUTH_LEAVING,
3576 true, deauth_buf); 3553 true, deauth_buf);
@@ -3946,11 +3923,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
3946 * We actually lost the connection ... or did we? 3923 * We actually lost the connection ... or did we?
3947 * Let's make sure! 3924 * Let's make sure!
3948 */ 3925 */
3949 wiphy_debug(local->hw.wiphy, 3926 mlme_dbg(sdata,
3950 "%s: No probe response from AP %pM" 3927 "No probe response from AP %pM after %dms, disconnecting.\n",
3951 " after %dms, disconnecting.\n", 3928 bssid, probe_wait_ms);
3952 sdata->name,
3953 bssid, probe_wait_ms);
3954 3929
3955 ieee80211_sta_connection_lost(sdata, bssid, 3930 ieee80211_sta_connection_lost(sdata, bssid,
3956 WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); 3931 WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false);
@@ -4005,8 +3980,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
4005 if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) 3980 if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR))
4006 ieee80211_queue_work(&sdata->local->hw, 3981 ieee80211_queue_work(&sdata->local->hw,
4007 &sdata->u.mgd.monitor_work); 3982 &sdata->u.mgd.monitor_work);
4008 /* and do all the other regular work too */
4009 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
4010 } 3983 }
4011} 3984}
4012 3985
@@ -4538,6 +4511,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
4538 if (ifmgd->associated) { 4511 if (ifmgd->associated) {
4539 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; 4512 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
4540 4513
4514 sdata_info(sdata,
4515 "disconnect from AP %pM for new auth to %pM\n",
4516 ifmgd->associated->bssid, req->bss->bssid);
4541 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, 4517 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
4542 WLAN_REASON_UNSPECIFIED, 4518 WLAN_REASON_UNSPECIFIED,
4543 false, frame_buf); 4519 false, frame_buf);
@@ -4606,6 +4582,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
4606 if (ifmgd->associated) { 4582 if (ifmgd->associated) {
4607 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; 4583 u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
4608 4584
4585 sdata_info(sdata,
4586 "disconnect from AP %pM for new assoc to %pM\n",
4587 ifmgd->associated->bssid, req->bss->bssid);
4609 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, 4588 ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
4610 WLAN_REASON_UNSPECIFIED, 4589 WLAN_REASON_UNSPECIFIED,
4611 false, frame_buf); 4590 false, frame_buf);
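
The mlme.c hunks above replace the per-sdata MU-MIMO owner flag with vif.mu_mimo_owner and, on disassociation, also wipe the VHT MU-MIMO group membership and user-position arrays and signal BSS_CHANGED_MU_GROUPS so drivers can flush their copy; the other hunks add log messages when the station cannot follow an AP bandwidth change or drops an association for a new auth/assoc. A minimal userspace sketch of the MU-MIMO reset follows; the struct and function names are invented, and the array sizes (8 membership octets, 16 user-position octets, i.e. 64 groups with 2-bit user positions) are assumed from 802.11ac.

    /* Minimal sketch (not kernel code) of the MU-MIMO reset done in
     * ieee80211_set_disassoc().  Struct and function names are invented;
     * array sizes assumed from 802.11ac.
     */
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    struct mu_group_data {
            unsigned char membership[8];    /* one bit per group  */
            unsigned char position[16];     /* 2 bits per group   */
    };

    struct vif_state {
            struct mu_group_data mu_group;
            bool mu_mimo_owner;
    };

    static void reset_mu_mimo(struct vif_state *vif)
    {
            memset(vif->mu_group.membership, 0, sizeof(vif->mu_group.membership));
            memset(vif->mu_group.position, 0, sizeof(vif->mu_group.position));
            vif->mu_mimo_owner = false;
    }

    int main(void)
    {
            struct vif_state vif = { .mu_mimo_owner = true };

            vif.mu_group.membership[0] = 0x01;      /* member of group 0 */
            reset_mu_mimo(&vif);
            printf("owner=%d membership[0]=%d\n",
                   vif.mu_mimo_owner, vif.mu_group.membership[0]);
            return 0;
    }
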
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 8b2f4eaac2ba..55a9c5b94ce1 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -252,14 +252,11 @@ static bool ieee80211_recalc_sw_work(struct ieee80211_local *local,
252static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc, 252static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
253 unsigned long start_time) 253 unsigned long start_time)
254{ 254{
255 struct ieee80211_local *local = roc->sdata->local;
256
257 if (WARN_ON(roc->notified)) 255 if (WARN_ON(roc->notified))
258 return; 256 return;
259 257
260 roc->start_time = start_time; 258 roc->start_time = start_time;
261 roc->started = true; 259 roc->started = true;
262 roc->hw_begun = true;
263 260
264 if (roc->mgmt_tx_cookie) { 261 if (roc->mgmt_tx_cookie) {
265 if (!WARN_ON(!roc->frame)) { 262 if (!WARN_ON(!roc->frame)) {
@@ -274,9 +271,6 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
274 } 271 }
275 272
276 roc->notified = true; 273 roc->notified = true;
277
278 if (!local->ops->remain_on_channel)
279 ieee80211_recalc_sw_work(local, start_time);
280} 274}
281 275
282static void ieee80211_hw_roc_start(struct work_struct *work) 276static void ieee80211_hw_roc_start(struct work_struct *work)
@@ -291,6 +285,7 @@ static void ieee80211_hw_roc_start(struct work_struct *work)
291 if (!roc->started) 285 if (!roc->started)
292 break; 286 break;
293 287
288 roc->hw_begun = true;
294 ieee80211_handle_roc_started(roc, local->hw_roc_start_time); 289 ieee80211_handle_roc_started(roc, local->hw_roc_start_time);
295 } 290 }
296 291
@@ -413,6 +408,10 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
413 return; 408 return;
414 } 409 }
415 410
411 /* defer roc if driver is not started (i.e. during reconfig) */
412 if (local->in_reconfig)
413 return;
414
416 roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, 415 roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work,
417 list); 416 list);
418 417
@@ -534,8 +533,10 @@ ieee80211_coalesce_hw_started_roc(struct ieee80211_local *local,
534 * begin, otherwise they'll both be marked properly by the work 533 * begin, otherwise they'll both be marked properly by the work
535 * struct that runs once the driver notifies us of the beginning 534 * struct that runs once the driver notifies us of the beginning
536 */ 535 */
537 if (cur_roc->hw_begun) 536 if (cur_roc->hw_begun) {
537 new_roc->hw_begun = true;
538 ieee80211_handle_roc_started(new_roc, now); 538 ieee80211_handle_roc_started(new_roc, now);
539 }
539 540
540 return true; 541 return true;
541} 542}
@@ -658,6 +659,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
658 queued = true; 659 queued = true;
659 roc->on_channel = tmp->on_channel; 660 roc->on_channel = tmp->on_channel;
660 ieee80211_handle_roc_started(roc, now); 661 ieee80211_handle_roc_started(roc, now);
662 ieee80211_recalc_sw_work(local, now);
661 break; 663 break;
662 } 664 }
663 665
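
In offchannel.c, hw_begun is no longer set inside ieee80211_handle_roc_started(): the hardware-ROC start path and the coalescing path set it themselves, the software-ROC recalculation moves to the point where a new ROC is queued, and ieee80211_start_next_roc() bails out while the driver is being reconfigured. The toy model below mirrors that control flow with invented struct and function names; it is a sketch, not the kernel code.

    /* Toy model of the ROC flow after this change: hw_begun is set by the
     * caller, and starting the next ROC is deferred during reconfig.
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct roc {
            bool started;
            bool hw_begun;
            bool notified;
    };

    struct local {
            bool in_reconfig;
            struct roc *next_roc;
    };

    static void handle_roc_started(struct roc *roc)
    {
            if (roc->notified)
                    return;
            roc->started = true;            /* hw_begun is the caller's job now */
            roc->notified = true;
    }

    static void start_next_roc(struct local *local)
    {
            if (local->in_reconfig || !local->next_roc)
                    return;                 /* defer until reconfig finishes */
            local->next_roc->hw_begun = true;
            handle_roc_started(local->next_roc);
    }

    int main(void)
    {
            struct roc roc = { false, false, false };
            struct local local = { .in_reconfig = true, .next_roc = &roc };

            start_next_roc(&local);         /* deferred: nothing happens     */
            local.in_reconfig = false;
            start_next_roc(&local);         /* now the ROC actually starts   */
            printf("started=%d hw_begun=%d notified=%d\n",
                   roc.started, roc.hw_begun, roc.notified);
            return 0;
    }
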
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 3ece7d1034c8..b54f398cda5d 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -711,7 +711,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta)
711 * computing cur_tp 711 * computing cur_tp
712 */ 712 */
713 tmp_mrs = &mi->r[idx].stats; 713 tmp_mrs = &mi->r[idx].stats;
714 tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma); 714 tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10;
715 tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024; 715 tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
716 716
717 return tmp_cur_tp; 717 return tmp_cur_tp;
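
The one-line minstrel change scales the expected-throughput value by 10 before the existing conversion to kbit/s (1200-byte average packet, 8 bits per byte, divide by 1024). A worked example of the resulting arithmetic, assuming the rate-control average is reported on a scale ten times smaller than packets per second (an assumption inferred from the fix, not stated in it):

    #include <stdio.h>

    int main(void)
    {
            unsigned int tp_avg = 100;                  /* from minstrel_get_tp_avg() */
            unsigned int tmp_cur_tp;

            tmp_cur_tp = tp_avg * 10;                   /* -> packets per second      */
            tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;  /* -> kbit/s                  */
            printf("%u kbit/s\n", tmp_cur_tp);          /* prints 9375                */
            return 0;
    }
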
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 3928dbd24e25..370d677b547b 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -414,15 +414,16 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
414 (max_tp_group != MINSTREL_CCK_GROUP)) 414 (max_tp_group != MINSTREL_CCK_GROUP))
415 return; 415 return;
416 416
417 max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
418 max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
419 max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
420
417 if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) { 421 if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) {
418 cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, 422 cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx,
419 mrs->prob_ewma); 423 mrs->prob_ewma);
420 if (cur_tp_avg > tmp_tp_avg) 424 if (cur_tp_avg > tmp_tp_avg)
421 mi->max_prob_rate = index; 425 mi->max_prob_rate = index;
422 426
423 max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
424 max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
425 max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
426 max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group, 427 max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group,
427 max_gpr_idx, 428 max_gpr_idx,
428 max_gpr_prob); 429 max_gpr_prob);
@@ -431,7 +432,7 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
431 } else { 432 } else {
432 if (mrs->prob_ewma > tmp_prob) 433 if (mrs->prob_ewma > tmp_prob)
433 mi->max_prob_rate = index; 434 mi->max_prob_rate = index;
434 if (mrs->prob_ewma > mg->rates[mg->max_group_prob_rate].prob_ewma) 435 if (mrs->prob_ewma > max_gpr_prob)
435 mg->max_group_prob_rate = index; 436 mg->max_group_prob_rate = index;
436 } 437 }
437} 438}
@@ -691,7 +692,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
691 if (likely(sta->ampdu_mlme.tid_tx[tid])) 692 if (likely(sta->ampdu_mlme.tid_tx[tid]))
692 return; 693 return;
693 694
694 ieee80211_start_tx_ba_session(pubsta, tid, 5000); 695 ieee80211_start_tx_ba_session(pubsta, tid, 0);
695} 696}
696 697
697static void 698static void
@@ -871,7 +872,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
871 * - if station is in dynamic SMPS (and streams > 1) 872 * - if station is in dynamic SMPS (and streams > 1)
872 * - for fallback rates, to increase chances of getting through 873 * - for fallback rates, to increase chances of getting through
873 */ 874 */
874 if (offset > 0 && 875 if (offset > 0 ||
875 (mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC && 876 (mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC &&
876 group->streams > 1)) { 877 group->streams > 1)) {
877 ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts; 878 ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts;
@@ -1334,7 +1335,8 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
1334 prob = mi->groups[i].rates[j].prob_ewma; 1335 prob = mi->groups[i].rates[j].prob_ewma;
1335 1336
1336 /* convert tp_avg from pkt per second in kbps */ 1337 /* convert tp_avg from pkt per second in kbps */
1337 tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * AVG_PKT_SIZE * 8 / 1024; 1338 tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10;
1339 tp_avg = tp_avg * AVG_PKT_SIZE * 8 / 1024;
1338 1340
1339 return tp_avg; 1341 return tp_avg;
1340} 1342}
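
The minstrel_ht hunks hoist the lookup of the current best-probability rate (group, index and probability) above the branch so both arms use the cached values, relax the RTS/CTS condition from requiring both "fallback rate" and "dynamic SMPS" to requiring either, pass a timeout of 0 (instead of 5000) to ieee80211_start_tx_ba_session(), and apply the same *10 throughput scaling as legacy minstrel. The flat rate index decomposes into a group and a rate within the group as sketched below; MCS_GROUP_RATES = 8 is an assumption about this kernel generation.

    #include <stdio.h>

    #define MCS_GROUP_RATES 8

    int main(void)
    {
            unsigned int max_group_prob_rate = 21;  /* example flat rate index */
            unsigned int group = max_group_prob_rate / MCS_GROUP_RATES;
            unsigned int idx   = max_group_prob_rate % MCS_GROUP_RATES;

            printf("group %u, rate %u within that group\n", group, idx);
            return 0;
    }
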
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index bc081850ac0e..dc27becb9b71 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4,6 +4,7 @@
4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
5 * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> 5 * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
6 * Copyright 2013-2014 Intel Mobile Communications GmbH 6 * Copyright 2013-2014 Intel Mobile Communications GmbH
7 * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
@@ -18,6 +19,7 @@
18#include <linux/etherdevice.h> 19#include <linux/etherdevice.h>
19#include <linux/rcupdate.h> 20#include <linux/rcupdate.h>
20#include <linux/export.h> 21#include <linux/export.h>
22#include <linux/bitops.h>
21#include <net/mac80211.h> 23#include <net/mac80211.h>
22#include <net/ieee80211_radiotap.h> 24#include <net/ieee80211_radiotap.h>
23#include <asm/unaligned.h> 25#include <asm/unaligned.h>
@@ -122,7 +124,8 @@ static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len,
122 hdr = (void *)(skb->data + rtap_vendor_space); 124 hdr = (void *)(skb->data + rtap_vendor_space);
123 125
124 if (status->flag & (RX_FLAG_FAILED_FCS_CRC | 126 if (status->flag & (RX_FLAG_FAILED_FCS_CRC |
125 RX_FLAG_FAILED_PLCP_CRC)) 127 RX_FLAG_FAILED_PLCP_CRC |
128 RX_FLAG_ONLY_MONITOR))
126 return true; 129 return true;
127 130
128 if (unlikely(skb->len < 16 + present_fcs_len + rtap_vendor_space)) 131 if (unlikely(skb->len < 16 + present_fcs_len + rtap_vendor_space))
@@ -507,7 +510,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
507 return NULL; 510 return NULL;
508 } 511 }
509 512
510 if (!local->monitors) { 513 if (!local->monitors || (status->flag & RX_FLAG_SKIP_MONITOR)) {
511 if (should_drop_frame(origskb, present_fcs_len, 514 if (should_drop_frame(origskb, present_fcs_len,
512 rtap_vendor_space)) { 515 rtap_vendor_space)) {
513 dev_kfree_skb(origskb); 516 dev_kfree_skb(origskb);
@@ -797,6 +800,26 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
797 return RX_CONTINUE; 800 return RX_CONTINUE;
798} 801}
799 802
803static inline bool ieee80211_rx_reorder_ready(struct tid_ampdu_rx *tid_agg_rx,
804 int index)
805{
806 struct sk_buff_head *frames = &tid_agg_rx->reorder_buf[index];
807 struct sk_buff *tail = skb_peek_tail(frames);
808 struct ieee80211_rx_status *status;
809
810 if (tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
811 return true;
812
813 if (!tail)
814 return false;
815
816 status = IEEE80211_SKB_RXCB(tail);
817 if (status->flag & RX_FLAG_AMSDU_MORE)
818 return false;
819
820 return true;
821}
822
800static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, 823static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
801 struct tid_ampdu_rx *tid_agg_rx, 824 struct tid_ampdu_rx *tid_agg_rx,
802 int index, 825 int index,
@@ -811,7 +834,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
811 if (skb_queue_empty(skb_list)) 834 if (skb_queue_empty(skb_list))
812 goto no_frame; 835 goto no_frame;
813 836
814 if (!ieee80211_rx_reorder_ready(skb_list)) { 837 if (!ieee80211_rx_reorder_ready(tid_agg_rx, index)) {
815 __skb_queue_purge(skb_list); 838 __skb_queue_purge(skb_list);
816 goto no_frame; 839 goto no_frame;
817 } 840 }
@@ -825,6 +848,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
825 } 848 }
826 849
827no_frame: 850no_frame:
851 tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
828 tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num); 852 tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);
829} 853}
830 854
@@ -865,7 +889,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
865 889
866 /* release the buffer until next missing frame */ 890 /* release the buffer until next missing frame */
867 index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; 891 index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size;
868 if (!ieee80211_rx_reorder_ready(&tid_agg_rx->reorder_buf[index]) && 892 if (!ieee80211_rx_reorder_ready(tid_agg_rx, index) &&
869 tid_agg_rx->stored_mpdu_num) { 893 tid_agg_rx->stored_mpdu_num) {
870 /* 894 /*
871 * No buffers ready to be released, but check whether any 895 * No buffers ready to be released, but check whether any
@@ -874,8 +898,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
874 int skipped = 1; 898 int skipped = 1;
875 for (j = (index + 1) % tid_agg_rx->buf_size; j != index; 899 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
876 j = (j + 1) % tid_agg_rx->buf_size) { 900 j = (j + 1) % tid_agg_rx->buf_size) {
877 if (!ieee80211_rx_reorder_ready( 901 if (!ieee80211_rx_reorder_ready(tid_agg_rx, j)) {
878 &tid_agg_rx->reorder_buf[j])) {
879 skipped++; 902 skipped++;
880 continue; 903 continue;
881 } 904 }
@@ -902,8 +925,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
902 skipped) & IEEE80211_SN_MASK; 925 skipped) & IEEE80211_SN_MASK;
903 skipped = 0; 926 skipped = 0;
904 } 927 }
905 } else while (ieee80211_rx_reorder_ready( 928 } else while (ieee80211_rx_reorder_ready(tid_agg_rx, index)) {
906 &tid_agg_rx->reorder_buf[index])) {
907 ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, 929 ieee80211_release_reorder_frame(sdata, tid_agg_rx, index,
908 frames); 930 frames);
909 index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size; 931 index = tid_agg_rx->head_seq_num % tid_agg_rx->buf_size;
@@ -914,8 +936,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
914 936
915 for (; j != (index - 1) % tid_agg_rx->buf_size; 937 for (; j != (index - 1) % tid_agg_rx->buf_size;
916 j = (j + 1) % tid_agg_rx->buf_size) { 938 j = (j + 1) % tid_agg_rx->buf_size) {
917 if (ieee80211_rx_reorder_ready( 939 if (ieee80211_rx_reorder_ready(tid_agg_rx, j))
918 &tid_agg_rx->reorder_buf[j]))
919 break; 940 break;
920 } 941 }
921 942
@@ -986,7 +1007,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
986 index = mpdu_seq_num % tid_agg_rx->buf_size; 1007 index = mpdu_seq_num % tid_agg_rx->buf_size;
987 1008
988 /* check if we already stored this frame */ 1009 /* check if we already stored this frame */
989 if (ieee80211_rx_reorder_ready(&tid_agg_rx->reorder_buf[index])) { 1010 if (ieee80211_rx_reorder_ready(tid_agg_rx, index)) {
990 dev_kfree_skb(skb); 1011 dev_kfree_skb(skb);
991 goto out; 1012 goto out;
992 } 1013 }
@@ -1099,6 +1120,9 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
1099 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 1120 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
1100 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); 1121 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1101 1122
1123 if (status->flag & RX_FLAG_DUP_VALIDATED)
1124 return RX_CONTINUE;
1125
1102 /* 1126 /*
1103 * Drop duplicate 802.11 retransmissions 1127 * Drop duplicate 802.11 retransmissions
1104 * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery") 1128 * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery")
@@ -1753,7 +1777,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
1753 entry->seq = seq; 1777 entry->seq = seq;
1754 entry->rx_queue = rx_queue; 1778 entry->rx_queue = rx_queue;
1755 entry->last_frag = frag; 1779 entry->last_frag = frag;
1756 entry->ccmp = 0; 1780 entry->check_sequential_pn = false;
1757 entry->extra_len = 0; 1781 entry->extra_len = 0;
1758 1782
1759 return entry; 1783 return entry;
@@ -1849,15 +1873,27 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1849 rx->seqno_idx, &(rx->skb)); 1873 rx->seqno_idx, &(rx->skb));
1850 if (rx->key && 1874 if (rx->key &&
1851 (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP || 1875 (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
1852 rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256) && 1876 rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
1877 rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
1878 rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
1853 ieee80211_has_protected(fc)) { 1879 ieee80211_has_protected(fc)) {
1854 int queue = rx->security_idx; 1880 int queue = rx->security_idx;
1855 /* Store CCMP PN so that we can verify that the next 1881
1856 * fragment has a sequential PN value. */ 1882 /* Store CCMP/GCMP PN so that we can verify that the
1857 entry->ccmp = 1; 1883 * next fragment has a sequential PN value.
1884 */
1885 entry->check_sequential_pn = true;
1858 memcpy(entry->last_pn, 1886 memcpy(entry->last_pn,
1859 rx->key->u.ccmp.rx_pn[queue], 1887 rx->key->u.ccmp.rx_pn[queue],
1860 IEEE80211_CCMP_PN_LEN); 1888 IEEE80211_CCMP_PN_LEN);
1889 BUILD_BUG_ON(offsetof(struct ieee80211_key,
1890 u.ccmp.rx_pn) !=
1891 offsetof(struct ieee80211_key,
1892 u.gcmp.rx_pn));
1893 BUILD_BUG_ON(sizeof(rx->key->u.ccmp.rx_pn[queue]) !=
1894 sizeof(rx->key->u.gcmp.rx_pn[queue]));
1895 BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
1896 IEEE80211_GCMP_PN_LEN);
1861 } 1897 }
1862 return RX_QUEUED; 1898 return RX_QUEUED;
1863 } 1899 }
@@ -1872,15 +1908,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1872 return RX_DROP_MONITOR; 1908 return RX_DROP_MONITOR;
1873 } 1909 }
1874 1910
1875 /* Verify that MPDUs within one MSDU have sequential PN values. 1911 /* "The receiver shall discard MSDUs and MMPDUs whose constituent
1876 * (IEEE 802.11i, 8.3.3.4.5) */ 1912 * MPDU PN values are not incrementing in steps of 1."
1877 if (entry->ccmp) { 1913 * see IEEE P802.11-REVmc/D5.0, 12.5.3.4.4, item d (for CCMP)
1914 * and IEEE P802.11-REVmc/D5.0, 12.5.5.4.4, item d (for GCMP)
1915 */
1916 if (entry->check_sequential_pn) {
1878 int i; 1917 int i;
1879 u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; 1918 u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
1880 int queue; 1919 int queue;
1920
1881 if (!rx->key || 1921 if (!rx->key ||
1882 (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP && 1922 (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
1883 rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256)) 1923 rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
1924 rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
1925 rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
1884 return RX_DROP_UNUSABLE; 1926 return RX_DROP_UNUSABLE;
1885 memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); 1927 memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
1886 for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { 1928 for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -2199,9 +2241,6 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
2199 skb->dev = dev; 2241 skb->dev = dev;
2200 __skb_queue_head_init(&frame_list); 2242 __skb_queue_head_init(&frame_list);
2201 2243
2202 if (skb_linearize(skb))
2203 return RX_DROP_UNUSABLE;
2204
2205 ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, 2244 ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
2206 rx->sdata->vif.type, 2245 rx->sdata->vif.type,
2207 rx->local->hw.extra_tx_headroom, true); 2246 rx->local->hw.extra_tx_headroom, true);
@@ -2231,7 +2270,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
2231 struct ieee80211_local *local = rx->local; 2270 struct ieee80211_local *local = rx->local;
2232 struct ieee80211_sub_if_data *sdata = rx->sdata; 2271 struct ieee80211_sub_if_data *sdata = rx->sdata;
2233 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 2272 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
2234 u16 q, hdrlen; 2273 u16 ac, q, hdrlen;
2235 2274
2236 hdr = (struct ieee80211_hdr *) skb->data; 2275 hdr = (struct ieee80211_hdr *) skb->data;
2237 hdrlen = ieee80211_hdrlen(hdr->frame_control); 2276 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -2290,6 +2329,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
2290 spin_lock_bh(&mppath->state_lock); 2329 spin_lock_bh(&mppath->state_lock);
2291 if (!ether_addr_equal(mppath->mpp, mpp_addr)) 2330 if (!ether_addr_equal(mppath->mpp, mpp_addr))
2292 memcpy(mppath->mpp, mpp_addr, ETH_ALEN); 2331 memcpy(mppath->mpp, mpp_addr, ETH_ALEN);
2332 mppath->exp_time = jiffies;
2293 spin_unlock_bh(&mppath->state_lock); 2333 spin_unlock_bh(&mppath->state_lock);
2294 } 2334 }
2295 rcu_read_unlock(); 2335 rcu_read_unlock();
@@ -2300,7 +2340,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
2300 ether_addr_equal(sdata->vif.addr, hdr->addr3)) 2340 ether_addr_equal(sdata->vif.addr, hdr->addr3))
2301 return RX_CONTINUE; 2341 return RX_CONTINUE;
2302 2342
2303 q = ieee80211_select_queue_80211(sdata, skb, hdr); 2343 ac = ieee80211_select_queue_80211(sdata, skb, hdr);
2344 q = sdata->vif.hw_queue[ac];
2304 if (ieee80211_queue_stopped(&local->hw, q)) { 2345 if (ieee80211_queue_stopped(&local->hw, q)) {
2305 IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion); 2346 IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
2306 return RX_DROP_MONITOR; 2347 return RX_DROP_MONITOR;
@@ -2738,6 +2779,11 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2738 opmode, status->band); 2779 opmode, status->band);
2739 goto handled; 2780 goto handled;
2740 } 2781 }
2782 case WLAN_VHT_ACTION_GROUPID_MGMT: {
2783 if (len < IEEE80211_MIN_ACTION_SIZE + 25)
2784 goto invalid;
2785 goto queue;
2786 }
2741 default: 2787 default:
2742 break; 2788 break;
2743 } 2789 }
@@ -3073,7 +3119,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
3073 ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom, 3119 ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom,
3074 false); 3120 false);
3075 3121
3076 skb_set_mac_header(skb, 0); 3122 skb_reset_mac_header(skb);
3077 skb->ip_summed = CHECKSUM_UNNECESSARY; 3123 skb->ip_summed = CHECKSUM_UNNECESSARY;
3078 skb->pkt_type = PACKET_OTHERHOST; 3124 skb->pkt_type = PACKET_OTHERHOST;
3079 skb->protocol = htons(ETH_P_802_2); 3125 skb->protocol = htons(ETH_P_802_2);
@@ -3275,6 +3321,85 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
3275 ieee80211_rx_handlers(&rx, &frames); 3321 ieee80211_rx_handlers(&rx, &frames);
3276} 3322}
3277 3323
3324void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
3325 u16 ssn, u64 filtered,
3326 u16 received_mpdus)
3327{
3328 struct sta_info *sta;
3329 struct tid_ampdu_rx *tid_agg_rx;
3330 struct sk_buff_head frames;
3331 struct ieee80211_rx_data rx = {
3332 /* This is OK -- must be QoS data frame */
3333 .security_idx = tid,
3334 .seqno_idx = tid,
3335 };
3336 int i, diff;
3337
3338 if (WARN_ON(!pubsta || tid >= IEEE80211_NUM_TIDS))
3339 return;
3340
3341 __skb_queue_head_init(&frames);
3342
3343 sta = container_of(pubsta, struct sta_info, sta);
3344
3345 rx.sta = sta;
3346 rx.sdata = sta->sdata;
3347 rx.local = sta->local;
3348
3349 rcu_read_lock();
3350 tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
3351 if (!tid_agg_rx)
3352 goto out;
3353
3354 spin_lock_bh(&tid_agg_rx->reorder_lock);
3355
3356 if (received_mpdus >= IEEE80211_SN_MODULO >> 1) {
3357 int release;
3358
3359 /* release all frames in the reorder buffer */
3360 release = (tid_agg_rx->head_seq_num + tid_agg_rx->buf_size) %
3361 IEEE80211_SN_MODULO;
3362 ieee80211_release_reorder_frames(sta->sdata, tid_agg_rx,
3363 release, &frames);
3364 /* update ssn to match received ssn */
3365 tid_agg_rx->head_seq_num = ssn;
3366 } else {
3367 ieee80211_release_reorder_frames(sta->sdata, tid_agg_rx, ssn,
3368 &frames);
3369 }
3370
3371 /* handle the case that received ssn is behind the mac ssn.
3372 * it can be tid_agg_rx->buf_size behind and still be valid */
3373 diff = (tid_agg_rx->head_seq_num - ssn) & IEEE80211_SN_MASK;
3374 if (diff >= tid_agg_rx->buf_size) {
3375 tid_agg_rx->reorder_buf_filtered = 0;
3376 goto release;
3377 }
3378 filtered = filtered >> diff;
3379 ssn += diff;
3380
3381 /* update bitmap */
3382 for (i = 0; i < tid_agg_rx->buf_size; i++) {
3383 int index = (ssn + i) % tid_agg_rx->buf_size;
3384
3385 tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
3386 if (filtered & BIT_ULL(i))
3387 tid_agg_rx->reorder_buf_filtered |= BIT_ULL(index);
3388 }
3389
3390 /* now process also frames that the filter marking released */
3391 ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx, &frames);
3392
3393release:
3394 spin_unlock_bh(&tid_agg_rx->reorder_lock);
3395
3396 ieee80211_rx_handlers(&rx, &frames);
3397
3398 out:
3399 rcu_read_unlock();
3400}
3401EXPORT_SYMBOL(ieee80211_mark_rx_ba_filtered_frames);
3402
3278/* main receive path */ 3403/* main receive path */
3279 3404
3280static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) 3405static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
@@ -3366,6 +3491,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
3366 return false; 3491 return false;
3367 /* ignore action frames to TDLS-peers */ 3492 /* ignore action frames to TDLS-peers */
3368 if (ieee80211_is_action(hdr->frame_control) && 3493 if (ieee80211_is_action(hdr->frame_control) &&
3494 !is_broadcast_ether_addr(bssid) &&
3369 !ether_addr_equal(bssid, hdr->addr1)) 3495 !ether_addr_equal(bssid, hdr->addr1))
3370 return false; 3496 return false;
3371 } 3497 }
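
The largest rx.c change teaches the RX reorder buffer about frames the hardware filtered out: a new 64-bit reorder_buf_filtered bitmap marks slots whose frame will never arrive, ieee80211_rx_reorder_ready() treats such slots as releasable (and keeps A-MSDU slots pending until the final subframe), and the exported ieee80211_mark_rx_ba_filtered_frames() lets drivers report the filtered bitmap for a block-ack window. The other hunks extend the CCMP fragment-PN check to GCMP, add RX_FLAG_ONLY_MONITOR / RX_FLAG_SKIP_MONITOR / RX_FLAG_DUP_VALIDATED handling, queue VHT group-ID management action frames, refresh mesh proxy-path expiry, and map the mesh forwarding queue through vif.hw_queue[]. The sketch below reproduces only the bitmap update arithmetic; names, buffer size and SN mask are simplified stand-ins, and the locking plus frame release done by the real function are omitted.

    #include <stdint.h>
    #include <stdio.h>

    #define BUF_SIZE 16u
    #define SN_MASK  0x0fffu

    /* One bit per reorder slot (so buf_size must be <= 64), set when the
     * firmware reported that it filtered the frame for that slot.
     */
    static uint64_t build_filtered_map(uint16_t head_seq_num, uint16_t ssn,
                                       uint64_t filtered)
    {
            uint16_t diff = (head_seq_num - ssn) & SN_MASK;
            uint64_t map = 0;
            unsigned int i;

            /* the reported ssn may lag head_seq_num by up to one buffer */
            if (diff >= BUF_SIZE)
                    return 0;

            filtered >>= diff;
            ssn += diff;

            for (i = 0; i < BUF_SIZE; i++) {
                    unsigned int index = (ssn + i) % BUF_SIZE;

                    if (filtered & ((uint64_t)1 << i))
                            map |= (uint64_t)1 << index;
            }
            return map;
    }

    int main(void)
    {
            /* head is 2 ahead of the reported ssn, so bits 2 and 3 of the
             * firmware bitmap land on slots 102 % 16 = 6 and 103 % 16 = 7. */
            uint64_t map = build_filtered_map(102, 100, 0x0c);

            printf("filtered slots: 0x%llx\n", (unsigned long long)map);  /* 0xc0 */
            return 0;
    }
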
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index a413e52f7691..ae980ce8daff 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -314,6 +314,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
314 bool was_scanning = local->scanning; 314 bool was_scanning = local->scanning;
315 struct cfg80211_scan_request *scan_req; 315 struct cfg80211_scan_request *scan_req;
316 struct ieee80211_sub_if_data *scan_sdata; 316 struct ieee80211_sub_if_data *scan_sdata;
317 struct ieee80211_sub_if_data *sdata;
317 318
318 lockdep_assert_held(&local->mtx); 319 lockdep_assert_held(&local->mtx);
319 320
@@ -373,7 +374,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
373 374
374 ieee80211_mlme_notify_scan_completed(local); 375 ieee80211_mlme_notify_scan_completed(local);
375 ieee80211_ibss_notify_scan_completed(local); 376 ieee80211_ibss_notify_scan_completed(local);
376 ieee80211_mesh_notify_scan_completed(local); 377
378 /* Requeue all the work that might have been ignored while
379 * the scan was in progress; if there was none this will
380 * just be a no-op for the particular interface.
381 */
382 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
383 if (ieee80211_sdata_running(sdata))
384 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
385 }
386
377 if (was_scanning) 387 if (was_scanning)
378 ieee80211_start_next_roc(local); 388 ieee80211_start_next_roc(local);
379} 389}
@@ -1213,6 +1223,14 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
1213 1223
1214 trace_api_sched_scan_stopped(local); 1224 trace_api_sched_scan_stopped(local);
1215 1225
1226 /*
1227 * this shouldn't really happen, so for simplicity
1228 * simply ignore it, and let mac80211 reconfigure
1229 * the sched scan later on.
1230 */
1231 if (local->in_reconfig)
1232 return;
1233
1216 schedule_work(&local->sched_scan_stopped_work); 1234 schedule_work(&local->sched_scan_stopped_work);
1217} 1235}
1218EXPORT_SYMBOL(ieee80211_sched_scan_stopped); 1236EXPORT_SYMBOL(ieee80211_sched_scan_stopped);
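
When a scan finishes, __ieee80211_scan_completed() no longer pokes only the mesh code; it requeues the deferred work of every running interface, and ieee80211_sched_scan_stopped() ignores the notification while the hardware is being reconfigured (the sched scan is restored later by the reconfig path). A toy version of the requeue loop, with the RCU interface-list walk and ieee80211_queue_work() replaced by an array and a flag:

    #include <stdbool.h>
    #include <stdio.h>

    struct sdata {
            const char *name;
            bool running;
            bool work_queued;
    };

    static void requeue_after_scan(struct sdata *ifaces, int n)
    {
            for (int i = 0; i < n; i++) {
                    if (ifaces[i].running)
                            ifaces[i].work_queued = true;   /* queue_work() stand-in */
            }
    }

    int main(void)
    {
            struct sdata ifaces[] = {
                    { "wlan0", true,  false },
                    { "mesh0", true,  false },
                    { "p2p0",  false, false },
            };

            requeue_after_scan(ifaces, 3);
            for (int i = 0; i < 3; i++)
                    printf("%s: queued=%d\n", ifaces[i].name, ifaces[i].work_queued);
            return 0;
    }
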
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 4402ad5b27d1..d20bab5c146c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -116,6 +116,7 @@ static void __cleanup_single_sta(struct sta_info *sta)
116 116
117 ieee80211_purge_tx_queue(&local->hw, &txqi->queue); 117 ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
118 atomic_sub(n, &sdata->txqs_len[txqi->txq.ac]); 118 atomic_sub(n, &sdata->txqs_len[txqi->txq.ac]);
119 txqi->byte_cnt = 0;
119 } 120 }
120 } 121 }
121 122
@@ -498,11 +499,17 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
498{ 499{
499 struct ieee80211_local *local = sta->local; 500 struct ieee80211_local *local = sta->local;
500 struct ieee80211_sub_if_data *sdata = sta->sdata; 501 struct ieee80211_sub_if_data *sdata = sta->sdata;
501 struct station_info sinfo; 502 struct station_info *sinfo;
502 int err = 0; 503 int err = 0;
503 504
504 lockdep_assert_held(&local->sta_mtx); 505 lockdep_assert_held(&local->sta_mtx);
505 506
507 sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL);
508 if (!sinfo) {
509 err = -ENOMEM;
510 goto out_err;
511 }
512
506 /* check if STA exists already */ 513 /* check if STA exists already */
507 if (sta_info_get_bss(sdata, sta->sta.addr)) { 514 if (sta_info_get_bss(sdata, sta->sta.addr)) {
508 err = -EEXIST; 515 err = -EEXIST;
@@ -530,14 +537,12 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
530 /* accept BA sessions now */ 537 /* accept BA sessions now */
531 clear_sta_flag(sta, WLAN_STA_BLOCK_BA); 538 clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
532 539
533 ieee80211_recalc_min_chandef(sdata);
534 ieee80211_sta_debugfs_add(sta); 540 ieee80211_sta_debugfs_add(sta);
535 rate_control_add_sta_debugfs(sta); 541 rate_control_add_sta_debugfs(sta);
536 542
537 memset(&sinfo, 0, sizeof(sinfo)); 543 sinfo->generation = local->sta_generation;
538 sinfo.filled = 0; 544 cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
539 sinfo.generation = local->sta_generation; 545 kfree(sinfo);
540 cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL);
541 546
542 sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); 547 sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr);
543 548
@@ -557,6 +562,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
557 __cleanup_single_sta(sta); 562 __cleanup_single_sta(sta);
558 out_err: 563 out_err:
559 mutex_unlock(&local->sta_mtx); 564 mutex_unlock(&local->sta_mtx);
565 kfree(sinfo);
560 rcu_read_lock(); 566 rcu_read_lock();
561 return err; 567 return err;
562} 568}
@@ -898,7 +904,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
898{ 904{
899 struct ieee80211_local *local = sta->local; 905 struct ieee80211_local *local = sta->local;
900 struct ieee80211_sub_if_data *sdata = sta->sdata; 906 struct ieee80211_sub_if_data *sdata = sta->sdata;
901 struct station_info sinfo = {}; 907 struct station_info *sinfo;
902 int ret; 908 int ret;
903 909
904 /* 910 /*
@@ -936,12 +942,14 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
936 942
937 sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); 943 sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr);
938 944
939 sta_set_sinfo(sta, &sinfo); 945 sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL);
940 cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); 946 if (sinfo)
947 sta_set_sinfo(sta, sinfo);
948 cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
949 kfree(sinfo);
941 950
942 rate_control_remove_sta_debugfs(sta); 951 rate_control_remove_sta_debugfs(sta);
943 ieee80211_sta_debugfs_remove(sta); 952 ieee80211_sta_debugfs_remove(sta);
944 ieee80211_recalc_min_chandef(sdata);
945 953
946 cleanup_single_sta(sta); 954 cleanup_single_sta(sta);
947} 955}
@@ -1453,7 +1461,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
1453 1461
1454 more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids); 1462 more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids);
1455 1463
1456 if (reason == IEEE80211_FRAME_RELEASE_PSPOLL) 1464 if (driver_release_tids && reason == IEEE80211_FRAME_RELEASE_PSPOLL)
1457 driver_release_tids = 1465 driver_release_tids =
1458 BIT(find_highest_prio_tid(driver_release_tids)); 1466 BIT(find_highest_prio_tid(driver_release_tids));
1459 1467
@@ -1808,14 +1816,17 @@ int sta_info_move_state(struct sta_info *sta,
1808 clear_bit(WLAN_STA_AUTH, &sta->_flags); 1816 clear_bit(WLAN_STA_AUTH, &sta->_flags);
1809 break; 1817 break;
1810 case IEEE80211_STA_AUTH: 1818 case IEEE80211_STA_AUTH:
1811 if (sta->sta_state == IEEE80211_STA_NONE) 1819 if (sta->sta_state == IEEE80211_STA_NONE) {
1812 set_bit(WLAN_STA_AUTH, &sta->_flags); 1820 set_bit(WLAN_STA_AUTH, &sta->_flags);
1813 else if (sta->sta_state == IEEE80211_STA_ASSOC) 1821 } else if (sta->sta_state == IEEE80211_STA_ASSOC) {
1814 clear_bit(WLAN_STA_ASSOC, &sta->_flags); 1822 clear_bit(WLAN_STA_ASSOC, &sta->_flags);
1823 ieee80211_recalc_min_chandef(sta->sdata);
1824 }
1815 break; 1825 break;
1816 case IEEE80211_STA_ASSOC: 1826 case IEEE80211_STA_ASSOC:
1817 if (sta->sta_state == IEEE80211_STA_AUTH) { 1827 if (sta->sta_state == IEEE80211_STA_AUTH) {
1818 set_bit(WLAN_STA_ASSOC, &sta->_flags); 1828 set_bit(WLAN_STA_ASSOC, &sta->_flags);
1829 ieee80211_recalc_min_chandef(sta->sdata);
1819 } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { 1830 } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
1820 if (sta->sdata->vif.type == NL80211_IFTYPE_AP || 1831 if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
1821 (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && 1832 (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
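
In sta_info.c, struct station_info is now heap-allocated in both the insert-finish and destroy paths rather than living on the stack, presumably because the struct has grown too large for the kernel stack (the delete path passes NULL to cfg80211_del_sta_sinfo() if the allocation fails). The remaining hunks move ieee80211_recalc_min_chandef() into the AUTH/ASSOC state transitions, only collapse driver_release_tids for PS-Poll when it is non-zero, and reset the per-txq byte counter on cleanup. A sketch of the allocation pattern, using a userspace stand-in struct and calloc()/free() in place of kzalloc()/kfree():

    #include <stdio.h>
    #include <stdlib.h>

    struct station_info {
            unsigned int generation;
            unsigned char filler[512];      /* pretend this is big */
    };

    static int notify_new_sta(unsigned int generation)
    {
            struct station_info *sinfo = calloc(1, sizeof(*sinfo));

            if (!sinfo)
                    return -1;              /* -ENOMEM in the kernel */

            sinfo->generation = generation;
            /* cfg80211_new_sta(dev, addr, sinfo, GFP_KERNEL) would run here */
            printf("notified, generation %u\n", sinfo->generation);

            free(sinfo);
            return 0;
    }

    int main(void)
    {
            return notify_new_sta(42) ? 1 : 0;
    }
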
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index d6051629ed15..053f5c4fa495 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright 2002-2005, Devicescape Software, Inc. 2 * Copyright 2002-2005, Devicescape Software, Inc.
3 * Copyright 2013-2014 Intel Mobile Communications GmbH 3 * Copyright 2013-2014 Intel Mobile Communications GmbH
4 * Copyright(c) 2015 Intel Deutschland GmbH
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
@@ -167,6 +168,8 @@ struct tid_ampdu_tx {
167 * 168 *
168 * @reorder_buf: buffer to reorder incoming aggregated MPDUs. An MPDU may be an 169 * @reorder_buf: buffer to reorder incoming aggregated MPDUs. An MPDU may be an
169 * A-MSDU with individually reported subframes. 170 * A-MSDU with individually reported subframes.
171 * @reorder_buf_filtered: bitmap indicating where there are filtered frames in
172 * the reorder buffer that should be ignored when releasing frames
170 * @reorder_time: jiffies when skb was added 173 * @reorder_time: jiffies when skb was added
171 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value) 174 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
172 * @reorder_timer: releases expired frames from the reorder buffer. 175 * @reorder_timer: releases expired frames from the reorder buffer.
@@ -194,6 +197,7 @@ struct tid_ampdu_tx {
194struct tid_ampdu_rx { 197struct tid_ampdu_rx {
195 struct rcu_head rcu_head; 198 struct rcu_head rcu_head;
196 spinlock_t reorder_lock; 199 spinlock_t reorder_lock;
200 u64 reorder_buf_filtered;
197 struct sk_buff_head *reorder_buf; 201 struct sk_buff_head *reorder_buf;
198 unsigned long *reorder_time; 202 unsigned long *reorder_time;
199 struct timer_list session_timer; 203 struct timer_list session_timer;
@@ -212,20 +216,21 @@ struct tid_ampdu_rx {
212/** 216/**
213 * struct sta_ampdu_mlme - STA aggregation information. 217 * struct sta_ampdu_mlme - STA aggregation information.
214 * 218 *
219 * @mtx: mutex to protect all TX data (except non-NULL assignments
220 * to tid_tx[idx], which are protected by the sta spinlock)
221 * tid_start_tx is also protected by sta->lock.
215 * @tid_rx: aggregation info for Rx per TID -- RCU protected 222 * @tid_rx: aggregation info for Rx per TID -- RCU protected
216 * @tid_tx: aggregation info for Tx per TID
217 * @tid_start_tx: sessions where start was requested
218 * @addba_req_num: number of times addBA request has been sent.
219 * @last_addba_req_time: timestamp of the last addBA request.
220 * @dialog_token_allocator: dialog token enumerator for each new session;
221 * @work: work struct for starting/stopping aggregation
222 * @tid_rx_timer_expired: bitmap indicating on which TIDs the 223 * @tid_rx_timer_expired: bitmap indicating on which TIDs the
223 * RX timer expired until the work for it runs 224 * RX timer expired until the work for it runs
224 * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the 225 * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the
225 * driver requested to close until the work for it runs 226 * driver requested to close until the work for it runs
226 * @mtx: mutex to protect all TX data (except non-NULL assignments 227 * @agg_session_valid: bitmap indicating which TID has a rx BA session open on
227 * to tid_tx[idx], which are protected by the sta spinlock) 228 * @work: work struct for starting/stopping aggregation
228 * tid_start_tx is also protected by sta->lock. 229 * @tid_tx: aggregation info for Tx per TID
230 * @tid_start_tx: sessions where start was requested
231 * @last_addba_req_time: timestamp of the last addBA request.
232 * @addba_req_num: number of times addBA request has been sent.
233 * @dialog_token_allocator: dialog token enumerator for each new session;
229 */ 234 */
230struct sta_ampdu_mlme { 235struct sta_ampdu_mlme {
231 struct mutex mtx; 236 struct mutex mtx;
@@ -233,6 +238,7 @@ struct sta_ampdu_mlme {
233 struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS]; 238 struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS];
234 unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; 239 unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
235 unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; 240 unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
241 unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
236 /* tx */ 242 /* tx */
237 struct work_struct work; 243 struct work_struct work;
238 struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS]; 244 struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS];
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 5bad05e9af90..8b1b2ea03eb5 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -51,6 +51,11 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
51 struct ieee80211_hdr *hdr = (void *)skb->data; 51 struct ieee80211_hdr *hdr = (void *)skb->data;
52 int ac; 52 int ac;
53 53
54 if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
55 ieee80211_free_txskb(&local->hw, skb);
56 return;
57 }
58
54 /* 59 /*
55 * This skb 'survived' a round-trip through the driver, and 60 * This skb 'survived' a round-trip through the driver, and
56 * hopefully the driver didn't mangle it too badly. However, 61 * hopefully the driver didn't mangle it too badly. However,
@@ -692,7 +697,7 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
692 rtap_len, shift); 697 rtap_len, shift);
693 698
694 /* XXX: is this sufficient for BPF? */ 699 /* XXX: is this sufficient for BPF? */
695 skb_set_mac_header(skb, 0); 700 skb_reset_mac_header(skb);
696 skb->ip_summed = CHECKSUM_UNNECESSARY; 701 skb->ip_summed = CHECKSUM_UNNECESSARY;
697 skb->pkt_type = PACKET_OTHERHOST; 702 skb->pkt_type = PACKET_OTHERHOST;
698 skb->protocol = htons(ETH_P_802_2); 703 skb->protocol = htons(ETH_P_802_2);
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 0ae207771a58..b3622823bad2 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright 2002-2004, Instant802 Networks, Inc. 2 * Copyright 2002-2004, Instant802 Networks, Inc.
3 * Copyright 2005, Devicescape Software, Inc. 3 * Copyright 2005, Devicescape Software, Inc.
4 * Copyright (C) 2016 Intel Deutschland GmbH
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
@@ -142,15 +143,14 @@ static void tkip_mixing_phase2(const u8 *tk, struct tkip_ctx *ctx,
142/* Add TKIP IV and Ext. IV at @pos. @iv0, @iv1, and @iv2 are the first octets 143/* Add TKIP IV and Ext. IV at @pos. @iv0, @iv1, and @iv2 are the first octets
143 * of the IV. Returns pointer to the octet following IVs (i.e., beginning of 144 * of the IV. Returns pointer to the octet following IVs (i.e., beginning of
144 * the packet payload). */ 145 * the packet payload). */
145u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key) 146u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key_conf *keyconf, u64 pn)
146{ 147{
147 lockdep_assert_held(&key->u.tkip.txlock); 148 pos = write_tkip_iv(pos, TKIP_PN_TO_IV16(pn));
148 149 *pos++ = (keyconf->keyidx << 6) | (1 << 5) /* Ext IV */;
149 pos = write_tkip_iv(pos, key->u.tkip.tx.iv16); 150 put_unaligned_le32(TKIP_PN_TO_IV32(pn), pos);
150 *pos++ = (key->conf.keyidx << 6) | (1 << 5) /* Ext IV */;
151 put_unaligned_le32(key->u.tkip.tx.iv32, pos);
152 return pos + 4; 151 return pos + 4;
153} 152}
153EXPORT_SYMBOL_GPL(ieee80211_tkip_add_iv);
154 154
155static void ieee80211_compute_tkip_p1k(struct ieee80211_key *key, u32 iv32) 155static void ieee80211_compute_tkip_p1k(struct ieee80211_key *key, u32 iv32)
156{ 156{
@@ -250,6 +250,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm,
250 u8 rc4key[16], keyid, *pos = payload; 250 u8 rc4key[16], keyid, *pos = payload;
251 int res; 251 int res;
252 const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; 252 const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY];
253 struct tkip_ctx_rx *rx_ctx = &key->u.tkip.rx[queue];
253 254
254 if (payload_len < 12) 255 if (payload_len < 12)
255 return -1; 256 return -1;
@@ -265,37 +266,36 @@ int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm,
265 if ((keyid >> 6) != key->conf.keyidx) 266 if ((keyid >> 6) != key->conf.keyidx)
266 return TKIP_DECRYPT_INVALID_KEYIDX; 267 return TKIP_DECRYPT_INVALID_KEYIDX;
267 268
268 if (key->u.tkip.rx[queue].state != TKIP_STATE_NOT_INIT && 269 if (rx_ctx->ctx.state != TKIP_STATE_NOT_INIT &&
269 (iv32 < key->u.tkip.rx[queue].iv32 || 270 (iv32 < rx_ctx->iv32 ||
270 (iv32 == key->u.tkip.rx[queue].iv32 && 271 (iv32 == rx_ctx->iv32 && iv16 <= rx_ctx->iv16)))
271 iv16 <= key->u.tkip.rx[queue].iv16)))
272 return TKIP_DECRYPT_REPLAY; 272 return TKIP_DECRYPT_REPLAY;
273 273
274 if (only_iv) { 274 if (only_iv) {
275 res = TKIP_DECRYPT_OK; 275 res = TKIP_DECRYPT_OK;
276 key->u.tkip.rx[queue].state = TKIP_STATE_PHASE1_HW_UPLOADED; 276 rx_ctx->ctx.state = TKIP_STATE_PHASE1_HW_UPLOADED;
277 goto done; 277 goto done;
278 } 278 }
279 279
280 if (key->u.tkip.rx[queue].state == TKIP_STATE_NOT_INIT || 280 if (rx_ctx->ctx.state == TKIP_STATE_NOT_INIT ||
281 key->u.tkip.rx[queue].iv32 != iv32) { 281 rx_ctx->iv32 != iv32) {
282 /* IV16 wrapped around - perform TKIP phase 1 */ 282 /* IV16 wrapped around - perform TKIP phase 1 */
283 tkip_mixing_phase1(tk, &key->u.tkip.rx[queue], ta, iv32); 283 tkip_mixing_phase1(tk, &rx_ctx->ctx, ta, iv32);
284 } 284 }
285 if (key->local->ops->update_tkip_key && 285 if (key->local->ops->update_tkip_key &&
286 key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && 286 key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
287 key->u.tkip.rx[queue].state != TKIP_STATE_PHASE1_HW_UPLOADED) { 287 rx_ctx->ctx.state != TKIP_STATE_PHASE1_HW_UPLOADED) {
288 struct ieee80211_sub_if_data *sdata = key->sdata; 288 struct ieee80211_sub_if_data *sdata = key->sdata;
289 289
290 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 290 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
291 sdata = container_of(key->sdata->bss, 291 sdata = container_of(key->sdata->bss,
292 struct ieee80211_sub_if_data, u.ap); 292 struct ieee80211_sub_if_data, u.ap);
293 drv_update_tkip_key(key->local, sdata, &key->conf, key->sta, 293 drv_update_tkip_key(key->local, sdata, &key->conf, key->sta,
294 iv32, key->u.tkip.rx[queue].p1k); 294 iv32, rx_ctx->ctx.p1k);
295 key->u.tkip.rx[queue].state = TKIP_STATE_PHASE1_HW_UPLOADED; 295 rx_ctx->ctx.state = TKIP_STATE_PHASE1_HW_UPLOADED;
296 } 296 }
297 297
298 tkip_mixing_phase2(tk, &key->u.tkip.rx[queue], iv16, rc4key); 298 tkip_mixing_phase2(tk, &rx_ctx->ctx, iv16, rc4key);
299 299
300 res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12); 300 res = ieee80211_wep_decrypt_data(tfm, rc4key, 16, pos, payload_len - 12);
301 done: 301 done:
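
The TKIP rework makes ieee80211_tkip_add_iv() take a key_conf plus a 64-bit packet number instead of the full key (and exports it), and switches the decrypt path to a tkip_ctx_rx wrapper so the per-queue RX state is addressed through one pointer. The arithmetic behind the new IV construction is shown below: the 48-bit PN splits into IV16/IV32 and the third IV octet carries the key index and the Extended IV bit. The two macros are written here to match their usual meaning (low 16 bits / upper 32 bits of the PN); treat that as an assumption, and note that the byte layout of the first three IV octets (write_tkip_iv() in the kernel) is omitted.

    #include <stdint.h>
    #include <stdio.h>

    #define TKIP_PN_TO_IV16(pn) ((uint16_t)((pn) & 0xffff))
    #define TKIP_PN_TO_IV32(pn) ((uint32_t)(((pn) >> 16) & 0xffffffff))

    int main(void)
    {
            uint64_t pn = 0x0000123456789abcULL;    /* 48-bit packet number */
            int keyidx = 1;
            unsigned int keyid_octet = (keyidx << 6) | (1 << 5);  /* Ext IV bit */

            printf("iv16=0x%04x iv32=0x%08x keyid octet=0x%02x\n",
                   (unsigned int)TKIP_PN_TO_IV16(pn),
                   (unsigned int)TKIP_PN_TO_IV32(pn),
                   keyid_octet);
            return 0;
    }
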
diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h
index e3ecb659b90a..a1bcbfbefe7c 100644
--- a/net/mac80211/tkip.h
+++ b/net/mac80211/tkip.h
@@ -13,8 +13,6 @@
13#include <linux/crypto.h> 13#include <linux/crypto.h>
14#include "key.h" 14#include "key.h"
15 15
16u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key);
17
18int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm, 16int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm,
19 struct ieee80211_key *key, 17 struct ieee80211_key *key,
20 struct sk_buff *skb, 18 struct sk_buff *skb,
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index a6b4442776a0..2b0a17ee907a 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -80,7 +80,23 @@
80#define KEY_PR_FMT " cipher:0x%x, flags=%#x, keyidx=%d, hw_key_idx=%d" 80#define KEY_PR_FMT " cipher:0x%x, flags=%#x, keyidx=%d, hw_key_idx=%d"
81#define KEY_PR_ARG __entry->cipher, __entry->flags, __entry->keyidx, __entry->hw_key_idx 81#define KEY_PR_ARG __entry->cipher, __entry->flags, __entry->keyidx, __entry->hw_key_idx
82 82
83 83#define AMPDU_ACTION_ENTRY __field(enum ieee80211_ampdu_mlme_action, \
84 ieee80211_ampdu_mlme_action) \
85 STA_ENTRY \
86 __field(u16, tid) \
87 __field(u16, ssn) \
88 __field(u8, buf_size) \
89 __field(bool, amsdu) \
90 __field(u16, timeout)
91#define AMPDU_ACTION_ASSIGN STA_NAMED_ASSIGN(params->sta); \
92 __entry->tid = params->tid; \
93 __entry->ssn = params->ssn; \
94 __entry->buf_size = params->buf_size; \
95 __entry->amsdu = params->amsdu; \
96 __entry->timeout = params->timeout;
97#define AMPDU_ACTION_PR_FMT STA_PR_FMT " tid %d, ssn %d, buf_size %u, amsdu %d, timeout %d"
98#define AMPDU_ACTION_PR_ARG STA_PR_ARG, __entry->tid, __entry->ssn, \
99 __entry->buf_size, __entry->amsdu, __entry->timeout
84 100
85/* 101/*
86 * Tracing for driver callbacks. 102 * Tracing for driver callbacks.
@@ -970,38 +986,25 @@ DEFINE_EVENT(local_only_evt, drv_tx_last_beacon,
970TRACE_EVENT(drv_ampdu_action, 986TRACE_EVENT(drv_ampdu_action,
971 TP_PROTO(struct ieee80211_local *local, 987 TP_PROTO(struct ieee80211_local *local,
972 struct ieee80211_sub_if_data *sdata, 988 struct ieee80211_sub_if_data *sdata,
973 enum ieee80211_ampdu_mlme_action action, 989 struct ieee80211_ampdu_params *params),
974 struct ieee80211_sta *sta, u16 tid,
975 u16 *ssn, u8 buf_size, bool amsdu),
976 990
977 TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size, amsdu), 991 TP_ARGS(local, sdata, params),
978 992
979 TP_STRUCT__entry( 993 TP_STRUCT__entry(
980 LOCAL_ENTRY 994 LOCAL_ENTRY
981 STA_ENTRY
982 __field(u32, action)
983 __field(u16, tid)
984 __field(u16, ssn)
985 __field(u8, buf_size)
986 __field(bool, amsdu)
987 VIF_ENTRY 995 VIF_ENTRY
996 AMPDU_ACTION_ENTRY
988 ), 997 ),
989 998
990 TP_fast_assign( 999 TP_fast_assign(
991 LOCAL_ASSIGN; 1000 LOCAL_ASSIGN;
992 VIF_ASSIGN; 1001 VIF_ASSIGN;
993 STA_ASSIGN; 1002 AMPDU_ACTION_ASSIGN;
994 __entry->action = action;
995 __entry->tid = tid;
996 __entry->ssn = ssn ? *ssn : 0;
997 __entry->buf_size = buf_size;
998 __entry->amsdu = amsdu;
999 ), 1003 ),
1000 1004
1001 TP_printk( 1005 TP_printk(
1002 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d amsdu:%d", 1006 LOCAL_PR_FMT VIF_PR_FMT AMPDU_ACTION_PR_FMT,
1003 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, 1007 LOCAL_PR_ARG, VIF_PR_ARG, AMPDU_ACTION_PR_ARG
1004 __entry->tid, __entry->buf_size, __entry->amsdu
1005 ) 1008 )
1006); 1009);
1007 1010
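
The trace.h change collapses the drv_ampdu_action tracepoint arguments into the new struct ieee80211_ampdu_params and factors the field/assign/print boilerplate into AMPDU_ACTION_* macros, so every A-MPDU action now records tid, ssn, buf_size, amsdu and timeout. The stand-alone sketch below illustrates the pattern of bundling the arguments into one params struct; the struct defined here is a stand-in, not the real ieee80211_ampdu_params.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct ampdu_params {
            int action;             /* enum ieee80211_ampdu_mlme_action */
            uint16_t tid;
            uint16_t ssn;
            uint8_t buf_size;
            bool amsdu;
            uint16_t timeout;
    };

    static void trace_ampdu_action(const struct ampdu_params *params)
    {
            printf("tid %d, ssn %d, buf_size %d, amsdu %d, timeout %d\n",
                   params->tid, params->ssn, params->buf_size,
                   params->amsdu, params->timeout);
    }

    int main(void)
    {
            struct ampdu_params params = {
                    .action = 0, .tid = 5, .ssn = 100,
                    .buf_size = 64, .amsdu = true, .timeout = 0,
            };

            trace_ampdu_action(&params);
            return 0;
    }
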
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3311ce0f3d6c..62ad5321257d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -710,6 +710,10 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
710 710
711 info->control.short_preamble = txrc.short_preamble; 711 info->control.short_preamble = txrc.short_preamble;
712 712
713 /* don't ask rate control when rate already injected via radiotap */
714 if (info->control.flags & IEEE80211_TX_CTRL_RATE_INJECT)
715 return TX_CONTINUE;
716
713 if (tx->sta) 717 if (tx->sta)
714 assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC); 718 assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC);
715 719
@@ -1266,7 +1270,11 @@ static void ieee80211_drv_tx(struct ieee80211_local *local,
1266 if (atomic_read(&sdata->txqs_len[ac]) >= local->hw.txq_ac_max_pending) 1270 if (atomic_read(&sdata->txqs_len[ac]) >= local->hw.txq_ac_max_pending)
1267 netif_stop_subqueue(sdata->dev, ac); 1271 netif_stop_subqueue(sdata->dev, ac);
1268 1272
1269 skb_queue_tail(&txqi->queue, skb); 1273 spin_lock_bh(&txqi->queue.lock);
1274 txqi->byte_cnt += skb->len;
1275 __skb_queue_tail(&txqi->queue, skb);
1276 spin_unlock_bh(&txqi->queue.lock);
1277
1270 drv_wake_tx_queue(local, txqi); 1278 drv_wake_tx_queue(local, txqi);
1271 1279
1272 return; 1280 return;
@@ -1294,6 +1302,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
1294 if (!skb) 1302 if (!skb)
1295 goto out; 1303 goto out;
1296 1304
1305 txqi->byte_cnt -= skb->len;
1306
1297 atomic_dec(&sdata->txqs_len[ac]); 1307 atomic_dec(&sdata->txqs_len[ac]);
1298 if (__netif_subqueue_stopped(sdata->dev, ac)) 1308 if (__netif_subqueue_stopped(sdata->dev, ac))
1299 ieee80211_propagate_queue_wake(local, sdata->vif.hw_queue[ac]); 1309 ieee80211_propagate_queue_wake(local, sdata->vif.hw_queue[ac]);
@@ -1665,15 +1675,24 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
1665 ieee80211_tx(sdata, sta, skb, false); 1675 ieee80211_tx(sdata, sta, skb, false);
1666} 1676}
1667 1677
1668static bool ieee80211_parse_tx_radiotap(struct sk_buff *skb) 1678static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local,
1679 struct sk_buff *skb)
1669{ 1680{
1670 struct ieee80211_radiotap_iterator iterator; 1681 struct ieee80211_radiotap_iterator iterator;
1671 struct ieee80211_radiotap_header *rthdr = 1682 struct ieee80211_radiotap_header *rthdr =
1672 (struct ieee80211_radiotap_header *) skb->data; 1683 (struct ieee80211_radiotap_header *) skb->data;
1673 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 1684 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
1685 struct ieee80211_supported_band *sband =
1686 local->hw.wiphy->bands[info->band];
1674 int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, 1687 int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len,
1675 NULL); 1688 NULL);
1676 u16 txflags; 1689 u16 txflags;
1690 u16 rate = 0;
1691 bool rate_found = false;
1692 u8 rate_retries = 0;
1693 u16 rate_flags = 0;
1694 u8 mcs_known, mcs_flags;
1695 int i;
1677 1696
1678 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | 1697 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
1679 IEEE80211_TX_CTL_DONTFRAG; 1698 IEEE80211_TX_CTL_DONTFRAG;
@@ -1724,6 +1743,35 @@ static bool ieee80211_parse_tx_radiotap(struct sk_buff *skb)
1724 info->flags |= IEEE80211_TX_CTL_NO_ACK; 1743 info->flags |= IEEE80211_TX_CTL_NO_ACK;
1725 break; 1744 break;
1726 1745
1746 case IEEE80211_RADIOTAP_RATE:
1747 rate = *iterator.this_arg;
1748 rate_flags = 0;
1749 rate_found = true;
1750 break;
1751
1752 case IEEE80211_RADIOTAP_DATA_RETRIES:
1753 rate_retries = *iterator.this_arg;
1754 break;
1755
1756 case IEEE80211_RADIOTAP_MCS:
1757 mcs_known = iterator.this_arg[0];
1758 mcs_flags = iterator.this_arg[1];
1759 if (!(mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_MCS))
1760 break;
1761
1762 rate_found = true;
1763 rate = iterator.this_arg[2];
1764 rate_flags = IEEE80211_TX_RC_MCS;
1765
1766 if (mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_GI &&
1767 mcs_flags & IEEE80211_RADIOTAP_MCS_SGI)
1768 rate_flags |= IEEE80211_TX_RC_SHORT_GI;
1769
1770 if (mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_BW &&
1771 mcs_flags & IEEE80211_RADIOTAP_MCS_BW_40)
1772 rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
1773 break;
1774
1727 /* 1775 /*
1728 * Please update the file 1776 * Please update the file
1729 * Documentation/networking/mac80211-injection.txt 1777 * Documentation/networking/mac80211-injection.txt
@@ -1738,6 +1786,32 @@ static bool ieee80211_parse_tx_radiotap(struct sk_buff *skb)
1738 if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ 1786 if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */
1739 return false; 1787 return false;
1740 1788
1789 if (rate_found) {
1790 info->control.flags |= IEEE80211_TX_CTRL_RATE_INJECT;
1791
1792 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
1793 info->control.rates[i].idx = -1;
1794 info->control.rates[i].flags = 0;
1795 info->control.rates[i].count = 0;
1796 }
1797
1798 if (rate_flags & IEEE80211_TX_RC_MCS) {
1799 info->control.rates[0].idx = rate;
1800 } else {
1801 for (i = 0; i < sband->n_bitrates; i++) {
1802 if (rate * 5 != sband->bitrates[i].bitrate)
1803 continue;
1804
1805 info->control.rates[0].idx = i;
1806 break;
1807 }
1808 }
1809
1810 info->control.rates[0].flags = rate_flags;
1811 info->control.rates[0].count = min_t(u8, rate_retries + 1,
1812 local->hw.max_rate_tries);
1813 }
1814
1741 /* 1815 /*
1742 * remove the radiotap header 1816 * remove the radiotap header
1743 * iterator->_max_length was sanity-checked against 1817 * iterator->_max_length was sanity-checked against
@@ -1818,10 +1892,6 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
1818 info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | 1892 info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
1819 IEEE80211_TX_CTL_INJECTED; 1893 IEEE80211_TX_CTL_INJECTED;
1820 1894
1821 /* process and remove the injection radiotap header */
1822 if (!ieee80211_parse_tx_radiotap(skb))
1823 goto fail;
1824
1825 rcu_read_lock(); 1895 rcu_read_lock();
1826 1896
1827 /* 1897 /*
@@ -1883,6 +1953,11 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
1883 goto fail_rcu; 1953 goto fail_rcu;
1884 1954
1885 info->band = chandef->chan->band; 1955 info->band = chandef->chan->band;
1956
1957 /* process and remove the injection radiotap header */
1958 if (!ieee80211_parse_tx_radiotap(local, skb))
1959 goto fail_rcu;
1960
1886 ieee80211_xmit(sdata, NULL, skb); 1961 ieee80211_xmit(sdata, NULL, skb);
1887 rcu_read_unlock(); 1962 rcu_read_unlock();
1888 1963
@@ -2099,8 +2174,11 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
2099 mpp_lookup = true; 2174 mpp_lookup = true;
2100 } 2175 }
2101 2176
2102 if (mpp_lookup) 2177 if (mpp_lookup) {
2103 mppath = mpp_path_lookup(sdata, skb->data); 2178 mppath = mpp_path_lookup(sdata, skb->data);
2179 if (mppath)
2180 mppath->exp_time = jiffies;
2181 }
2104 2182
2105 if (mppath && mpath) 2183 if (mppath && mpath)
2106 mesh_path_del(mpath->sdata, mpath->dst); 2184 mesh_path_del(mpath->sdata, mpath->dst);
@@ -2380,7 +2458,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
2380 /* Update skb pointers to various headers since this modified frame 2458 /* Update skb pointers to various headers since this modified frame
2381 * is going to go through Linux networking code that may potentially 2459 * is going to go through Linux networking code that may potentially
2382 * need things like pointer to IP header. */ 2460 * need things like pointer to IP header. */
2383 skb_set_mac_header(skb, 0); 2461 skb_reset_mac_header(skb);
2384 skb_set_network_header(skb, nh_pos); 2462 skb_set_network_header(skb, nh_pos);
2385 skb_set_transport_header(skb, h_pos); 2463 skb_set_transport_header(skb, h_pos);
2386 2464
@@ -3895,9 +3973,9 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
3895{ 3973{
3896 int ac = ieee802_1d_to_ac[tid & 7]; 3974 int ac = ieee802_1d_to_ac[tid & 7];
3897 3975
3898 skb_set_mac_header(skb, 0); 3976 skb_reset_mac_header(skb);
3899 skb_set_network_header(skb, 0); 3977 skb_reset_network_header(skb);
3900 skb_set_transport_header(skb, 0); 3978 skb_reset_transport_header(skb);
3901 3979
3902 skb_set_queue_mapping(skb, ac); 3980 skb_set_queue_mapping(skb, ac);
3903 skb->priority = tid; 3981 skb->priority = tid;
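The rate-injection hunk near the top of this tx.c diff matches the radiotap legacy rate, which radiotap expresses in 500 kbps units, against the band's bitrate table, which mac80211 keeps in 100 kbps units; hence the rate * 5 comparison before storing the table index. A minimal standalone sketch of that lookup follows; the function and table names are illustrative only, not kernel symbols.

/* Illustrative only: radiotap carries the legacy rate in 500 kbps units,
 * while the band's bitrate table is in 100 kbps units, hence rate * 5. */
#include <stdio.h>

static int legacy_rate_to_index(int rate_500kbps,
				const int *bitrates_100kbps, int n)
{
	for (int i = 0; i < n; i++)
		if (rate_500kbps * 5 == bitrates_100kbps[i])
			return i;
	return -1;	/* no match: leave the index at -1, as the hunk does */
}

int main(void)
{
	/* 2.4 GHz legacy rates in 100 kbps units: 1, 2, 5.5, 11, 6, 9, ... Mbps */
	const int band[] = { 10, 20, 55, 110, 60, 90, 120, 180, 240, 360, 480, 540 };

	/* radiotap rate field 22 -> 11 Mbps -> index 3 */
	printf("idx = %d\n", legacy_rate_to_index(22, band, 12));
	return 0;
}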
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 3943d4bf289c..7390de4946a9 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -4,7 +4,7 @@
4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 4 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
5 * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> 5 * Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
6 * Copyright 2013-2014 Intel Mobile Communications GmbH 6 * Copyright 2013-2014 Intel Mobile Communications GmbH
7 * Copyright (C) 2015 Intel Deutschland GmbH 7 * Copyright (C) 2015-2016 Intel Deutschland GmbH
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
@@ -1928,6 +1928,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1928 BSS_CHANGED_IDLE | 1928 BSS_CHANGED_IDLE |
1929 BSS_CHANGED_TXPOWER; 1929 BSS_CHANGED_TXPOWER;
1930 1930
1931 if (sdata->vif.mu_mimo_owner)
1932 changed |= BSS_CHANGED_MU_GROUPS;
1933
1931 switch (sdata->vif.type) { 1934 switch (sdata->vif.type) {
1932 case NL80211_IFTYPE_STATION: 1935 case NL80211_IFTYPE_STATION:
1933 changed |= BSS_CHANGED_ASSOC | 1936 changed |= BSS_CHANGED_ASSOC |
@@ -2043,16 +2046,26 @@ int ieee80211_reconfig(struct ieee80211_local *local)
2043 */ 2046 */
2044 if (sched_scan_req->n_scan_plans > 1 || 2047 if (sched_scan_req->n_scan_plans > 1 ||
2045 __ieee80211_request_sched_scan_start(sched_scan_sdata, 2048 __ieee80211_request_sched_scan_start(sched_scan_sdata,
2046 sched_scan_req)) 2049 sched_scan_req)) {
2050 RCU_INIT_POINTER(local->sched_scan_sdata, NULL);
2051 RCU_INIT_POINTER(local->sched_scan_req, NULL);
2047 sched_scan_stopped = true; 2052 sched_scan_stopped = true;
2053 }
2048 mutex_unlock(&local->mtx); 2054 mutex_unlock(&local->mtx);
2049 2055
2050 if (sched_scan_stopped) 2056 if (sched_scan_stopped)
2051 cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy); 2057 cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
2052 2058
2053 wake_up: 2059 wake_up:
2054 local->in_reconfig = false; 2060 if (local->in_reconfig) {
2055 barrier(); 2061 local->in_reconfig = false;
2062 barrier();
2063
2064 /* Restart deferred ROCs */
2065 mutex_lock(&local->mtx);
2066 ieee80211_start_next_roc(local);
2067 mutex_unlock(&local->mtx);
2068 }
2056 2069
2057 if (local->monitors == local->open_count && local->monitors > 0) 2070 if (local->monitors == local->open_count && local->monitors > 0)
2058 ieee80211_add_virtual_monitor(local); 2071 ieee80211_add_virtual_monitor(local);
@@ -2361,10 +2374,23 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
2361 2374
2362 switch (chandef->width) { 2375 switch (chandef->width) {
2363 case NL80211_CHAN_WIDTH_160: 2376 case NL80211_CHAN_WIDTH_160:
2364 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_160MHZ; 2377 /*
2378 * Convert 160 MHz channel width to new style as interop
2379 * workaround.
2380 */
2381 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
2382 vht_oper->center_freq_seg2_idx = vht_oper->center_freq_seg1_idx;
2383 if (chandef->chan->center_freq < chandef->center_freq1)
2384 vht_oper->center_freq_seg1_idx -= 8;
2385 else
2386 vht_oper->center_freq_seg1_idx += 8;
2365 break; 2387 break;
2366 case NL80211_CHAN_WIDTH_80P80: 2388 case NL80211_CHAN_WIDTH_80P80:
2367 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80P80MHZ; 2389 /*
2390 * Convert 80+80 MHz channel width to new style as interop
2391 * workaround.
2392 */
2393 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
2368 break; 2394 break;
2369 case NL80211_CHAN_WIDTH_80: 2395 case NL80211_CHAN_WIDTH_80:
2370 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ; 2396 vht_oper->chan_width = IEEE80211_VHT_CHANWIDTH_80MHZ;
@@ -2380,17 +2406,13 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
2380 return pos + sizeof(struct ieee80211_vht_operation); 2406 return pos + sizeof(struct ieee80211_vht_operation);
2381} 2407}
2382 2408
2383void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan, 2409bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
2384 const struct ieee80211_ht_operation *ht_oper, 2410 struct cfg80211_chan_def *chandef)
2385 struct cfg80211_chan_def *chandef)
2386{ 2411{
2387 enum nl80211_channel_type channel_type; 2412 enum nl80211_channel_type channel_type;
2388 2413
2389 if (!ht_oper) { 2414 if (!ht_oper)
2390 cfg80211_chandef_create(chandef, control_chan, 2415 return false;
2391 NL80211_CHAN_NO_HT);
2392 return;
2393 }
2394 2416
2395 switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { 2417 switch (ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
2396 case IEEE80211_HT_PARAM_CHA_SEC_NONE: 2418 case IEEE80211_HT_PARAM_CHA_SEC_NONE:
@@ -2404,42 +2426,66 @@ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan,
2404 break; 2426 break;
2405 default: 2427 default:
2406 channel_type = NL80211_CHAN_NO_HT; 2428 channel_type = NL80211_CHAN_NO_HT;
2429 return false;
2407 } 2430 }
2408 2431
2409 cfg80211_chandef_create(chandef, control_chan, channel_type); 2432 cfg80211_chandef_create(chandef, chandef->chan, channel_type);
2433 return true;
2410} 2434}
2411 2435
2412void ieee80211_vht_oper_to_chandef(struct ieee80211_channel *control_chan, 2436bool ieee80211_chandef_vht_oper(const struct ieee80211_vht_operation *oper,
2413 const struct ieee80211_vht_operation *oper, 2437 struct cfg80211_chan_def *chandef)
2414 struct cfg80211_chan_def *chandef)
2415{ 2438{
2439 struct cfg80211_chan_def new = *chandef;
2440 int cf1, cf2;
2441
2416 if (!oper) 2442 if (!oper)
2417 return; 2443 return false;
2418 2444
2419 chandef->chan = control_chan; 2445 cf1 = ieee80211_channel_to_frequency(oper->center_freq_seg1_idx,
2446 chandef->chan->band);
2447 cf2 = ieee80211_channel_to_frequency(oper->center_freq_seg2_idx,
2448 chandef->chan->band);
2420 2449
2421 switch (oper->chan_width) { 2450 switch (oper->chan_width) {
2422 case IEEE80211_VHT_CHANWIDTH_USE_HT: 2451 case IEEE80211_VHT_CHANWIDTH_USE_HT:
2423 break; 2452 break;
2424 case IEEE80211_VHT_CHANWIDTH_80MHZ: 2453 case IEEE80211_VHT_CHANWIDTH_80MHZ:
2425 chandef->width = NL80211_CHAN_WIDTH_80; 2454 new.width = NL80211_CHAN_WIDTH_80;
2455 new.center_freq1 = cf1;
2456 /* If needed, adjust based on the newer interop workaround. */
2457 if (oper->center_freq_seg2_idx) {
2458 unsigned int diff;
2459
2460 diff = abs(oper->center_freq_seg2_idx -
2461 oper->center_freq_seg1_idx);
2462 if (diff == 8) {
2463 new.width = NL80211_CHAN_WIDTH_160;
2464 new.center_freq1 = cf2;
2465 } else if (diff > 8) {
2466 new.width = NL80211_CHAN_WIDTH_80P80;
2467 new.center_freq2 = cf2;
2468 }
2469 }
2426 break; 2470 break;
2427 case IEEE80211_VHT_CHANWIDTH_160MHZ: 2471 case IEEE80211_VHT_CHANWIDTH_160MHZ:
2428 chandef->width = NL80211_CHAN_WIDTH_160; 2472 new.width = NL80211_CHAN_WIDTH_160;
2473 new.center_freq1 = cf1;
2429 break; 2474 break;
2430 case IEEE80211_VHT_CHANWIDTH_80P80MHZ: 2475 case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
2431 chandef->width = NL80211_CHAN_WIDTH_80P80; 2476 new.width = NL80211_CHAN_WIDTH_80P80;
2477 new.center_freq1 = cf1;
2478 new.center_freq2 = cf2;
2432 break; 2479 break;
2433 default: 2480 default:
2434 break; 2481 return false;
2435 } 2482 }
2436 2483
2437 chandef->center_freq1 = 2484 if (!cfg80211_chandef_valid(&new))
2438 ieee80211_channel_to_frequency(oper->center_freq_seg1_idx, 2485 return false;
2439 control_chan->band); 2486
2440 chandef->center_freq2 = 2487 *chandef = new;
2441 ieee80211_channel_to_frequency(oper->center_freq_seg2_idx, 2488 return true;
2442 control_chan->band);
2443} 2489}
2444 2490
2445int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, 2491int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
@@ -2662,6 +2708,18 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
2662 sband = local->hw.wiphy->bands[status->band]; 2708 sband = local->hw.wiphy->bands[status->band];
2663 bitrate = sband->bitrates[status->rate_idx].bitrate; 2709 bitrate = sband->bitrates[status->rate_idx].bitrate;
2664 ri.legacy = DIV_ROUND_UP(bitrate, (1 << shift)); 2710 ri.legacy = DIV_ROUND_UP(bitrate, (1 << shift));
2711
2712 if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
2713 /* TODO: handle HT/VHT preambles */
2714 if (status->band == IEEE80211_BAND_5GHZ) {
2715 ts += 20 << shift;
2716 mpdu_offset += 2;
2717 } else if (status->flag & RX_FLAG_SHORTPRE) {
2718 ts += 96;
2719 } else {
2720 ts += 192;
2721 }
2722 }
2665 } 2723 }
2666 2724
2667 rate = cfg80211_calculate_bitrate(&ri); 2725 rate = cfg80211_calculate_bitrate(&ri);
@@ -3347,3 +3405,17 @@ void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata,
3347 txqi->txq.ac = IEEE80211_AC_BE; 3405 txqi->txq.ac = IEEE80211_AC_BE;
3348 } 3406 }
3349} 3407}
3408
3409void ieee80211_txq_get_depth(struct ieee80211_txq *txq,
3410 unsigned long *frame_cnt,
3411 unsigned long *byte_cnt)
3412{
3413 struct txq_info *txqi = to_txq_info(txq);
3414
3415 if (frame_cnt)
3416 *frame_cnt = txqi->queue.qlen;
3417
3418 if (byte_cnt)
3419 *byte_cnt = txqi->byte_cnt;
3420}
3421EXPORT_SYMBOL(ieee80211_txq_get_depth);
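Two of the util.c hunks above encode and decode the "new style" VHT operation element, where an 80 MHz chan_width plus a non-zero second segment index stands in for 160 MHz (segment indices 8 channel numbers apart, i.e. one 40 MHz offset) or 80+80 MHz (further apart). The following standalone sketch shows only the decode step of that convention, with illustrative names rather than the kernel's enum values.

/* Sketch, not the kernel API: decode the "new style" VHT operation encoding
 * where chan_width 80 MHz plus a second segment index means 160 or 80+80. */
#include <stdio.h>
#include <stdlib.h>

enum width { W80, W160, W80P80 };

static enum width vht_oper_width(int seg1_idx, int seg2_idx)
{
	if (!seg2_idx)
		return W80;		/* plain 80 MHz */

	int diff = abs(seg2_idx - seg1_idx);

	if (diff == 8)
		return W160;		/* seg2 is the 160 MHz center */
	if (diff > 8)
		return W80P80;		/* seg2 is the second 80 MHz segment */
	return W80;
}

int main(void)
{
	printf("%d %d %d\n",
	       vht_oper_width(42, 0),	/* 80 MHz           -> 0 */
	       vht_oper_width(42, 50),	/* 160 MHz (diff 8) -> 1 */
	       vht_oper_width(42, 155));/* 80+80 (diff > 8) -> 2 */
	return 0;
}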
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index c38b2f07a919..89e04d55aa18 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -1,6 +1,9 @@
1/* 1/*
2 * VHT handling 2 * VHT handling
3 * 3 *
4 * Portions of this file
5 * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
6 *
4 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
@@ -278,6 +281,23 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
278 } 281 }
279 282
280 sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); 283 sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
284
285 /* If HT IE reported 3839 bytes only, stay with that size. */
286 if (sta->sta.max_amsdu_len == IEEE80211_MAX_MPDU_LEN_HT_3839)
287 return;
288
289 switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) {
290 case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
291 sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
292 break;
293 case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991:
294 sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991;
295 break;
296 case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895:
297 default:
298 sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895;
299 break;
300 }
281} 301}
282 302
283enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) 303enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta)
@@ -425,6 +445,43 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
425 return changed; 445 return changed;
426} 446}
427 447
448void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
449 struct ieee80211_mgmt *mgmt)
450{
451 struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
452
453 if (!sdata->vif.mu_mimo_owner)
454 return;
455
456 if (!memcmp(mgmt->u.action.u.vht_group_notif.position,
457 bss_conf->mu_group.position, WLAN_USER_POSITION_LEN) &&
458 !memcmp(mgmt->u.action.u.vht_group_notif.membership,
459 bss_conf->mu_group.membership, WLAN_MEMBERSHIP_LEN))
460 return;
461
462 memcpy(bss_conf->mu_group.membership,
463 mgmt->u.action.u.vht_group_notif.membership,
464 WLAN_MEMBERSHIP_LEN);
465 memcpy(bss_conf->mu_group.position,
466 mgmt->u.action.u.vht_group_notif.position,
467 WLAN_USER_POSITION_LEN);
468
469 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_MU_GROUPS);
470}
471
472void ieee80211_update_mu_groups(struct ieee80211_vif *vif,
473 const u8 *membership, const u8 *position)
474{
475 struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
476
477 if (WARN_ON_ONCE(!vif->mu_mimo_owner))
478 return;
479
480 memcpy(bss_conf->mu_group.membership, membership, WLAN_MEMBERSHIP_LEN);
481 memcpy(bss_conf->mu_group.position, position, WLAN_USER_POSITION_LEN);
482}
483EXPORT_SYMBOL_GPL(ieee80211_update_mu_groups);
484
428void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, 485void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
429 struct sta_info *sta, u8 opmode, 486 struct sta_info *sta, u8 opmode,
430 enum ieee80211_band band) 487 enum ieee80211_band band)
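The A-MSDU hunk above selects the station's maximum MPDU length from the two low bits of the VHT capability field, but never raises it above the 3839-byte limit if that is all the HT IE advertised. A small sketch of that selection logic; the constants mirror the IEEE values but the function and names are illustrative, not the mac80211 ones.

/* Sketch with illustrative names: pick the MPDU size limit advertised in
 * the VHT capability field, capped by what the HT IE already allowed. */
#include <stdio.h>

enum { MPDU_3839 = 3839, MPDU_3895 = 3895, MPDU_7991 = 7991, MPDU_11454 = 11454 };

static int max_mpdu_len(int ht_limit, unsigned int vht_cap)
{
	if (ht_limit == MPDU_3839)	/* HT IE said 3839 only: keep it */
		return ht_limit;

	switch (vht_cap & 0x3) {	/* two low bits of the VHT cap field */
	case 2:  return MPDU_11454;
	case 1:  return MPDU_7991;
	default: return MPDU_3895;
	}
}

int main(void)
{
	printf("%d\n", max_mpdu_len(MPDU_3839, 2));	/* stays 3839 */
	printf("%d\n", max_mpdu_len(MPDU_7991, 2));	/* 11454 */
	return 0;
}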
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index d824c38971ed..18848258adde 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright 2002-2004, Instant802 Networks, Inc. 2 * Copyright 2002-2004, Instant802 Networks, Inc.
3 * Copyright 2008, Jouni Malinen <j@w1.fi> 3 * Copyright 2008, Jouni Malinen <j@w1.fi>
4 * Copyright (C) 2016 Intel Deutschland GmbH
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
@@ -183,7 +184,6 @@ mic_fail_no_key:
183 return RX_DROP_UNUSABLE; 184 return RX_DROP_UNUSABLE;
184} 185}
185 186
186
187static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) 187static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
188{ 188{
189 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 189 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -191,6 +191,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
191 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 191 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
192 unsigned int hdrlen; 192 unsigned int hdrlen;
193 int len, tail; 193 int len, tail;
194 u64 pn;
194 u8 *pos; 195 u8 *pos;
195 196
196 if (info->control.hw_key && 197 if (info->control.hw_key &&
@@ -222,12 +223,8 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
222 return 0; 223 return 0;
223 224
224 /* Increase IV for the frame */ 225 /* Increase IV for the frame */
225 spin_lock(&key->u.tkip.txlock); 226 pn = atomic64_inc_return(&key->conf.tx_pn);
226 key->u.tkip.tx.iv16++; 227 pos = ieee80211_tkip_add_iv(pos, &key->conf, pn);
227 if (key->u.tkip.tx.iv16 == 0)
228 key->u.tkip.tx.iv32++;
229 pos = ieee80211_tkip_add_iv(pos, key);
230 spin_unlock(&key->u.tkip.txlock);
231 228
232 /* hwaccel - with software IV */ 229 /* hwaccel - with software IV */
233 if (info->control.hw_key) 230 if (info->control.hw_key)
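The TKIP hunk above drops the spinlock-protected iv32/iv16 pair in favour of a single atomic64 packet number on the key; the two IV fields are then derived from the 48-bit PN when the header is built. A plain-C sketch of that derivation, not the mac80211 helper itself:

/* Sketch only: with one 64-bit packet-number counter, the TKIP IV pair
 * can be derived per frame instead of being stored and locked separately. */
#include <stdint.h>
#include <stdio.h>

struct tkip_iv { uint16_t iv16; uint32_t iv32; };

static struct tkip_iv pn_to_tkip_iv(uint64_t pn)
{
	struct tkip_iv iv = {
		.iv16 = (uint16_t)(pn & 0xffff),
		.iv32 = (uint32_t)(pn >> 16),
	};
	return iv;
}

int main(void)
{
	uint64_t tx_pn = 0;		/* stands in for the atomic64 tx_pn */
	struct tkip_iv iv = pn_to_tkip_iv(++tx_pn);

	printf("iv32=%u iv16=%u\n", iv.iv32, iv.iv16);
	return 0;
}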
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index a13d02b7cee4..6a3e1c2181d3 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -17,9 +17,9 @@
17#include <linux/err.h> 17#include <linux/err.h>
18#include <linux/bug.h> 18#include <linux/bug.h>
19#include <linux/completion.h> 19#include <linux/completion.h>
20#include <linux/crypto.h>
21#include <linux/ieee802154.h> 20#include <linux/ieee802154.h>
22#include <crypto/aead.h> 21#include <crypto/aead.h>
22#include <crypto/skcipher.h>
23 23
24#include "ieee802154_i.h" 24#include "ieee802154_i.h"
25#include "llsec.h" 25#include "llsec.h"
@@ -144,18 +144,18 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template)
144 goto err_tfm; 144 goto err_tfm;
145 } 145 }
146 146
147 key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC); 147 key->tfm0 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
148 if (IS_ERR(key->tfm0)) 148 if (IS_ERR(key->tfm0))
149 goto err_tfm; 149 goto err_tfm;
150 150
151 if (crypto_blkcipher_setkey(key->tfm0, template->key, 151 if (crypto_skcipher_setkey(key->tfm0, template->key,
152 IEEE802154_LLSEC_KEY_SIZE)) 152 IEEE802154_LLSEC_KEY_SIZE))
153 goto err_tfm0; 153 goto err_tfm0;
154 154
155 return key; 155 return key;
156 156
157err_tfm0: 157err_tfm0:
158 crypto_free_blkcipher(key->tfm0); 158 crypto_free_skcipher(key->tfm0);
159err_tfm: 159err_tfm:
160 for (i = 0; i < ARRAY_SIZE(key->tfm); i++) 160 for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
161 if (key->tfm[i]) 161 if (key->tfm[i])
@@ -175,7 +175,7 @@ static void llsec_key_release(struct kref *ref)
175 for (i = 0; i < ARRAY_SIZE(key->tfm); i++) 175 for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
176 crypto_free_aead(key->tfm[i]); 176 crypto_free_aead(key->tfm[i]);
177 177
178 crypto_free_blkcipher(key->tfm0); 178 crypto_free_skcipher(key->tfm0);
179 kzfree(key); 179 kzfree(key);
180} 180}
181 181
@@ -620,15 +620,17 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
620{ 620{
621 u8 iv[16]; 621 u8 iv[16];
622 struct scatterlist src; 622 struct scatterlist src;
623 struct blkcipher_desc req = { 623 SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
624 .tfm = key->tfm0, 624 int err;
625 .info = iv,
626 .flags = 0,
627 };
628 625
629 llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); 626 llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
630 sg_init_one(&src, skb->data, skb->len); 627 sg_init_one(&src, skb->data, skb->len);
631 return crypto_blkcipher_encrypt_iv(&req, &src, &src, skb->len); 628 skcipher_request_set_tfm(req, key->tfm0);
629 skcipher_request_set_callback(req, 0, NULL, NULL);
630 skcipher_request_set_crypt(req, &src, &src, skb->len, iv);
631 err = crypto_skcipher_encrypt(req);
632 skcipher_request_zero(req);
633 return err;
632} 634}
633 635
634static struct crypto_aead* 636static struct crypto_aead*
@@ -830,11 +832,8 @@ llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
830 unsigned char *data; 832 unsigned char *data;
831 int datalen; 833 int datalen;
832 struct scatterlist src; 834 struct scatterlist src;
833 struct blkcipher_desc req = { 835 SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
834 .tfm = key->tfm0, 836 int err;
835 .info = iv,
836 .flags = 0,
837 };
838 837
839 llsec_geniv(iv, dev_addr, &hdr->sec); 838 llsec_geniv(iv, dev_addr, &hdr->sec);
840 data = skb_mac_header(skb) + skb->mac_len; 839 data = skb_mac_header(skb) + skb->mac_len;
@@ -842,7 +841,13 @@ llsec_do_decrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
842 841
843 sg_init_one(&src, data, datalen); 842 sg_init_one(&src, data, datalen);
844 843
845 return crypto_blkcipher_decrypt_iv(&req, &src, &src, datalen); 844 skcipher_request_set_tfm(req, key->tfm0);
845 skcipher_request_set_callback(req, 0, NULL, NULL);
846 skcipher_request_set_crypt(req, &src, &src, datalen, iv);
847
848 err = crypto_skcipher_decrypt(req);
849 skcipher_request_zero(req);
850 return err;
846} 851}
847 852
848static int 853static int
diff --git a/net/mac802154/llsec.h b/net/mac802154/llsec.h
index 950578e1d7be..6f3b658e3279 100644
--- a/net/mac802154/llsec.h
+++ b/net/mac802154/llsec.h
@@ -19,7 +19,6 @@
19 19
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/hashtable.h> 21#include <linux/hashtable.h>
22#include <linux/crypto.h>
23#include <linux/kref.h> 22#include <linux/kref.h>
24#include <linux/spinlock.h> 23#include <linux/spinlock.h>
25#include <net/af_ieee802154.h> 24#include <net/af_ieee802154.h>
@@ -30,7 +29,7 @@ struct mac802154_llsec_key {
30 29
31 /* one tfm for each authsize (4/8/16) */ 30 /* one tfm for each authsize (4/8/16) */
32 struct crypto_aead *tfm[3]; 31 struct crypto_aead *tfm[3];
33 struct crypto_blkcipher *tfm0; 32 struct crypto_skcipher *tfm0;
34 33
35 struct kref ref; 34 struct kref ref;
36}; 35};
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index e8cab5bb80c6..87da85ae5a6b 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -218,7 +218,6 @@ void ieee802154_unregister_hw(struct ieee802154_hw *hw)
218 218
219 tasklet_kill(&local->tasklet); 219 tasklet_kill(&local->tasklet);
220 flush_workqueue(local->workqueue); 220 flush_workqueue(local->workqueue);
221 destroy_workqueue(local->workqueue);
222 221
223 rtnl_lock(); 222 rtnl_lock();
224 223
@@ -226,6 +225,7 @@ void ieee802154_unregister_hw(struct ieee802154_hw *hw)
226 225
227 rtnl_unlock(); 226 rtnl_unlock();
228 227
228 destroy_workqueue(local->workqueue);
229 wpan_phy_unregister(local->phy); 229 wpan_phy_unregister(local->phy);
230} 230}
231EXPORT_SYMBOL(ieee802154_unregister_hw); 231EXPORT_SYMBOL(ieee802154_unregister_hw);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index fb31aa87de81..644a8da6d4bd 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -227,5 +227,6 @@ static void __exit mpls_iptunnel_exit(void)
227} 227}
228module_exit(mpls_iptunnel_exit); 228module_exit(mpls_iptunnel_exit);
229 229
230MODULE_ALIAS_RTNL_LWT(MPLS);
230MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels"); 231MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels");
231MODULE_LICENSE("GPL v2"); 232MODULE_LICENSE("GPL v2");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8c067e6663a1..95e757c377f9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -891,7 +891,7 @@ config NETFILTER_XT_TARGET_TEE
891 depends on IPV6 || IPV6=n 891 depends on IPV6 || IPV6=n
892 depends on !NF_CONNTRACK || NF_CONNTRACK 892 depends on !NF_CONNTRACK || NF_CONNTRACK
893 select NF_DUP_IPV4 893 select NF_DUP_IPV4
894 select NF_DUP_IPV6 if IP6_NF_IPTABLES != n 894 select NF_DUP_IPV6 if IPV6
895 ---help--- 895 ---help---
896 This option adds a "TEE" target with which a packet can be cloned and 896 This option adds a "TEE" target with which a packet can be cloned and
897 this clone be rerouted to another nexthop. 897 this clone be rerouted to another nexthop.
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 29dde208381d..9a065f672d3a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -267,6 +267,8 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
267 267
268 e.id = ip_to_id(map, ip); 268 e.id = ip_to_id(map, ip);
269 if (tb[IPSET_ATTR_ETHER]) { 269 if (tb[IPSET_ATTR_ETHER]) {
270 if (nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN)
271 return -IPSET_ERR_PROTOCOL;
270 memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); 272 memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
271 e.add_mac = 1; 273 e.add_mac = 1;
272 } 274 }
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 95db43fc0303..7e6568cad494 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -985,6 +985,9 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
985 if (unlikely(protocol_failed(attr))) 985 if (unlikely(protocol_failed(attr)))
986 return -IPSET_ERR_PROTOCOL; 986 return -IPSET_ERR_PROTOCOL;
987 987
988 /* Must wait for flush to be really finished in list:set */
989 rcu_barrier();
990
988 /* Commands are serialized and references are 991 /* Commands are serialized and references are
989 * protected by the ip_set_ref_lock. 992 * protected by the ip_set_ref_lock.
990 * External systems (i.e. xt_set) must call 993 * External systems (i.e. xt_set) must call
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index f1e7d2c0f685..8f004edad396 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -110,7 +110,8 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
110 if (tb[IPSET_ATTR_LINENO]) 110 if (tb[IPSET_ATTR_LINENO])
111 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 111 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
112 112
113 if (unlikely(!tb[IPSET_ATTR_ETHER])) 113 if (unlikely(!tb[IPSET_ATTR_ETHER] ||
114 nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN))
114 return -IPSET_ERR_PROTOCOL; 115 return -IPSET_ERR_PROTOCOL;
115 116
116 ret = ip_set_get_extensions(set, tb, &ext); 117 ret = ip_set_get_extensions(set, tb, &ext);
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 43d8c9896fa3..f0f688db6213 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -164,8 +164,6 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
164 }; 164 };
165 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 165 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
166 166
167 if (e.cidr == 0)
168 return -EINVAL;
169 if (adt == IPSET_TEST) 167 if (adt == IPSET_TEST)
170 e.cidr = HOST_MASK; 168 e.cidr = HOST_MASK;
171 169
@@ -377,8 +375,6 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
377 }; 375 };
378 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 376 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
379 377
380 if (e.cidr == 0)
381 return -EINVAL;
382 if (adt == IPSET_TEST) 378 if (adt == IPSET_TEST)
383 e.cidr = HOST_MASK; 379 e.cidr = HOST_MASK;
384 380
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index bbede95c9f68..24c6c1962aea 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -30,6 +30,7 @@ MODULE_ALIAS("ip_set_list:set");
30struct set_elem { 30struct set_elem {
31 struct rcu_head rcu; 31 struct rcu_head rcu;
32 struct list_head list; 32 struct list_head list;
33 struct ip_set *set; /* Sigh, in order to cleanup reference */
33 ip_set_id_t id; 34 ip_set_id_t id;
34} __aligned(__alignof__(u64)); 35} __aligned(__alignof__(u64));
35 36
@@ -151,30 +152,29 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
151/* Userspace interfaces: we are protected by the nfnl mutex */ 152/* Userspace interfaces: we are protected by the nfnl mutex */
152 153
153static void 154static void
154__list_set_del(struct ip_set *set, struct set_elem *e) 155__list_set_del_rcu(struct rcu_head * rcu)
155{ 156{
157 struct set_elem *e = container_of(rcu, struct set_elem, rcu);
158 struct ip_set *set = e->set;
156 struct list_set *map = set->data; 159 struct list_set *map = set->data;
157 160
158 ip_set_put_byindex(map->net, e->id); 161 ip_set_put_byindex(map->net, e->id);
159 /* We may call it, because we don't have a to be destroyed
160 * extension which is used by the kernel.
161 */
162 ip_set_ext_destroy(set, e); 162 ip_set_ext_destroy(set, e);
163 kfree_rcu(e, rcu); 163 kfree(e);
164} 164}
165 165
166static inline void 166static inline void
167list_set_del(struct ip_set *set, struct set_elem *e) 167list_set_del(struct ip_set *set, struct set_elem *e)
168{ 168{
169 list_del_rcu(&e->list); 169 list_del_rcu(&e->list);
170 __list_set_del(set, e); 170 call_rcu(&e->rcu, __list_set_del_rcu);
171} 171}
172 172
173static inline void 173static inline void
174list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) 174list_set_replace(struct set_elem *e, struct set_elem *old)
175{ 175{
176 list_replace_rcu(&old->list, &e->list); 176 list_replace_rcu(&old->list, &e->list);
177 __list_set_del(set, old); 177 call_rcu(&old->rcu, __list_set_del_rcu);
178} 178}
179 179
180static void 180static void
@@ -244,9 +244,6 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
244 struct set_elem *e, *n, *prev, *next; 244 struct set_elem *e, *n, *prev, *next;
245 bool flag_exist = flags & IPSET_FLAG_EXIST; 245 bool flag_exist = flags & IPSET_FLAG_EXIST;
246 246
247 if (SET_WITH_TIMEOUT(set))
248 set_cleanup_entries(set);
249
250 /* Find where to add the new entry */ 247 /* Find where to add the new entry */
251 n = prev = next = NULL; 248 n = prev = next = NULL;
252 list_for_each_entry(e, &map->members, list) { 249 list_for_each_entry(e, &map->members, list) {
@@ -301,10 +298,11 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
301 if (!e) 298 if (!e)
302 return -ENOMEM; 299 return -ENOMEM;
303 e->id = d->id; 300 e->id = d->id;
301 e->set = set;
304 INIT_LIST_HEAD(&e->list); 302 INIT_LIST_HEAD(&e->list);
305 list_set_init_extensions(set, ext, e); 303 list_set_init_extensions(set, ext, e);
306 if (n) 304 if (n)
307 list_set_replace(set, e, n); 305 list_set_replace(e, n);
308 else if (next) 306 else if (next)
309 list_add_tail_rcu(&e->list, &next->list); 307 list_add_tail_rcu(&e->list, &next->list);
310 else if (prev) 308 else if (prev)
@@ -431,6 +429,7 @@ list_set_destroy(struct ip_set *set)
431 429
432 if (SET_WITH_TIMEOUT(set)) 430 if (SET_WITH_TIMEOUT(set))
433 del_timer_sync(&map->gc); 431 del_timer_sync(&map->gc);
432
434 list_for_each_entry_safe(e, n, &map->members, list) { 433 list_for_each_entry_safe(e, n, &map->members, list) {
435 list_del(&e->list); 434 list_del(&e->list);
436 ip_set_put_byindex(map->net, e->id); 435 ip_set_put_byindex(map->net, e->id);
@@ -450,8 +449,10 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
450 struct set_elem *e; 449 struct set_elem *e;
451 u32 n = 0; 450 u32 n = 0;
452 451
453 list_for_each_entry(e, &map->members, list) 452 rcu_read_lock();
453 list_for_each_entry_rcu(e, &map->members, list)
454 n++; 454 n++;
455 rcu_read_unlock();
455 456
456 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 457 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
457 if (!nested) 458 if (!nested)
@@ -483,33 +484,25 @@ list_set_list(const struct ip_set *set,
483 atd = ipset_nest_start(skb, IPSET_ATTR_ADT); 484 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
484 if (!atd) 485 if (!atd)
485 return -EMSGSIZE; 486 return -EMSGSIZE;
486 list_for_each_entry(e, &map->members, list) {
487 if (i == first)
488 break;
489 i++;
490 }
491 487
492 rcu_read_lock(); 488 rcu_read_lock();
493 list_for_each_entry_from(e, &map->members, list) { 489 list_for_each_entry_rcu(e, &map->members, list) {
494 i++; 490 if (i < first ||
495 if (SET_WITH_TIMEOUT(set) && 491 (SET_WITH_TIMEOUT(set) &&
496 ip_set_timeout_expired(ext_timeout(e, set))) 492 ip_set_timeout_expired(ext_timeout(e, set)))) {
493 i++;
497 continue; 494 continue;
495 }
498 nested = ipset_nest_start(skb, IPSET_ATTR_DATA); 496 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
499 if (!nested) { 497 if (!nested)
500 if (i == first) {
501 nla_nest_cancel(skb, atd);
502 ret = -EMSGSIZE;
503 goto out;
504 }
505 goto nla_put_failure; 498 goto nla_put_failure;
506 }
507 if (nla_put_string(skb, IPSET_ATTR_NAME, 499 if (nla_put_string(skb, IPSET_ATTR_NAME,
508 ip_set_name_byindex(map->net, e->id))) 500 ip_set_name_byindex(map->net, e->id)))
509 goto nla_put_failure; 501 goto nla_put_failure;
510 if (ip_set_put_extensions(skb, set, e, true)) 502 if (ip_set_put_extensions(skb, set, e, true))
511 goto nla_put_failure; 503 goto nla_put_failure;
512 ipset_nest_end(skb, nested); 504 ipset_nest_end(skb, nested);
505 i++;
513 } 506 }
514 507
515 ipset_nest_end(skb, atd); 508 ipset_nest_end(skb, atd);
@@ -520,10 +513,12 @@ list_set_list(const struct ip_set *set,
520nla_put_failure: 513nla_put_failure:
521 nla_nest_cancel(skb, nested); 514 nla_nest_cancel(skb, nested);
522 if (unlikely(i == first)) { 515 if (unlikely(i == first)) {
516 nla_nest_cancel(skb, atd);
523 cb->args[IPSET_CB_ARG0] = 0; 517 cb->args[IPSET_CB_ARG0] = 0;
524 ret = -EMSGSIZE; 518 ret = -EMSGSIZE;
519 } else {
520 cb->args[IPSET_CB_ARG0] = i;
525 } 521 }
526 cb->args[IPSET_CB_ARG0] = i - 1;
527 ipset_nest_end(skb, atd); 522 ipset_nest_end(skb, atd);
528out: 523out:
529 rcu_read_unlock(); 524 rcu_read_unlock();
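Because the deferred-free callback introduced in the list:set changes above only receives the rcu_head, each element now carries a back-pointer to its set so the callback can drop the byindex reference and destroy extensions. The sketch below shows the container_of pattern involved; it is userspace-only, with a direct call standing in for call_rcu() firing after a grace period.

/* Illustrative container_of sketch; struct names echo the patch but this
 * is not kernel code and there is no real RCU here. */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head { void (*func)(struct rcu_head *); };
struct ip_set { const char *name; };

struct set_elem {
	struct rcu_head rcu;
	struct ip_set *set;	/* back-pointer used by the callback */
	int id;
};

static void elem_free_rcu(struct rcu_head *rcu)
{
	struct set_elem *e = container_of(rcu, struct set_elem, rcu);

	printf("dropping id %d from set %s\n", e->id, e->set->name);
	free(e);
}

int main(void)
{
	struct ip_set s = { .name = "demo" };
	struct set_elem *e = calloc(1, sizeof(*e));

	e->set = &s;
	e->id = 7;
	e->rcu.func = elem_free_rcu;
	e->rcu.func(&e->rcu);	/* stands in for the deferred call_rcu() */
	return 0;
}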
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 0328f7250693..299edc6add5a 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -605,17 +605,13 @@ static const struct file_operations ip_vs_app_fops = {
605 605
606int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) 606int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
607{ 607{
608 struct net *net = ipvs->net;
609
610 INIT_LIST_HEAD(&ipvs->app_list); 608 INIT_LIST_HEAD(&ipvs->app_list);
611 proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); 609 proc_create("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_fops);
612 return 0; 610 return 0;
613} 611}
614 612
615void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) 613void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
616{ 614{
617 struct net *net = ipvs->net;
618
619 unregister_ip_vs_app(ipvs, NULL /* all */); 615 unregister_ip_vs_app(ipvs, NULL /* all */);
620 remove_proc_entry("ip_vs_app", net->proc_net); 616 remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
621} 617}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index f57b4dcdb233..b9a4082afa3a 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1089,6 +1089,7 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
1089 switch (cp->protocol) { 1089 switch (cp->protocol) {
1090 case IPPROTO_TCP: 1090 case IPPROTO_TCP:
1091 return (cp->state == IP_VS_TCP_S_TIME_WAIT) || 1091 return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
1092 (cp->state == IP_VS_TCP_S_CLOSE) ||
1092 ((conn_reuse_mode & 2) && 1093 ((conn_reuse_mode & 2) &&
1093 (cp->state == IP_VS_TCP_S_FIN_WAIT) && 1094 (cp->state == IP_VS_TCP_S_FIN_WAIT) &&
1094 (cp->flags & IP_VS_CONN_F_NOOUTPUT)); 1095 (cp->flags & IP_VS_CONN_F_NOOUTPUT));
@@ -1757,15 +1758,34 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
1757 cp = pp->conn_in_get(ipvs, af, skb, &iph); 1758 cp = pp->conn_in_get(ipvs, af, skb, &iph);
1758 1759
1759 conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); 1760 conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
1760 if (conn_reuse_mode && !iph.fragoffs && 1761 if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
1761 is_new_conn(skb, &iph) && cp && 1762 bool uses_ct = false, resched = false;
1762 ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && 1763
1763 unlikely(!atomic_read(&cp->dest->weight))) || 1764 if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
1764 unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) { 1765 unlikely(!atomic_read(&cp->dest->weight))) {
1765 if (!atomic_read(&cp->n_control)) 1766 resched = true;
1766 ip_vs_conn_expire_now(cp); 1767 uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
1767 __ip_vs_conn_put(cp); 1768 } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
1768 cp = NULL; 1769 uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
1770 if (!atomic_read(&cp->n_control)) {
1771 resched = true;
1772 } else {
1773 /* Do not reschedule controlling connection
1774 * that uses conntrack while it is still
1775 * referenced by controlled connection(s).
1776 */
1777 resched = !uses_ct;
1778 }
1779 }
1780
1781 if (resched) {
1782 if (!atomic_read(&cp->n_control))
1783 ip_vs_conn_expire_now(cp);
1784 __ip_vs_conn_put(cp);
1785 if (uses_ct)
1786 return NF_DROP;
1787 cp = NULL;
1788 }
1769 } 1789 }
1770 1790
1771 if (unlikely(!cp)) { 1791 if (unlikely(!cp)) {
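The ip_vs_in() rework above separates two questions: whether the existing connection should be expired so the packet gets rescheduled, and whether the packet must additionally be dropped because the old connection is conntrack-backed and still controls other connections. A decision-table sketch of that logic follows; the structure and names are illustrative, not the ipvs API.

/* Sketch of the reschedule/drop decision; not ipvs code. */
#include <stdbool.h>
#include <stdio.h>

struct conn { bool dest_unavailable, uses_conntrack; int n_control; };

static bool should_resched(const struct conn *cp, bool expire_nodest,
			   bool reuse_expected, bool *drop)
{
	bool resched = false, uses_ct = false;

	if (expire_nodest && cp->dest_unavailable) {
		resched = true;
		uses_ct = cp->uses_conntrack;
	} else if (reuse_expected) {
		uses_ct = cp->uses_conntrack;
		/* never tear down a conntrack-backed controlling connection
		 * while controlled connections still reference it */
		resched = cp->n_control ? !uses_ct : true;
	}

	*drop = resched && uses_ct;
	return resched;
}

int main(void)
{
	struct conn cp = { .uses_conntrack = true, .n_control = 2 };
	bool drop;

	/* controlling conntrack-backed connection: neither resched nor drop */
	printf("resched=%d drop=%d\n", should_resched(&cp, false, true, &drop), drop);
	return 0;
}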
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index e7c1b052c2a3..404b2a4f4b5b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1376,8 +1376,6 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1376 struct ip_vs_pe *old_pe; 1376 struct ip_vs_pe *old_pe;
1377 struct netns_ipvs *ipvs = svc->ipvs; 1377 struct netns_ipvs *ipvs = svc->ipvs;
1378 1378
1379 pr_info("%s: enter\n", __func__);
1380
1381 /* Count only IPv4 services for old get/setsockopt interface */ 1379 /* Count only IPv4 services for old get/setsockopt interface */
1382 if (svc->af == AF_INET) 1380 if (svc->af == AF_INET)
1383 ipvs->num_services--; 1381 ipvs->num_services--;
@@ -3947,7 +3945,6 @@ static struct notifier_block ip_vs_dst_notifier = {
3947 3945
3948int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) 3946int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
3949{ 3947{
3950 struct net *net = ipvs->net;
3951 int i, idx; 3948 int i, idx;
3952 3949
3953 /* Initialize rs_table */ 3950 /* Initialize rs_table */
@@ -3974,9 +3971,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
3974 3971
3975 spin_lock_init(&ipvs->tot_stats.lock); 3972 spin_lock_init(&ipvs->tot_stats.lock);
3976 3973
3977 proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); 3974 proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops);
3978 proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops); 3975 proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops);
3979 proc_create("ip_vs_stats_percpu", 0, net->proc_net, 3976 proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
3980 &ip_vs_stats_percpu_fops); 3977 &ip_vs_stats_percpu_fops);
3981 3978
3982 if (ip_vs_control_net_init_sysctl(ipvs)) 3979 if (ip_vs_control_net_init_sysctl(ipvs))
@@ -3991,13 +3988,11 @@ err:
3991 3988
3992void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) 3989void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
3993{ 3990{
3994 struct net *net = ipvs->net;
3995
3996 ip_vs_trash_cleanup(ipvs); 3991 ip_vs_trash_cleanup(ipvs);
3997 ip_vs_control_net_cleanup_sysctl(ipvs); 3992 ip_vs_control_net_cleanup_sysctl(ipvs);
3998 remove_proc_entry("ip_vs_stats_percpu", net->proc_net); 3993 remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
3999 remove_proc_entry("ip_vs_stats", net->proc_net); 3994 remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
4000 remove_proc_entry("ip_vs", net->proc_net); 3995 remove_proc_entry("ip_vs", ipvs->net->proc_net);
4001 free_percpu(ipvs->tot_stats.cpustats); 3996 free_percpu(ipvs->tot_stats.cpustats);
4002} 3997}
4003 3998
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1b8d594e493a..0a6eb5c0d9e9 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -70,10 +70,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
70 const char *dptr; 70 const char *dptr;
71 int retc; 71 int retc;
72 72
73 ip_vs_fill_iph_skb(p->af, skb, false, &iph); 73 retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph);
74 74
75 /* Only useful with UDP */ 75 /* Only useful with UDP */
76 if (iph.protocol != IPPROTO_UDP) 76 if (!retc || iph.protocol != IPPROTO_UDP)
77 return -EINVAL; 77 return -EINVAL;
78 /* todo: IPv6 fragments: 78 /* todo: IPv6 fragments:
79 * I think this only should be done for the first fragment. /HS 79 * I think this only should be done for the first fragment. /HS
@@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
88 dptr = skb->data + dataoff; 88 dptr = skb->data + dataoff;
89 datalen = skb->len - dataoff; 89 datalen = skb->len - dataoff;
90 90
91 if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) 91 if (get_callid(dptr, 0, datalen, &matchoff, &matchlen))
92 return -EINVAL; 92 return -EINVAL;
93 93
94 /* N.B: pe_data is only set on success, 94 /* N.B: pe_data is only set on success,
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3264cb49b333..dc196a0f501d 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -531,8 +531,6 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
531 if (ret == NF_ACCEPT) { 531 if (ret == NF_ACCEPT) {
532 nf_reset(skb); 532 nf_reset(skb);
533 skb_forward_csum(skb); 533 skb_forward_csum(skb);
534 if (!skb->sk)
535 skb_sender_cpu_clear(skb);
536 } 534 }
537 return ret; 535 return ret;
538} 536}
@@ -573,8 +571,6 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
573 571
574 if (!local) { 572 if (!local) {
575 skb_forward_csum(skb); 573 skb_forward_csum(skb);
576 if (!skb->sk)
577 skb_sender_cpu_clear(skb);
578 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, 574 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
579 NULL, skb_dst(skb)->dev, dst_output); 575 NULL, skb_dst(skb)->dev, dst_output);
580 } else 576 } else
@@ -595,8 +591,6 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
595 if (!local) { 591 if (!local) {
596 ip_vs_drop_early_demux_sk(skb); 592 ip_vs_drop_early_demux_sk(skb);
597 skb_forward_csum(skb); 593 skb_forward_csum(skb);
598 if (!skb->sk)
599 skb_sender_cpu_clear(skb);
600 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, 594 NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
601 NULL, skb_dst(skb)->dev, dst_output); 595 NULL, skb_dst(skb)->dev, dst_output);
602 } else 596 } else
@@ -1019,8 +1013,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1019 if (IS_ERR(skb)) 1013 if (IS_ERR(skb))
1020 goto tx_error; 1014 goto tx_error;
1021 1015
1022 skb = iptunnel_handle_offloads( 1016 skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af));
1023 skb, false, __tun_gso_type_mask(AF_INET, cp->af));
1024 if (IS_ERR(skb)) 1017 if (IS_ERR(skb))
1025 goto tx_error; 1018 goto tx_error;
1026 1019
@@ -1112,8 +1105,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1112 if (IS_ERR(skb)) 1105 if (IS_ERR(skb))
1113 goto tx_error; 1106 goto tx_error;
1114 1107
1115 skb = iptunnel_handle_offloads( 1108 skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af));
1116 skb, false, __tun_gso_type_mask(AF_INET6, cp->af));
1117 if (IS_ERR(skb)) 1109 if (IS_ERR(skb))
1118 goto tx_error; 1110 goto tx_error;
1119 1111
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3cb3cb831591..afde5f5e728a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,6 +66,20 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
66__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); 66__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
67EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); 67EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
68 68
69static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
70static __read_mostly bool nf_conntrack_locks_all;
71
72void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
73{
74 spin_lock(lock);
75 while (unlikely(nf_conntrack_locks_all)) {
76 spin_unlock(lock);
77 spin_unlock_wait(&nf_conntrack_locks_all_lock);
78 spin_lock(lock);
79 }
80}
81EXPORT_SYMBOL_GPL(nf_conntrack_lock);
82
69static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2) 83static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
70{ 84{
71 h1 %= CONNTRACK_LOCKS; 85 h1 %= CONNTRACK_LOCKS;
@@ -82,12 +96,12 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
82 h1 %= CONNTRACK_LOCKS; 96 h1 %= CONNTRACK_LOCKS;
83 h2 %= CONNTRACK_LOCKS; 97 h2 %= CONNTRACK_LOCKS;
84 if (h1 <= h2) { 98 if (h1 <= h2) {
85 spin_lock(&nf_conntrack_locks[h1]); 99 nf_conntrack_lock(&nf_conntrack_locks[h1]);
86 if (h1 != h2) 100 if (h1 != h2)
87 spin_lock_nested(&nf_conntrack_locks[h2], 101 spin_lock_nested(&nf_conntrack_locks[h2],
88 SINGLE_DEPTH_NESTING); 102 SINGLE_DEPTH_NESTING);
89 } else { 103 } else {
90 spin_lock(&nf_conntrack_locks[h2]); 104 nf_conntrack_lock(&nf_conntrack_locks[h2]);
91 spin_lock_nested(&nf_conntrack_locks[h1], 105 spin_lock_nested(&nf_conntrack_locks[h1],
92 SINGLE_DEPTH_NESTING); 106 SINGLE_DEPTH_NESTING);
93 } 107 }
@@ -102,16 +116,18 @@ static void nf_conntrack_all_lock(void)
102{ 116{
103 int i; 117 int i;
104 118
105 for (i = 0; i < CONNTRACK_LOCKS; i++) 119 spin_lock(&nf_conntrack_locks_all_lock);
106 spin_lock_nested(&nf_conntrack_locks[i], i); 120 nf_conntrack_locks_all = true;
121
122 for (i = 0; i < CONNTRACK_LOCKS; i++) {
123 spin_unlock_wait(&nf_conntrack_locks[i]);
124 }
107} 125}
108 126
109static void nf_conntrack_all_unlock(void) 127static void nf_conntrack_all_unlock(void)
110{ 128{
111 int i; 129 nf_conntrack_locks_all = false;
112 130 spin_unlock(&nf_conntrack_locks_all_lock);
113 for (i = 0; i < CONNTRACK_LOCKS; i++)
114 spin_unlock(&nf_conntrack_locks[i]);
115} 131}
116 132
117unsigned int nf_conntrack_htable_size __read_mostly; 133unsigned int nf_conntrack_htable_size __read_mostly;
@@ -757,7 +773,7 @@ restart:
757 hash = hash_bucket(_hash, net); 773 hash = hash_bucket(_hash, net);
758 for (; i < net->ct.htable_size; i++) { 774 for (; i < net->ct.htable_size; i++) {
759 lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS]; 775 lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
760 spin_lock(lockp); 776 nf_conntrack_lock(lockp);
761 if (read_seqcount_retry(&net->ct.generation, sequence)) { 777 if (read_seqcount_retry(&net->ct.generation, sequence)) {
762 spin_unlock(lockp); 778 spin_unlock(lockp);
763 goto restart; 779 goto restart;
@@ -1382,7 +1398,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
1382 for (; *bucket < net->ct.htable_size; (*bucket)++) { 1398 for (; *bucket < net->ct.htable_size; (*bucket)++) {
1383 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; 1399 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
1384 local_bh_disable(); 1400 local_bh_disable();
1385 spin_lock(lockp); 1401 nf_conntrack_lock(lockp);
1386 if (*bucket < net->ct.htable_size) { 1402 if (*bucket < net->ct.htable_size) {
1387 hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { 1403 hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
1388 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) 1404 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
@@ -1394,6 +1410,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
1394 } 1410 }
1395 spin_unlock(lockp); 1411 spin_unlock(lockp);
1396 local_bh_enable(); 1412 local_bh_enable();
1413 cond_resched();
1397 } 1414 }
1398 1415
1399 for_each_possible_cpu(cpu) { 1416 for_each_possible_cpu(cpu) {
@@ -1406,6 +1423,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
1406 set_bit(IPS_DYING_BIT, &ct->status); 1423 set_bit(IPS_DYING_BIT, &ct->status);
1407 } 1424 }
1408 spin_unlock_bh(&pcpu->lock); 1425 spin_unlock_bh(&pcpu->lock);
1426 cond_resched();
1409 } 1427 }
1410 return NULL; 1428 return NULL;
1411found: 1429found:
@@ -1422,6 +1440,8 @@ void nf_ct_iterate_cleanup(struct net *net,
1422 struct nf_conn *ct; 1440 struct nf_conn *ct;
1423 unsigned int bucket = 0; 1441 unsigned int bucket = 0;
1424 1442
1443 might_sleep();
1444
1425 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { 1445 while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
1426 /* Time to push up daises... */ 1446 /* Time to push up daises... */
1427 if (del_timer(&ct->timeout)) 1447 if (del_timer(&ct->timeout))
@@ -1430,6 +1450,7 @@ void nf_ct_iterate_cleanup(struct net *net,
1430 /* ... else the timer will get him soon. */ 1450 /* ... else the timer will get him soon. */
1431 1451
1432 nf_ct_put(ct); 1452 nf_ct_put(ct);
1453 cond_resched();
1433 } 1454 }
1434} 1455}
1435EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); 1456EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
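The nf_conntrack_lock() scheme added above lets the table walkers take one global "locks_all" step instead of nesting all CONNTRACK_LOCKS spinlocks: per-bucket lockers back off while the global flag is set, and the global locker sets the flag and then waits for every bucket lock to drain. Below is a rough userspace sketch of the same idea using C11 atomics; the spin_unlock_wait() stand-in simply polls until the lock is free, and the memory-ordering refinements of the real kernel code are glossed over.

/* Userspace sketch only; names echo the patch but this is not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>

#define NLOCKS 16

typedef atomic_bool spinlock_t;		/* false = unlocked */

static spinlock_t bucket_lock[NLOCKS];
static spinlock_t locks_all_lock;
static atomic_bool locks_all;		/* mirrors nf_conntrack_locks_all */

static void spin_lock(spinlock_t *l)
{
	bool expected = false;
	while (!atomic_compare_exchange_weak(l, &expected, true))
		expected = false;
}

static void spin_unlock(spinlock_t *l)
{
	atomic_store(l, false);
}

static void spin_unlock_wait(spinlock_t *l)
{
	while (atomic_load(l))		/* poll until nobody holds it */
		;
}

/* per-bucket path: back off while a global walker holds everything */
static void conntrack_lock(unsigned int h)
{
	spin_lock(&bucket_lock[h % NLOCKS]);
	while (atomic_load(&locks_all)) {
		spin_unlock(&bucket_lock[h % NLOCKS]);
		spin_unlock_wait(&locks_all_lock);
		spin_lock(&bucket_lock[h % NLOCKS]);
	}
}

static void conntrack_unlock(unsigned int h)
{
	spin_unlock(&bucket_lock[h % NLOCKS]);
}

/* global path: stop new bucket holders, then wait for current ones to leave */
static void conntrack_all_lock(void)
{
	spin_lock(&locks_all_lock);
	atomic_store(&locks_all, true);
	for (int i = 0; i < NLOCKS; i++)
		spin_unlock_wait(&bucket_lock[i]);
}

static void conntrack_all_unlock(void)
{
	atomic_store(&locks_all, false);
	spin_unlock(&locks_all_lock);
}

int main(void)
{
	conntrack_lock(3);
	conntrack_unlock(3);
	conntrack_all_lock();
	conntrack_all_unlock();
	return 0;
}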
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index bd9d31537905..3b40ec575cd5 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -425,7 +425,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
425 } 425 }
426 local_bh_disable(); 426 local_bh_disable();
427 for (i = 0; i < net->ct.htable_size; i++) { 427 for (i = 0; i < net->ct.htable_size; i++) {
428 spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); 428 nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
429 if (i < net->ct.htable_size) { 429 if (i < net->ct.htable_size) {
430 hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) 430 hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
431 unhelp(h, me); 431 unhelp(h, me);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dbb1bb3edb45..355e8552fd5b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -840,7 +840,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
840 for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { 840 for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
841restart: 841restart:
842 lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; 842 lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
843 spin_lock(lockp); 843 nf_conntrack_lock(lockp);
844 if (cb->args[0] >= net->ct.htable_size) { 844 if (cb->args[0] >= net->ct.htable_size) {
845 spin_unlock(lockp); 845 spin_unlock(lockp);
846 goto out; 846 goto out;
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index 8414ee1a0319..7ec69723940f 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -31,7 +31,6 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
31 skb_push(skb, skb->mac_len); 31 skb_push(skb, skb->mac_len);
32 32
33 skb->dev = dev; 33 skb->dev = dev;
34 skb_sender_cpu_clear(skb);
35 dev_queue_xmit(skb); 34 dev_queue_xmit(skb);
36} 35}
37EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); 36EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index b6605e000801..5eefe4a355c6 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -224,12 +224,12 @@ static int __init nf_tables_netdev_init(void)
224 224
225 nft_register_chain_type(&nft_filter_chain_netdev); 225 nft_register_chain_type(&nft_filter_chain_netdev);
226 ret = register_pernet_subsys(&nf_tables_netdev_net_ops); 226 ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
227 if (ret < 0) 227 if (ret < 0) {
228 nft_unregister_chain_type(&nft_filter_chain_netdev); 228 nft_unregister_chain_type(&nft_filter_chain_netdev);
229 229 return ret;
230 }
230 register_netdevice_notifier(&nf_tables_netdev_notifier); 231 register_netdevice_notifier(&nf_tables_netdev_notifier);
231 232 return 0;
232 return ret;
233} 233}
234 234
235static void __exit nf_tables_netdev_exit(void) 235static void __exit nf_tables_netdev_exit(void)
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a7ba23353dab..2278d9ab723b 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -127,13 +127,6 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group)
127} 127}
128EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); 128EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
129 129
130struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
131 u32 dst_portid, gfp_t gfp_mask)
132{
133 return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask);
134}
135EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb);
136
137int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, 130int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
138 unsigned int group, int echo, gfp_t flags) 131 unsigned int group, int echo, gfp_t flags)
139{ 132{
@@ -311,14 +304,14 @@ replay:
311#endif 304#endif
312 { 305 {
313 nfnl_unlock(subsys_id); 306 nfnl_unlock(subsys_id);
314 netlink_ack(skb, nlh, -EOPNOTSUPP); 307 netlink_ack(oskb, nlh, -EOPNOTSUPP);
315 return kfree_skb(skb); 308 return kfree_skb(skb);
316 } 309 }
317 } 310 }
318 311
319 if (!ss->commit || !ss->abort) { 312 if (!ss->commit || !ss->abort) {
320 nfnl_unlock(subsys_id); 313 nfnl_unlock(subsys_id);
321 netlink_ack(skb, nlh, -EOPNOTSUPP); 314 netlink_ack(oskb, nlh, -EOPNOTSUPP);
322 return kfree_skb(skb); 315 return kfree_skb(skb);
323 } 316 }
324 317
@@ -328,10 +321,12 @@ replay:
328 nlh = nlmsg_hdr(skb); 321 nlh = nlmsg_hdr(skb);
329 err = 0; 322 err = 0;
330 323
331 if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) || 324 if (nlh->nlmsg_len < NLMSG_HDRLEN ||
332 skb->len < nlh->nlmsg_len) { 325 skb->len < nlh->nlmsg_len ||
333 err = -EINVAL; 326 nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
334 goto ack; 327 nfnl_err_reset(&err_list);
328 status |= NFNL_BATCH_FAILURE;
329 goto done;
335 } 330 }
336 331
337 /* Only requests are handled by the kernel */ 332 /* Only requests are handled by the kernel */
@@ -406,7 +401,7 @@ ack:
406 * pointing to the batch header. 401 * pointing to the batch header.
407 */ 402 */
408 nfnl_err_reset(&err_list); 403 nfnl_err_reset(&err_list);
409 netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); 404 netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
410 status |= NFNL_BATCH_FAILURE; 405 status |= NFNL_BATCH_FAILURE;
411 goto done; 406 goto done;
412 } 407 }
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 5274b04c42a6..4c2b4c0c4d5f 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -242,6 +242,9 @@ nfacct_filter_alloc(const struct nlattr * const attr)
242 if (err < 0) 242 if (err < 0)
243 return ERR_PTR(err); 243 return ERR_PTR(err);
244 244
245 if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE])
246 return ERR_PTR(-EINVAL);
247
245 filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL); 248 filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
246 if (!filter) 249 if (!filter)
247 return ERR_PTR(-ENOMEM); 250 return ERR_PTR(-ENOMEM);
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 5d010f27ac01..2671b9deb103 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -307,7 +307,7 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
307 307
308 local_bh_disable(); 308 local_bh_disable();
309 for (i = 0; i < net->ct.htable_size; i++) { 309 for (i = 0; i < net->ct.htable_size; i++) {
310 spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); 310 nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
311 if (i < net->ct.htable_size) { 311 if (i < net->ct.htable_size) {
312 hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) 312 hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
313 untimeout(h, timeout); 313 untimeout(h, timeout);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 8ca932057c13..11f81c8385fc 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -330,14 +330,13 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
330 * message. WARNING: has to be <= 128k due to slab restrictions */ 330 * message. WARNING: has to be <= 128k due to slab restrictions */
331 331
332 n = max(inst_size, pkt_size); 332 n = max(inst_size, pkt_size);
333 skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC); 333 skb = alloc_skb(n, GFP_ATOMIC);
334 if (!skb) { 334 if (!skb) {
335 if (n > pkt_size) { 335 if (n > pkt_size) {
336 /* try to allocate only as much as we need for current 336 /* try to allocate only as much as we need for current
337 * packet */ 337 * packet */
338 338
339 skb = nfnetlink_alloc_skb(net, pkt_size, 339 skb = alloc_skb(pkt_size, GFP_ATOMIC);
340 peer_portid, GFP_ATOMIC);
341 } 340 }
342 } 341 }
343 342
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 1d3936587ace..75429997ed41 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
301 __be32 **packet_id_ptr) 301 __be32 **packet_id_ptr)
302{ 302{
303 size_t size; 303 size_t size;
304 size_t data_len = 0, cap_len = 0, rem_len = 0; 304 size_t data_len = 0, cap_len = 0;
305 unsigned int hlen = 0; 305 unsigned int hlen = 0;
306 struct sk_buff *skb; 306 struct sk_buff *skb;
307 struct nlattr *nla; 307 struct nlattr *nla;
@@ -361,7 +361,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
361 hlen = min_t(unsigned int, hlen, data_len); 361 hlen = min_t(unsigned int, hlen, data_len);
362 size += sizeof(struct nlattr) + hlen; 362 size += sizeof(struct nlattr) + hlen;
363 cap_len = entskb->len; 363 cap_len = entskb->len;
364 rem_len = data_len - hlen;
365 break; 364 break;
366 } 365 }
367 366
@@ -386,8 +385,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
386 size += nla_total_size(seclen); 385 size += nla_total_size(seclen);
387 } 386 }
388 387
389 skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid, 388 skb = alloc_skb(size, GFP_ATOMIC);
390 GFP_ATOMIC);
391 if (!skb) { 389 if (!skb) {
392 skb_tx_error(entskb); 390 skb_tx_error(entskb);
393 return NULL; 391 return NULL;
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 383c17138399..b78c28ba465f 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -46,16 +46,14 @@ static void nft_byteorder_eval(const struct nft_expr *expr,
 	switch (priv->op) {
 	case NFT_BYTEORDER_NTOH:
 		for (i = 0; i < priv->len / 8; i++) {
-			src64 = get_unaligned_be64(&src[i]);
-			src64 = be64_to_cpu((__force __be64)src64);
+			src64 = get_unaligned((u64 *)&src[i]);
 			put_unaligned_be64(src64, &dst[i]);
 		}
 		break;
 	case NFT_BYTEORDER_HTON:
 		for (i = 0; i < priv->len / 8; i++) {
 			src64 = get_unaligned_be64(&src[i]);
-			src64 = (__force u64)cpu_to_be64(src64);
-			put_unaligned_be64(src64, &dst[i]);
+			put_unaligned(src64, (u64 *)&dst[i]);
 		}
 		break;
 	}
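The nft_byteorder hunk above removes a redundant second byte swap: each 64-bit value is now read once in its source byte order and written once in the destination order. A minimal userspace sketch of that idea, assuming nothing beyond standard C (load_be64()/store_be64() are illustrative stand-ins for the kernel's unaligned-access helpers, not its API):

#include <stdint.h>
#include <stdio.h>

/* Read a big-endian 64-bit value from an unaligned buffer (network -> host). */
static uint64_t load_be64(const unsigned char *b)
{
	uint64_t v = 0;
	for (int i = 0; i < 8; i++)
		v = (v << 8) | b[i];
	return v;
}

/* Write a host-order value back out in big-endian order (host -> network). */
static void store_be64(unsigned char *b, uint64_t v)
{
	for (int i = 7; i >= 0; i--) {
		b[i] = (unsigned char)(v & 0xff);
		v >>= 8;
	}
}

int main(void)
{
	unsigned char buf[8] = { 0, 0, 0, 0, 0, 0, 0x12, 0x34 };
	uint64_t host = load_be64(buf);	/* one conversion, as in the NTOH path */
	printf("0x%llx\n", (unsigned long long)host);
	store_be64(buf, host);		/* one conversion, as in the HTON path */
	return 0;
}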
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 454841baa4d0..6228c422c766 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -660,6 +660,9 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 	if (IS_ERR(match))
 		return ERR_PTR(-ENOENT);
 
+	if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO]))
+		return ERR_PTR(-EINVAL);
+
 	/* This is the first time we use this match, allocate operations */
 	nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
 	if (nft_match == NULL)
@@ -740,6 +743,9 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 	if (IS_ERR(target))
 		return ERR_PTR(-ENOENT);
 
+	if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO]))
+		return ERR_PTR(-EINVAL);
+
 	/* This is the first time we use this target, allocate operations */
 	nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
 	if (nft_target == NULL)
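The two checks added above make nft_compat refuse a match or target whose declared info size exceeds the netlink attribute that is supposed to carry it, so the kernel never copies past the attribute payload. A hedged userspace sketch of the same length-validation pattern (the structs and attribute type here are invented for the example and are not the nftables API):

#include <errno.h>
#include <stdio.h>

struct fake_attr {		/* stand-in for a netlink attribute */
	unsigned short payload_len;
	const void *payload;
};

struct match_info {		/* stand-in for an xtables match's info blob */
	unsigned int flags;
	unsigned short port;
};

static int check_match_info(const struct fake_attr *attr)
{
	/* Reject if the extension expects more bytes than userspace sent. */
	if (sizeof(struct match_info) > attr->payload_len)
		return -EINVAL;
	return 0;
}

int main(void)
{
	struct match_info info = { 0 };
	struct fake_attr too_short = { .payload_len = 2, .payload = &info };
	struct fake_attr ok = { .payload_len = sizeof(info), .payload = &info };

	printf("%d %d\n", check_match_info(&too_short), check_match_info(&ok));
	return 0;
}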
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index c7808fc19719..c9743f78f219 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -100,7 +100,7 @@ static int nft_counter_init(const struct nft_ctx *ctx,
 
 	cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
 	if (cpu_stats == NULL)
-		return ENOMEM;
+		return -ENOMEM;
 
 	preempt_disable();
 	this_cpu = this_cpu_ptr(cpu_stats);
@@ -138,7 +138,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
 	cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
 					      GFP_ATOMIC);
 	if (cpu_stats == NULL)
-		return ENOMEM;
+		return -ENOMEM;
 
 	preempt_disable();
 	this_cpu = this_cpu_ptr(cpu_stats);
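The one-character fix above turns a bare ENOMEM into -ENOMEM: kernel-style init paths report failure as a negative errno, and a positive value slips past callers that only test for a result below zero. A tiny standalone C sketch of that convention (not the nft_counter code itself):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int init_counter(void **stats)
{
	*stats = calloc(1, 64);		/* stand-in for the per-cpu allocation */
	if (*stats == NULL)
		return -ENOMEM;		/* negative errno, never bare ENOMEM */
	return 0;
}

int main(void)
{
	void *stats;
	int err = init_counter(&stats);

	if (err < 0) {			/* callers treat only err < 0 as failure */
		fprintf(stderr, "init failed: %d\n", err);
		return 1;
	}
	free(stats);
	return 0;
}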
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index a0eb2161e3ef..d4a4619fcebc 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -127,6 +127,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
 		       NF_CT_LABELS_MAX_SIZE - size);
 		return;
 	}
+#endif
 	case NFT_CT_BYTES: /* fallthrough */
 	case NFT_CT_PKTS: {
 		const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
@@ -138,7 +139,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
 		memcpy(dest, &count, sizeof(count));
 		return;
 	}
-#endif
 	default:
 		break;
 	}
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 9aea747b43ea..81b5ad6165ac 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -17,7 +17,9 @@
 #include <net/netfilter/nft_masq.h>
 
 const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
 	[NFTA_MASQ_FLAGS]		= { .type = NLA_U32 },
+	[NFTA_MASQ_REG_PROTO_MIN]	= { .type = NLA_U32 },
+	[NFTA_MASQ_REG_PROTO_MAX]	= { .type = NLA_U32 },
 };
 EXPORT_SYMBOL_GPL(nft_masq_policy);
 
@@ -40,6 +42,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
 		  const struct nft_expr *expr,
 		  const struct nlattr * const tb[])
 {
+	u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
 	struct nft_masq *priv = nft_expr_priv(expr);
 	int err;
 
@@ -47,12 +50,32 @@ int nft_masq_init(const struct nft_ctx *ctx,
 	if (err)
 		return err;
 
-	if (tb[NFTA_MASQ_FLAGS] == NULL)
-		return 0;
-
-	priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
-	if (priv->flags & ~NF_NAT_RANGE_MASK)
-		return -EINVAL;
+	if (tb[NFTA_MASQ_FLAGS]) {
+		priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
+		if (priv->flags & ~NF_NAT_RANGE_MASK)
+			return -EINVAL;
+	}
+
+	if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
+		priv->sreg_proto_min =
+			nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]);
+
+		err = nft_validate_register_load(priv->sreg_proto_min, plen);
+		if (err < 0)
+			return err;
+
+		if (tb[NFTA_MASQ_REG_PROTO_MAX]) {
+			priv->sreg_proto_max =
+				nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]);
+
+			err = nft_validate_register_load(priv->sreg_proto_max,
+							 plen);
+			if (err < 0)
+				return err;
+		} else {
+			priv->sreg_proto_max = priv->sreg_proto_min;
+		}
+	}
 
 	return 0;
 }
@@ -62,12 +85,18 @@ int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_masq *priv = nft_expr_priv(expr);
 
-	if (priv->flags == 0)
-		return 0;
-
-	if (nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
+	if (priv->flags != 0 &&
+	    nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
 		goto nla_put_failure;
 
+	if (priv->sreg_proto_min) {
+		if (nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MIN,
+				      priv->sreg_proto_min) ||
+		    nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MAX,
+				      priv->sreg_proto_max))
+			goto nla_put_failure;
+	}
+
 	return 0;
 
 nla_put_failure:
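nft_masq_init() above gains an optional port-range pair in which the maximum register defaults to the minimum when only one is supplied. A small sketch of that parsing pattern, with plain integers standing in for nft registers (the struct and function names are invented for the illustration):

#include <stdio.h>

struct range_cfg {
	int proto_min;
	int proto_max;
};

static void parse_range(struct range_cfg *cfg, const int *min, const int *max)
{
	if (!min)
		return;				/* no port range requested */
	cfg->proto_min = *min;
	cfg->proto_max = max ? *max : *min;	/* single port if max omitted */
}

int main(void)
{
	struct range_cfg cfg = { 0, 0 };
	int lo = 1000, hi = 2000;

	parse_range(&cfg, &lo, NULL);
	printf("%d-%d\n", cfg.proto_min, cfg.proto_max);	/* 1000-1000 */
	parse_range(&cfg, &lo, &hi);
	printf("%d-%d\n", cfg.proto_min, cfg.proto_max);	/* 1000-2000 */
	return 0;
}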
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index fe885bf271c5..16c50b0dd426 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -28,6 +28,8 @@
 
 #include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
 
+static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
+
 void nft_meta_get_eval(const struct nft_expr *expr,
 		       struct nft_regs *regs,
 		       const struct nft_pktinfo *pkt)
@@ -181,6 +183,11 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 		*dest = sock_cgroup_classid(&sk->sk_cgrp_data);
 		break;
 #endif
+	case NFT_META_PRANDOM: {
+		struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
+		*dest = prandom_u32_state(state);
+		break;
+	}
 	default:
 		WARN_ON(1);
 		goto err;
@@ -277,6 +284,10 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
 	case NFT_META_OIFNAME:
 		len = IFNAMSIZ;
 		break;
+	case NFT_META_PRANDOM:
+		prandom_init_once(&nft_prandom_state);
+		len = sizeof(u32);
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
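NFT_META_PRANDOM above keeps one PRNG state per CPU and seeds it lazily the first time the key is configured. A rough userspace analogue of that pattern, using C11 thread-local storage and POSIX rand_r() as stand-ins for the kernel's per-cpu state and prandom_u32_state() (it is not the kernel's generator):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static _Thread_local unsigned int prandom_state;
static _Thread_local int prandom_seeded;

static unsigned int meta_prandom(void)
{
	if (!prandom_seeded) {		/* lazy seeding, like prandom_init_once() */
		prandom_state = (unsigned int)time(NULL);
		prandom_seeded = 1;
	}
	return (unsigned int)rand_r(&prandom_state);
}

int main(void)
{
	printf("%u %u\n", meta_prandom(), meta_prandom());
	return 0;
}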
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index c8a0b7da5ff4..582c9cfd6567 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -659,6 +659,9 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
 	struct xt_table_info *info = NULL;
 	size_t sz = sizeof(*info) + size;
 
+	if (sz < sizeof(*info))
+		return NULL;
+
 	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
 	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
 		return NULL;
@@ -694,12 +697,45 @@ EXPORT_SYMBOL(xt_free_table_info);
 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 				    const char *name)
 {
-	struct xt_table *t;
+	struct xt_table *t, *found = NULL;
 
 	mutex_lock(&xt[af].mutex);
 	list_for_each_entry(t, &net->xt.tables[af], list)
 		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
 			return t;
+
+	if (net == &init_net)
+		goto out;
+
+	/* Table doesn't exist in this netns, re-try init */
+	list_for_each_entry(t, &init_net.xt.tables[af], list) {
+		if (strcmp(t->name, name))
+			continue;
+		if (!try_module_get(t->me))
+			return NULL;
+
+		mutex_unlock(&xt[af].mutex);
+		if (t->table_init(net) != 0) {
+			module_put(t->me);
+			return NULL;
+		}
+
+		found = t;
+
+		mutex_lock(&xt[af].mutex);
+		break;
+	}
+
+	if (!found)
+		goto out;
+
+	/* and once again: */
+	list_for_each_entry(t, &net->xt.tables[af], list)
+		if (strcmp(t->name, name) == 0)
+			return t;
+
+	module_put(found->me);
+ out:
 	mutex_unlock(&xt[af].mutex);
 	return NULL;
 }
@@ -1170,20 +1206,20 @@ static const struct file_operations xt_target_ops = {
 #endif /* CONFIG_PROC_FS */
 
 /**
- * xt_hook_link - set up hooks for a new table
+ * xt_hook_ops_alloc - set up hooks for a new table
  * @table: table with metadata needed to set up hooks
  * @fn: Hook function
  *
- * This function will take care of creating and registering the necessary
- * Netfilter hooks for XT tables.
+ * This function will create the nf_hook_ops that the x_table needs
+ * to hand to xt_hook_link_net().
  */
-struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
+struct nf_hook_ops *
+xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
 {
 	unsigned int hook_mask = table->valid_hooks;
 	uint8_t i, num_hooks = hweight32(hook_mask);
 	uint8_t hooknum;
 	struct nf_hook_ops *ops;
-	int ret;
 
 	ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
 	if (ops == NULL)
@@ -1200,27 +1236,9 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
 		++i;
 	}
 
-	ret = nf_register_hooks(ops, num_hooks);
-	if (ret < 0) {
-		kfree(ops);
-		return ERR_PTR(ret);
-	}
-
 	return ops;
 }
-EXPORT_SYMBOL_GPL(xt_hook_link);
-
-/**
- * xt_hook_unlink - remove hooks for a table
- * @ops: nf_hook_ops array as returned by nf_hook_link
- * @hook_mask: the very same mask that was passed to nf_hook_link
- */
-void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops)
-{
-	nf_unregister_hooks(ops, hweight32(table->valid_hooks));
-	kfree(ops);
-}
-EXPORT_SYMBOL_GPL(xt_hook_unlink);
+EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);
 
 int xt_proto_init(struct net *net, u_int8_t af)
 {
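The new sz < sizeof(*info) test in xt_alloc_table_info() above guards the header-plus-payload size against wrap-around before anything is allocated. A standalone sketch of that overflow check in plain C (illustrative only, not the kernel allocator):

#include <stdio.h>
#include <stdlib.h>

struct table_info {
	unsigned int size;
	/* payload follows the header in one allocation */
};

static struct table_info *alloc_table_info(size_t payload)
{
	size_t sz = sizeof(struct table_info) + payload;

	/* A payload near SIZE_MAX wraps the sum below the header size;
	 * reject it instead of under-allocating. */
	if (sz < sizeof(struct table_info))
		return NULL;

	return calloc(1, sz);
}

int main(void)
{
	printf("%p\n", (void *)alloc_table_info(64));		/* plausible size */
	printf("%p\n", (void *)alloc_table_info((size_t)-1));	/* rejected */
	return 0;
}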
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index b7c43def0dc6..e118397254af 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -228,7 +228,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	u8 nexthdr;
-	__be16 frag_off;
+	__be16 frag_off, oldlen, newlen;
 	int tcphoff;
 	int ret;
 
@@ -244,7 +244,12 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 		return NF_DROP;
 	if (ret > 0) {
 		ipv6h = ipv6_hdr(skb);
-		ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
+		oldlen = ipv6h->payload_len;
+		newlen = htons(ntohs(oldlen) + ret);
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->csum = csum_add(csum_sub(skb->csum, oldlen),
+					     newlen);
+		ipv6h->payload_len = newlen;
 	}
 	return XT_CONTINUE;
 }
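The hunk above adjusts skb->csum incrementally when the IPv6 payload length changes under CHECKSUM_COMPLETE: subtract the old 16-bit field from the running sum and add the new one instead of recomputing the whole checksum. A userspace sketch of that one's-complement update, with simplified csum_add()/csum_sub() helpers modelled on, but not identical to, the kernel's:

#include <stdint.h>
#include <stdio.h>

static uint32_t csum_add(uint32_t csum, uint32_t addend)
{
	csum += addend;
	return csum + (csum < addend);	/* fold the end-around carry back in */
}

static uint32_t csum_sub(uint32_t csum, uint32_t subtrahend)
{
	return csum_add(csum, ~subtrahend);	/* one's-complement subtraction */
}

int main(void)
{
	uint32_t sum = 0x1234;		/* pretend running packet checksum */
	uint16_t oldlen = 0x0040, newlen = 0x0058;

	sum = csum_add(csum_sub(sum, oldlen), newlen);
	printf("adjusted sum: 0x%08x\n", sum);
	return 0;
}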
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 3eff7b67cdf2..6e57a3966dc5 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -38,7 +38,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 static unsigned int
 tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
@@ -131,7 +131,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
 		.destroy    = tee_tg_destroy,
 		.me         = THIS_MODULE,
 	},
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
 	{
 		.name       = "TEE",
 		.revision   = 1,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 3ab591e73ec0..7f4414d26a66 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -105,19 +105,24 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
105 * belonging to established connections going through that one. 105 * belonging to established connections going through that one.
106 */ 106 */
107static inline struct sock * 107static inline struct sock *
108nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, 108nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
109 const u8 protocol,
109 const __be32 saddr, const __be32 daddr, 110 const __be32 saddr, const __be32 daddr,
110 const __be16 sport, const __be16 dport, 111 const __be16 sport, const __be16 dport,
111 const struct net_device *in, 112 const struct net_device *in,
112 const enum nf_tproxy_lookup_t lookup_type) 113 const enum nf_tproxy_lookup_t lookup_type)
113{ 114{
114 struct sock *sk; 115 struct sock *sk;
116 struct tcphdr *tcph;
115 117
116 switch (protocol) { 118 switch (protocol) {
117 case IPPROTO_TCP: 119 case IPPROTO_TCP:
118 switch (lookup_type) { 120 switch (lookup_type) {
119 case NFT_LOOKUP_LISTENER: 121 case NFT_LOOKUP_LISTENER:
120 sk = inet_lookup_listener(net, &tcp_hashinfo, 122 tcph = hp;
123 sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
124 ip_hdrlen(skb) +
125 __tcp_hdrlen(tcph),
121 saddr, sport, 126 saddr, sport,
122 daddr, dport, 127 daddr, dport,
123 in->ifindex); 128 in->ifindex);
@@ -169,19 +174,23 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
169 174
170#ifdef XT_TPROXY_HAVE_IPV6 175#ifdef XT_TPROXY_HAVE_IPV6
171static inline struct sock * 176static inline struct sock *
172nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, 177nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
178 const u8 protocol,
173 const struct in6_addr *saddr, const struct in6_addr *daddr, 179 const struct in6_addr *saddr, const struct in6_addr *daddr,
174 const __be16 sport, const __be16 dport, 180 const __be16 sport, const __be16 dport,
175 const struct net_device *in, 181 const struct net_device *in,
176 const enum nf_tproxy_lookup_t lookup_type) 182 const enum nf_tproxy_lookup_t lookup_type)
177{ 183{
178 struct sock *sk; 184 struct sock *sk;
185 struct tcphdr *tcph;
179 186
180 switch (protocol) { 187 switch (protocol) {
181 case IPPROTO_TCP: 188 case IPPROTO_TCP:
182 switch (lookup_type) { 189 switch (lookup_type) {
183 case NFT_LOOKUP_LISTENER: 190 case NFT_LOOKUP_LISTENER:
184 sk = inet6_lookup_listener(net, &tcp_hashinfo, 191 tcph = hp;
192 sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
193 thoff + __tcp_hdrlen(tcph),
185 saddr, sport, 194 saddr, sport,
186 daddr, ntohs(dport), 195 daddr, ntohs(dport),
187 in->ifindex); 196 in->ifindex);
@@ -267,7 +276,7 @@ tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
267 * to a listener socket if there's one */ 276 * to a listener socket if there's one */
268 struct sock *sk2; 277 struct sock *sk2;
269 278
270 sk2 = nf_tproxy_get_sock_v4(net, iph->protocol, 279 sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
271 iph->saddr, laddr ? laddr : iph->daddr, 280 iph->saddr, laddr ? laddr : iph->daddr,
272 hp->source, lport ? lport : hp->dest, 281 hp->source, lport ? lport : hp->dest,
273 skb->dev, NFT_LOOKUP_LISTENER); 282 skb->dev, NFT_LOOKUP_LISTENER);
@@ -305,7 +314,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
305 * addresses, this happens if the redirect already happened 314 * addresses, this happens if the redirect already happened
306 * and the current packet belongs to an already established 315 * and the current packet belongs to an already established
307 * connection */ 316 * connection */
308 sk = nf_tproxy_get_sock_v4(net, iph->protocol, 317 sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
309 iph->saddr, iph->daddr, 318 iph->saddr, iph->daddr,
310 hp->source, hp->dest, 319 hp->source, hp->dest,
311 skb->dev, NFT_LOOKUP_ESTABLISHED); 320 skb->dev, NFT_LOOKUP_ESTABLISHED);
@@ -321,7 +330,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
321 else if (!sk) 330 else if (!sk)
322 /* no, there's no established connection, check if 331 /* no, there's no established connection, check if
323 * there's a listener on the redirected addr/port */ 332 * there's a listener on the redirected addr/port */
324 sk = nf_tproxy_get_sock_v4(net, iph->protocol, 333 sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
325 iph->saddr, laddr, 334 iph->saddr, laddr,
326 hp->source, lport, 335 hp->source, lport,
327 skb->dev, NFT_LOOKUP_LISTENER); 336 skb->dev, NFT_LOOKUP_LISTENER);
@@ -429,7 +438,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
429 * to a listener socket if there's one */ 438 * to a listener socket if there's one */
430 struct sock *sk2; 439 struct sock *sk2;
431 440
432 sk2 = nf_tproxy_get_sock_v6(par->net, tproto, 441 sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
433 &iph->saddr, 442 &iph->saddr,
434 tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), 443 tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
435 hp->source, 444 hp->source,
@@ -472,7 +481,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
472 * addresses, this happens if the redirect already happened 481 * addresses, this happens if the redirect already happened
473 * and the current packet belongs to an already established 482 * and the current packet belongs to an already established
474 * connection */ 483 * connection */
475 sk = nf_tproxy_get_sock_v6(par->net, tproto, 484 sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
476 &iph->saddr, &iph->daddr, 485 &iph->saddr, &iph->daddr,
477 hp->source, hp->dest, 486 hp->source, hp->dest,
478 par->in, NFT_LOOKUP_ESTABLISHED); 487 par->in, NFT_LOOKUP_ESTABLISHED);
@@ -487,8 +496,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
487 else if (!sk) 496 else if (!sk)
488 /* no there's no established connection, check if 497 /* no there's no established connection, check if
489 * there's a listener on the redirected addr/port */ 498 * there's a listener on the redirected addr/port */
490 sk = nf_tproxy_get_sock_v6(par->net, tproto, 499 sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp,
491 &iph->saddr, laddr, 500 tproto, &iph->saddr, laddr,
492 hp->source, lport, 501 hp->source, lport,
493 par->in, NFT_LOOKUP_LISTENER); 502 par->in, NFT_LOOKUP_LISTENER);
494 503
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 4e3c3affd285..2455b69b5810 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -262,7 +262,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 			if (f->opt[optnum].kind == (*optp)) {
 				__u32 len = f->opt[optnum].length;
 				const __u8 *optend = optp + len;
-				int loop_cont = 0;
 
 				fmatch = FMATCH_OK;
 
@@ -275,7 +274,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 					mss = ntohs((__force __be16)mss);
 					break;
 				case OSFOPT_TS:
-					loop_cont = 1;
 					break;
 				}
 
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 2ec08f04b816..49d14ecad444 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -112,14 +112,15 @@ extract_icmp4_fields(const struct sk_buff *skb,
112 * box. 112 * box.
113 */ 113 */
114static struct sock * 114static struct sock *
115xt_socket_get_sock_v4(struct net *net, const u8 protocol, 115xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
116 const u8 protocol,
116 const __be32 saddr, const __be32 daddr, 117 const __be32 saddr, const __be32 daddr,
117 const __be16 sport, const __be16 dport, 118 const __be16 sport, const __be16 dport,
118 const struct net_device *in) 119 const struct net_device *in)
119{ 120{
120 switch (protocol) { 121 switch (protocol) {
121 case IPPROTO_TCP: 122 case IPPROTO_TCP:
122 return __inet_lookup(net, &tcp_hashinfo, 123 return __inet_lookup(net, &tcp_hashinfo, skb, doff,
123 saddr, sport, daddr, dport, 124 saddr, sport, daddr, dport,
124 in->ifindex); 125 in->ifindex);
125 case IPPROTO_UDP: 126 case IPPROTO_UDP:
@@ -148,6 +149,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
148 const struct net_device *indev) 149 const struct net_device *indev)
149{ 150{
150 const struct iphdr *iph = ip_hdr(skb); 151 const struct iphdr *iph = ip_hdr(skb);
152 struct sk_buff *data_skb = NULL;
153 int doff = 0;
151 __be32 uninitialized_var(daddr), uninitialized_var(saddr); 154 __be32 uninitialized_var(daddr), uninitialized_var(saddr);
152 __be16 uninitialized_var(dport), uninitialized_var(sport); 155 __be16 uninitialized_var(dport), uninitialized_var(sport);
153 u8 uninitialized_var(protocol); 156 u8 uninitialized_var(protocol);
@@ -169,6 +172,10 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
169 sport = hp->source; 172 sport = hp->source;
170 daddr = iph->daddr; 173 daddr = iph->daddr;
171 dport = hp->dest; 174 dport = hp->dest;
175 data_skb = (struct sk_buff *)skb;
176 doff = iph->protocol == IPPROTO_TCP ?
177 ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
178 ip_hdrlen(skb) + sizeof(*hp);
172 179
173 } else if (iph->protocol == IPPROTO_ICMP) { 180 } else if (iph->protocol == IPPROTO_ICMP) {
174 if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, 181 if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
@@ -198,8 +205,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
198 } 205 }
199#endif 206#endif
200 207
201 return xt_socket_get_sock_v4(net, protocol, saddr, daddr, 208 return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
202 sport, dport, indev); 209 daddr, sport, dport, indev);
203} 210}
204 211
205static bool 212static bool
@@ -318,14 +325,15 @@ extract_icmp6_fields(const struct sk_buff *skb,
318} 325}
319 326
320static struct sock * 327static struct sock *
321xt_socket_get_sock_v6(struct net *net, const u8 protocol, 328xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
329 const u8 protocol,
322 const struct in6_addr *saddr, const struct in6_addr *daddr, 330 const struct in6_addr *saddr, const struct in6_addr *daddr,
323 const __be16 sport, const __be16 dport, 331 const __be16 sport, const __be16 dport,
324 const struct net_device *in) 332 const struct net_device *in)
325{ 333{
326 switch (protocol) { 334 switch (protocol) {
327 case IPPROTO_TCP: 335 case IPPROTO_TCP:
328 return inet6_lookup(net, &tcp_hashinfo, 336 return inet6_lookup(net, &tcp_hashinfo, skb, doff,
329 saddr, sport, daddr, dport, 337 saddr, sport, daddr, dport,
330 in->ifindex); 338 in->ifindex);
331 case IPPROTO_UDP: 339 case IPPROTO_UDP:
@@ -343,6 +351,8 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
343 __be16 uninitialized_var(dport), uninitialized_var(sport); 351 __be16 uninitialized_var(dport), uninitialized_var(sport);
344 const struct in6_addr *daddr = NULL, *saddr = NULL; 352 const struct in6_addr *daddr = NULL, *saddr = NULL;
345 struct ipv6hdr *iph = ipv6_hdr(skb); 353 struct ipv6hdr *iph = ipv6_hdr(skb);
354 struct sk_buff *data_skb = NULL;
355 int doff = 0;
346 int thoff = 0, tproto; 356 int thoff = 0, tproto;
347 357
348 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); 358 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
@@ -362,6 +372,10 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
362 sport = hp->source; 372 sport = hp->source;
363 daddr = &iph->daddr; 373 daddr = &iph->daddr;
364 dport = hp->dest; 374 dport = hp->dest;
375 data_skb = (struct sk_buff *)skb;
376 doff = tproto == IPPROTO_TCP ?
377 thoff + __tcp_hdrlen((struct tcphdr *)hp) :
378 thoff + sizeof(*hp);
365 379
366 } else if (tproto == IPPROTO_ICMPV6) { 380 } else if (tproto == IPPROTO_ICMPV6) {
367 struct ipv6hdr ipv6_var; 381 struct ipv6hdr ipv6_var;
@@ -373,7 +387,7 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
373 return NULL; 387 return NULL;
374 } 388 }
375 389
376 return xt_socket_get_sock_v6(net, tproto, saddr, daddr, 390 return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
377 sport, dport, indev); 391 sport, dport, indev);
378} 392}
379 393
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index f0cb92f3ddaf..ada67422234b 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -55,8 +55,8 @@ struct netlbl_domhsh_tbl {
 static DEFINE_SPINLOCK(netlbl_domhsh_lock);
 #define netlbl_domhsh_rcu_deref(p) \
 	rcu_dereference_check(p, lockdep_is_held(&netlbl_domhsh_lock))
-static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL;
-static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
+static struct netlbl_domhsh_tbl *netlbl_domhsh;
+static struct netlbl_dom_map *netlbl_domhsh_def;
 
 /*
  * Domain Hash Table Helper Functions
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index b0380927f05f..9eaa9a1e8629 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -116,11 +116,11 @@ struct netlbl_unlhsh_walk_arg {
 static DEFINE_SPINLOCK(netlbl_unlhsh_lock);
 #define netlbl_unlhsh_rcu_deref(p) \
 	rcu_dereference_check(p, lockdep_is_held(&netlbl_unlhsh_lock))
-static struct netlbl_unlhsh_tbl *netlbl_unlhsh = NULL;
-static struct netlbl_unlhsh_iface *netlbl_unlhsh_def = NULL;
+static struct netlbl_unlhsh_tbl *netlbl_unlhsh;
+static struct netlbl_unlhsh_iface *netlbl_unlhsh_def;
 
 /* Accept unlabeled packets flag */
-static u8 netlabel_unlabel_acceptflg = 0;
+static u8 netlabel_unlabel_acceptflg;
 
 /* NetLabel Generic NETLINK unlabeled family */
 static struct genl_family netlbl_unlabel_gnl_family = {
diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
index 2c5e95e9bfbd..5d6e8c05b3d4 100644
--- a/net/netlink/Kconfig
+++ b/net/netlink/Kconfig
@@ -2,15 +2,6 @@
 # Netlink Sockets
 #
 
-config NETLINK_MMAP
-	bool "NETLINK: mmaped IO"
-	---help---
-	  This option enables support for memory mapped netlink IO. This
-	  reduces overhead by avoiding copying data between kernel- and
-	  userspace.
-
-	  If unsure, say N.
-
 config NETLINK_DIAG
 	tristate "NETLINK: socket monitoring interface"
 	default n
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 81dc1bb6e016..215fc08c02ab 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
 
 	dev_hold(dev);
 
-	if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
+	if (is_vmalloc_addr(skb->head))
 		nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
 	else
 		nskb = skb_clone(skb, GFP_ATOMIC);
@@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk)
300 wake_up_interruptible(&nlk->wait); 300 wake_up_interruptible(&nlk->wait);
301} 301}
302 302
303#ifdef CONFIG_NETLINK_MMAP
304static bool netlink_rx_is_mmaped(struct sock *sk)
305{
306 return nlk_sk(sk)->rx_ring.pg_vec != NULL;
307}
308
309static bool netlink_tx_is_mmaped(struct sock *sk)
310{
311 return nlk_sk(sk)->tx_ring.pg_vec != NULL;
312}
313
314static __pure struct page *pgvec_to_page(const void *addr)
315{
316 if (is_vmalloc_addr(addr))
317 return vmalloc_to_page(addr);
318 else
319 return virt_to_page(addr);
320}
321
322static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
323{
324 unsigned int i;
325
326 for (i = 0; i < len; i++) {
327 if (pg_vec[i] != NULL) {
328 if (is_vmalloc_addr(pg_vec[i]))
329 vfree(pg_vec[i]);
330 else
331 free_pages((unsigned long)pg_vec[i], order);
332 }
333 }
334 kfree(pg_vec);
335}
336
337static void *alloc_one_pg_vec_page(unsigned long order)
338{
339 void *buffer;
340 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
341 __GFP_NOWARN | __GFP_NORETRY;
342
343 buffer = (void *)__get_free_pages(gfp_flags, order);
344 if (buffer != NULL)
345 return buffer;
346
347 buffer = vzalloc((1 << order) * PAGE_SIZE);
348 if (buffer != NULL)
349 return buffer;
350
351 gfp_flags &= ~__GFP_NORETRY;
352 return (void *)__get_free_pages(gfp_flags, order);
353}
354
355static void **alloc_pg_vec(struct netlink_sock *nlk,
356 struct nl_mmap_req *req, unsigned int order)
357{
358 unsigned int block_nr = req->nm_block_nr;
359 unsigned int i;
360 void **pg_vec;
361
362 pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
363 if (pg_vec == NULL)
364 return NULL;
365
366 for (i = 0; i < block_nr; i++) {
367 pg_vec[i] = alloc_one_pg_vec_page(order);
368 if (pg_vec[i] == NULL)
369 goto err1;
370 }
371
372 return pg_vec;
373err1:
374 free_pg_vec(pg_vec, order, block_nr);
375 return NULL;
376}
377
378
379static void
380__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
381 unsigned int order)
382{
383 struct netlink_sock *nlk = nlk_sk(sk);
384 struct sk_buff_head *queue;
385 struct netlink_ring *ring;
386
387 queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
388 ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
389
390 spin_lock_bh(&queue->lock);
391
392 ring->frame_max = req->nm_frame_nr - 1;
393 ring->head = 0;
394 ring->frame_size = req->nm_frame_size;
395 ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
396
397 swap(ring->pg_vec_len, req->nm_block_nr);
398 swap(ring->pg_vec_order, order);
399 swap(ring->pg_vec, pg_vec);
400
401 __skb_queue_purge(queue);
402 spin_unlock_bh(&queue->lock);
403
404 WARN_ON(atomic_read(&nlk->mapped));
405
406 if (pg_vec)
407 free_pg_vec(pg_vec, order, req->nm_block_nr);
408}
409
410static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
411 bool tx_ring)
412{
413 struct netlink_sock *nlk = nlk_sk(sk);
414 struct netlink_ring *ring;
415 void **pg_vec = NULL;
416 unsigned int order = 0;
417
418 ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
419
420 if (atomic_read(&nlk->mapped))
421 return -EBUSY;
422 if (atomic_read(&ring->pending))
423 return -EBUSY;
424
425 if (req->nm_block_nr) {
426 if (ring->pg_vec != NULL)
427 return -EBUSY;
428
429 if ((int)req->nm_block_size <= 0)
430 return -EINVAL;
431 if (!PAGE_ALIGNED(req->nm_block_size))
432 return -EINVAL;
433 if (req->nm_frame_size < NL_MMAP_HDRLEN)
434 return -EINVAL;
435 if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
436 return -EINVAL;
437
438 ring->frames_per_block = req->nm_block_size /
439 req->nm_frame_size;
440 if (ring->frames_per_block == 0)
441 return -EINVAL;
442 if (ring->frames_per_block * req->nm_block_nr !=
443 req->nm_frame_nr)
444 return -EINVAL;
445
446 order = get_order(req->nm_block_size);
447 pg_vec = alloc_pg_vec(nlk, req, order);
448 if (pg_vec == NULL)
449 return -ENOMEM;
450 } else {
451 if (req->nm_frame_nr)
452 return -EINVAL;
453 }
454
455 mutex_lock(&nlk->pg_vec_lock);
456 if (atomic_read(&nlk->mapped) == 0) {
457 __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
458 mutex_unlock(&nlk->pg_vec_lock);
459 return 0;
460 }
461
462 mutex_unlock(&nlk->pg_vec_lock);
463
464 if (pg_vec)
465 free_pg_vec(pg_vec, order, req->nm_block_nr);
466
467 return -EBUSY;
468}
469
470static void netlink_mm_open(struct vm_area_struct *vma)
471{
472 struct file *file = vma->vm_file;
473 struct socket *sock = file->private_data;
474 struct sock *sk = sock->sk;
475
476 if (sk)
477 atomic_inc(&nlk_sk(sk)->mapped);
478}
479
480static void netlink_mm_close(struct vm_area_struct *vma)
481{
482 struct file *file = vma->vm_file;
483 struct socket *sock = file->private_data;
484 struct sock *sk = sock->sk;
485
486 if (sk)
487 atomic_dec(&nlk_sk(sk)->mapped);
488}
489
490static const struct vm_operations_struct netlink_mmap_ops = {
491 .open = netlink_mm_open,
492 .close = netlink_mm_close,
493};
494
495static int netlink_mmap(struct file *file, struct socket *sock,
496 struct vm_area_struct *vma)
497{
498 struct sock *sk = sock->sk;
499 struct netlink_sock *nlk = nlk_sk(sk);
500 struct netlink_ring *ring;
501 unsigned long start, size, expected;
502 unsigned int i;
503 int err = -EINVAL;
504
505 if (vma->vm_pgoff)
506 return -EINVAL;
507
508 mutex_lock(&nlk->pg_vec_lock);
509
510 expected = 0;
511 for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
512 if (ring->pg_vec == NULL)
513 continue;
514 expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
515 }
516
517 if (expected == 0)
518 goto out;
519
520 size = vma->vm_end - vma->vm_start;
521 if (size != expected)
522 goto out;
523
524 start = vma->vm_start;
525 for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
526 if (ring->pg_vec == NULL)
527 continue;
528
529 for (i = 0; i < ring->pg_vec_len; i++) {
530 struct page *page;
531 void *kaddr = ring->pg_vec[i];
532 unsigned int pg_num;
533
534 for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
535 page = pgvec_to_page(kaddr);
536 err = vm_insert_page(vma, start, page);
537 if (err < 0)
538 goto out;
539 start += PAGE_SIZE;
540 kaddr += PAGE_SIZE;
541 }
542 }
543 }
544
545 atomic_inc(&nlk->mapped);
546 vma->vm_ops = &netlink_mmap_ops;
547 err = 0;
548out:
549 mutex_unlock(&nlk->pg_vec_lock);
550 return err;
551}
552
553static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
554{
555#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
556 struct page *p_start, *p_end;
557
558 /* First page is flushed through netlink_{get,set}_status */
559 p_start = pgvec_to_page(hdr + PAGE_SIZE);
560 p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
561 while (p_start <= p_end) {
562 flush_dcache_page(p_start);
563 p_start++;
564 }
565#endif
566}
567
568static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
569{
570 smp_rmb();
571 flush_dcache_page(pgvec_to_page(hdr));
572 return hdr->nm_status;
573}
574
575static void netlink_set_status(struct nl_mmap_hdr *hdr,
576 enum nl_mmap_status status)
577{
578 smp_mb();
579 hdr->nm_status = status;
580 flush_dcache_page(pgvec_to_page(hdr));
581}
582
583static struct nl_mmap_hdr *
584__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
585{
586 unsigned int pg_vec_pos, frame_off;
587
588 pg_vec_pos = pos / ring->frames_per_block;
589 frame_off = pos % ring->frames_per_block;
590
591 return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
592}
593
594static struct nl_mmap_hdr *
595netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
596 enum nl_mmap_status status)
597{
598 struct nl_mmap_hdr *hdr;
599
600 hdr = __netlink_lookup_frame(ring, pos);
601 if (netlink_get_status(hdr) != status)
602 return NULL;
603
604 return hdr;
605}
606
607static struct nl_mmap_hdr *
608netlink_current_frame(const struct netlink_ring *ring,
609 enum nl_mmap_status status)
610{
611 return netlink_lookup_frame(ring, ring->head, status);
612}
613
614static void netlink_increment_head(struct netlink_ring *ring)
615{
616 ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
617}
618
619static void netlink_forward_ring(struct netlink_ring *ring)
620{
621 unsigned int head = ring->head;
622 const struct nl_mmap_hdr *hdr;
623
624 do {
625 hdr = __netlink_lookup_frame(ring, ring->head);
626 if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
627 break;
628 if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
629 break;
630 netlink_increment_head(ring);
631 } while (ring->head != head);
632}
633
634static bool netlink_has_valid_frame(struct netlink_ring *ring)
635{
636 unsigned int head = ring->head, pos = head;
637 const struct nl_mmap_hdr *hdr;
638
639 do {
640 hdr = __netlink_lookup_frame(ring, pos);
641 if (hdr->nm_status == NL_MMAP_STATUS_VALID)
642 return true;
643 pos = pos != 0 ? pos - 1 : ring->frame_max;
644 } while (pos != head);
645
646 return false;
647}
648
649static bool netlink_dump_space(struct netlink_sock *nlk)
650{
651 struct netlink_ring *ring = &nlk->rx_ring;
652 struct nl_mmap_hdr *hdr;
653 unsigned int n;
654
655 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
656 if (hdr == NULL)
657 return false;
658
659 n = ring->head + ring->frame_max / 2;
660 if (n > ring->frame_max)
661 n -= ring->frame_max;
662
663 hdr = __netlink_lookup_frame(ring, n);
664
665 return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
666}
667
668static unsigned int netlink_poll(struct file *file, struct socket *sock,
669 poll_table *wait)
670{
671 struct sock *sk = sock->sk;
672 struct netlink_sock *nlk = nlk_sk(sk);
673 unsigned int mask;
674 int err;
675
676 if (nlk->rx_ring.pg_vec != NULL) {
677 /* Memory mapped sockets don't call recvmsg(), so flow control
678 * for dumps is performed here. A dump is allowed to continue
679 * if at least half the ring is unused.
680 */
681 while (nlk->cb_running && netlink_dump_space(nlk)) {
682 err = netlink_dump(sk);
683 if (err < 0) {
684 sk->sk_err = -err;
685 sk->sk_error_report(sk);
686 break;
687 }
688 }
689 netlink_rcv_wake(sk);
690 }
691
692 mask = datagram_poll(file, sock, wait);
693
694 /* We could already have received frames in the normal receive
695 * queue, that will show up as NL_MMAP_STATUS_COPY in the ring,
696 * so if mask contains pollin/etc already, there's no point
697 * walking the ring.
698 */
699 if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) {
700 spin_lock_bh(&sk->sk_receive_queue.lock);
701 if (nlk->rx_ring.pg_vec) {
702 if (netlink_has_valid_frame(&nlk->rx_ring))
703 mask |= POLLIN | POLLRDNORM;
704 }
705 spin_unlock_bh(&sk->sk_receive_queue.lock);
706 }
707
708 spin_lock_bh(&sk->sk_write_queue.lock);
709 if (nlk->tx_ring.pg_vec) {
710 if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
711 mask |= POLLOUT | POLLWRNORM;
712 }
713 spin_unlock_bh(&sk->sk_write_queue.lock);
714
715 return mask;
716}
717
718static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
719{
720 return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
721}
722
723static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
724 struct netlink_ring *ring,
725 struct nl_mmap_hdr *hdr)
726{
727 unsigned int size;
728 void *data;
729
730 size = ring->frame_size - NL_MMAP_HDRLEN;
731 data = (void *)hdr + NL_MMAP_HDRLEN;
732
733 skb->head = data;
734 skb->data = data;
735 skb_reset_tail_pointer(skb);
736 skb->end = skb->tail + size;
737 skb->len = 0;
738
739 skb->destructor = netlink_skb_destructor;
740 NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
741 NETLINK_CB(skb).sk = sk;
742}
743
744static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
745 u32 dst_portid, u32 dst_group,
746 struct scm_cookie *scm)
747{
748 struct netlink_sock *nlk = nlk_sk(sk);
749 struct netlink_ring *ring;
750 struct nl_mmap_hdr *hdr;
751 struct sk_buff *skb;
752 unsigned int maxlen;
753 int err = 0, len = 0;
754
755 mutex_lock(&nlk->pg_vec_lock);
756
757 ring = &nlk->tx_ring;
758 maxlen = ring->frame_size - NL_MMAP_HDRLEN;
759
760 do {
761 unsigned int nm_len;
762
763 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
764 if (hdr == NULL) {
765 if (!(msg->msg_flags & MSG_DONTWAIT) &&
766 atomic_read(&nlk->tx_ring.pending))
767 schedule();
768 continue;
769 }
770
771 nm_len = ACCESS_ONCE(hdr->nm_len);
772 if (nm_len > maxlen) {
773 err = -EINVAL;
774 goto out;
775 }
776
777 netlink_frame_flush_dcache(hdr, nm_len);
778
779 skb = alloc_skb(nm_len, GFP_KERNEL);
780 if (skb == NULL) {
781 err = -ENOBUFS;
782 goto out;
783 }
784 __skb_put(skb, nm_len);
785 memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
786 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
787
788 netlink_increment_head(ring);
789
790 NETLINK_CB(skb).portid = nlk->portid;
791 NETLINK_CB(skb).dst_group = dst_group;
792 NETLINK_CB(skb).creds = scm->creds;
793
794 err = security_netlink_send(sk, skb);
795 if (err) {
796 kfree_skb(skb);
797 goto out;
798 }
799
800 if (unlikely(dst_group)) {
801 atomic_inc(&skb->users);
802 netlink_broadcast(sk, skb, dst_portid, dst_group,
803 GFP_KERNEL);
804 }
805 err = netlink_unicast(sk, skb, dst_portid,
806 msg->msg_flags & MSG_DONTWAIT);
807 if (err < 0)
808 goto out;
809 len += err;
810
811 } while (hdr != NULL ||
812 (!(msg->msg_flags & MSG_DONTWAIT) &&
813 atomic_read(&nlk->tx_ring.pending)));
814
815 if (len > 0)
816 err = len;
817out:
818 mutex_unlock(&nlk->pg_vec_lock);
819 return err;
820}
821
822static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
823{
824 struct nl_mmap_hdr *hdr;
825
826 hdr = netlink_mmap_hdr(skb);
827 hdr->nm_len = skb->len;
828 hdr->nm_group = NETLINK_CB(skb).dst_group;
829 hdr->nm_pid = NETLINK_CB(skb).creds.pid;
830 hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
831 hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
832 netlink_frame_flush_dcache(hdr, hdr->nm_len);
833 netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
834
835 NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
836 kfree_skb(skb);
837}
838
839static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
840{
841 struct netlink_sock *nlk = nlk_sk(sk);
842 struct netlink_ring *ring = &nlk->rx_ring;
843 struct nl_mmap_hdr *hdr;
844
845 spin_lock_bh(&sk->sk_receive_queue.lock);
846 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
847 if (hdr == NULL) {
848 spin_unlock_bh(&sk->sk_receive_queue.lock);
849 kfree_skb(skb);
850 netlink_overrun(sk);
851 return;
852 }
853 netlink_increment_head(ring);
854 __skb_queue_tail(&sk->sk_receive_queue, skb);
855 spin_unlock_bh(&sk->sk_receive_queue.lock);
856
857 hdr->nm_len = skb->len;
858 hdr->nm_group = NETLINK_CB(skb).dst_group;
859 hdr->nm_pid = NETLINK_CB(skb).creds.pid;
860 hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
861 hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
862 netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
863}
864
865#else /* CONFIG_NETLINK_MMAP */
866#define netlink_rx_is_mmaped(sk) false
867#define netlink_tx_is_mmaped(sk) false
868#define netlink_mmap sock_no_mmap
869#define netlink_poll datagram_poll
870#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0
871#endif /* CONFIG_NETLINK_MMAP */
872
873static void netlink_skb_destructor(struct sk_buff *skb) 303static void netlink_skb_destructor(struct sk_buff *skb)
874{ 304{
875#ifdef CONFIG_NETLINK_MMAP
876 struct nl_mmap_hdr *hdr;
877 struct netlink_ring *ring;
878 struct sock *sk;
879
880 /* If a packet from the kernel to userspace was freed because of an
881 * error without being delivered to userspace, the kernel must reset
882 * the status. In the direction userspace to kernel, the status is
883 * always reset here after the packet was processed and freed.
884 */
885 if (netlink_skb_is_mmaped(skb)) {
886 hdr = netlink_mmap_hdr(skb);
887 sk = NETLINK_CB(skb).sk;
888
889 if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
890 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
891 ring = &nlk_sk(sk)->tx_ring;
892 } else {
893 if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
894 hdr->nm_len = 0;
895 netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
896 }
897 ring = &nlk_sk(sk)->rx_ring;
898 }
899
900 WARN_ON(atomic_read(&ring->pending) == 0);
901 atomic_dec(&ring->pending);
902 sock_put(sk);
903
904 skb->head = NULL;
905 }
906#endif
907 if (is_vmalloc_addr(skb->head)) { 305 if (is_vmalloc_addr(skb->head)) {
908 if (!skb->cloned || 306 if (!skb->cloned ||
909 !atomic_dec_return(&(skb_shinfo(skb)->dataref))) 307 !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
@@ -937,18 +335,6 @@ static void netlink_sock_destruct(struct sock *sk)
 	}
 
 	skb_queue_purge(&sk->sk_receive_queue);
-#ifdef CONFIG_NETLINK_MMAP
-	if (1) {
-		struct nl_mmap_req req;
-
-		memset(&req, 0, sizeof(req));
-		if (nlk->rx_ring.pg_vec)
-			__netlink_set_ring(sk, &req, false, NULL, 0);
-		memset(&req, 0, sizeof(req));
-		if (nlk->tx_ring.pg_vec)
-			__netlink_set_ring(sk, &req, true, NULL, 0);
-	}
-#endif /* CONFIG_NETLINK_MMAP */
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
@@ -1194,9 +580,6 @@ static int __netlink_create(struct net *net, struct socket *sock,
 		mutex_init(nlk->cb_mutex);
 	}
 	init_waitqueue_head(&nlk->wait);
-#ifdef CONFIG_NETLINK_MMAP
-	mutex_init(&nlk->pg_vec_lock);
-#endif
 
 	sk->sk_destruct = netlink_sock_destruct;
 	sk->sk_protocol = protocol;
@@ -1650,6 +1033,14 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 	return 0;
 }
 
+static int netlink_ioctl(struct socket *sock, unsigned int cmd,
+			 unsigned long arg)
+{
+	/* try to hand this ioctl down to the NIC drivers.
+	 */
+	return -ENOIOCTLCMD;
+}
+
 static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
 {
 	struct sock *sock;
@@ -1728,8 +1119,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 	nlk = nlk_sk(sk);
 
 	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
-	    !netlink_skb_is_mmaped(skb)) {
+	     test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
 		DECLARE_WAITQUEUE(wait, current);
 		if (!*timeo) {
 			if (!ssk || netlink_is_kernel(ssk))
@@ -1767,14 +1157,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
 
 	netlink_deliver_tap(skb);
 
-#ifdef CONFIG_NETLINK_MMAP
-	if (netlink_skb_is_mmaped(skb))
-		netlink_queue_mmaped_skb(sk, skb);
-	else if (netlink_rx_is_mmaped(sk))
-		netlink_ring_set_copied(sk, skb);
-	else
-#endif /* CONFIG_NETLINK_MMAP */
-		skb_queue_tail(&sk->sk_receive_queue, skb);
+	skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk);
 	return len;
 }
@@ -1798,9 +1181,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
 	int delta;
 
 	WARN_ON(skb->sk != NULL);
-	if (netlink_skb_is_mmaped(skb))
-		return skb;
-
 	delta = skb->end - skb->tail;
 	if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
 		return skb;
@@ -1876,79 +1256,6 @@ retry:
1876} 1256}
1877EXPORT_SYMBOL(netlink_unicast); 1257EXPORT_SYMBOL(netlink_unicast);
1878 1258
1879struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
1880 unsigned int ldiff, u32 dst_portid,
1881 gfp_t gfp_mask)
1882{
1883#ifdef CONFIG_NETLINK_MMAP
1884 unsigned int maxlen, linear_size;
1885 struct sock *sk = NULL;
1886 struct sk_buff *skb;
1887 struct netlink_ring *ring;
1888 struct nl_mmap_hdr *hdr;
1889
1890 sk = netlink_getsockbyportid(ssk, dst_portid);
1891 if (IS_ERR(sk))
1892 goto out;
1893
1894 ring = &nlk_sk(sk)->rx_ring;
1895 /* fast-path without atomic ops for common case: non-mmaped receiver */
1896 if (ring->pg_vec == NULL)
1897 goto out_put;
1898
1899 /* We need to account the full linear size needed as a ring
1900 * slot cannot have non-linear parts.
1901 */
1902 linear_size = size + ldiff;
1903 if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
1904 goto out_put;
1905
1906 skb = alloc_skb_head(gfp_mask);
1907 if (skb == NULL)
1908 goto err1;
1909
1910 spin_lock_bh(&sk->sk_receive_queue.lock);
1911 /* check again under lock */
1912 if (ring->pg_vec == NULL)
1913 goto out_free;
1914
1915 /* check again under lock */
1916 maxlen = ring->frame_size - NL_MMAP_HDRLEN;
1917 if (maxlen < linear_size)
1918 goto out_free;
1919
1920 netlink_forward_ring(ring);
1921 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
1922 if (hdr == NULL)
1923 goto err2;
1924
1925 netlink_ring_setup_skb(skb, sk, ring, hdr);
1926 netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
1927 atomic_inc(&ring->pending);
1928 netlink_increment_head(ring);
1929
1930 spin_unlock_bh(&sk->sk_receive_queue.lock);
1931 return skb;
1932
1933err2:
1934 kfree_skb(skb);
1935 spin_unlock_bh(&sk->sk_receive_queue.lock);
1936 netlink_overrun(sk);
1937err1:
1938 sock_put(sk);
1939 return NULL;
1940
1941out_free:
1942 kfree_skb(skb);
1943 spin_unlock_bh(&sk->sk_receive_queue.lock);
1944out_put:
1945 sock_put(sk);
1946out:
1947#endif
1948 return alloc_skb(size, gfp_mask);
1949}
1950EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
1951
1952int netlink_has_listeners(struct sock *sk, unsigned int group) 1259int netlink_has_listeners(struct sock *sk, unsigned int group)
1953{ 1260{
1954 int res = 0; 1261 int res = 0;
@@ -2225,8 +1532,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 	if (level != SOL_NETLINK)
 		return -ENOPROTOOPT;
 
-	if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
-	    optlen >= sizeof(int) &&
+	if (optlen >= sizeof(int) &&
 	    get_user(val, (unsigned int __user *)optval))
 		return -EFAULT;
 
@@ -2279,25 +1585,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 		}
 		err = 0;
 		break;
-#ifdef CONFIG_NETLINK_MMAP
-	case NETLINK_RX_RING:
-	case NETLINK_TX_RING: {
-		struct nl_mmap_req req;
-
-		/* Rings might consume more memory than queue limits, require
-		 * CAP_NET_ADMIN.
-		 */
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-		if (optlen < sizeof(req))
-			return -EINVAL;
-		if (copy_from_user(&req, optval, sizeof(req)))
-			return -EFAULT;
-		err = netlink_set_ring(sk, &req,
-				       optname == NETLINK_TX_RING);
-		break;
-	}
-#endif /* CONFIG_NETLINK_MMAP */
 	case NETLINK_LISTEN_ALL_NSID:
 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
 			return -EPERM;
@@ -2467,18 +1754,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 		smp_rmb();
 	}
 
-	/* It's a really convoluted way for userland to ask for mmaped
-	 * sendmsg(), but that's what we've got...
-	 */
-	if (netlink_tx_is_mmaped(sk) &&
-	    iter_is_iovec(&msg->msg_iter) &&
-	    msg->msg_iter.nr_segs == 1 &&
-	    msg->msg_iter.iov->iov_base == NULL) {
-		err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
-					   &scm);
-		goto out;
-	}
-
 	err = -EMSGSIZE;
 	if (len > sk->sk_sndbuf - 32)
 		goto out;
@@ -2794,8 +2069,7 @@ static int netlink_dump(struct sock *sk)
 		goto errout_skb;
 	}
 
-	if (!netlink_rx_is_mmaped(sk) &&
-	    atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		goto errout_skb;
 
 	/* NLMSG_GOODSIZE is small to avoid high order allocations being
@@ -2808,15 +2082,12 @@ static int netlink_dump(struct sock *sk)
 
 	if (alloc_min_size < nlk->max_recvmsg_len) {
 		alloc_size = nlk->max_recvmsg_len;
-		skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
-					GFP_KERNEL |
-					__GFP_NOWARN |
-					__GFP_NORETRY);
+		skb = alloc_skb(alloc_size, GFP_KERNEL |
+				__GFP_NOWARN | __GFP_NORETRY);
 	}
 	if (!skb) {
 		alloc_size = alloc_min_size;
-		skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
-					GFP_KERNEL);
+		skb = alloc_skb(alloc_size, GFP_KERNEL);
 	}
 	if (!skb)
 		goto errout_skb;
@@ -2883,16 +2154,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	struct netlink_sock *nlk;
 	int ret;
 
-	/* Memory mapped dump requests need to be copied to avoid looping
-	 * on the pending state in netlink_mmap_sendmsg() while the CB hold
-	 * a reference to the skb.
-	 */
-	if (netlink_skb_is_mmaped(skb)) {
-		skb = skb_copy(skb, GFP_KERNEL);
-		if (skb == NULL)
-			return -ENOBUFS;
-	} else
-		atomic_inc(&skb->users);
+	atomic_inc(&skb->users);
 
 	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
 	if (sk == NULL) {
@@ -2965,8 +2227,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
2965 if (!(nlk->flags & NETLINK_F_CAP_ACK) && err) 2227 if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
2966 payload += nlmsg_len(nlh); 2228 payload += nlmsg_len(nlh);
2967 2229
2968 skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload), 2230 skb = nlmsg_new(payload, GFP_KERNEL);
2969 NETLINK_CB(in_skb).portid, GFP_KERNEL);
2970 if (!skb) { 2231 if (!skb) {
2971 struct sock *sk; 2232 struct sock *sk;
2972 2233
@@ -3240,15 +2501,15 @@ static const struct proto_ops netlink_ops = {
3240 .socketpair = sock_no_socketpair, 2501 .socketpair = sock_no_socketpair,
3241 .accept = sock_no_accept, 2502 .accept = sock_no_accept,
3242 .getname = netlink_getname, 2503 .getname = netlink_getname,
3243 .poll = netlink_poll, 2504 .poll = datagram_poll,
3244 .ioctl = sock_no_ioctl, 2505 .ioctl = netlink_ioctl,
3245 .listen = sock_no_listen, 2506 .listen = sock_no_listen,
3246 .shutdown = sock_no_shutdown, 2507 .shutdown = sock_no_shutdown,
3247 .setsockopt = netlink_setsockopt, 2508 .setsockopt = netlink_setsockopt,
3248 .getsockopt = netlink_getsockopt, 2509 .getsockopt = netlink_getsockopt,
3249 .sendmsg = netlink_sendmsg, 2510 .sendmsg = netlink_sendmsg,
3250 .recvmsg = netlink_recvmsg, 2511 .recvmsg = netlink_recvmsg,
3251 .mmap = netlink_mmap, 2512 .mmap = sock_no_mmap,
3252 .sendpage = sock_no_sendpage, 2513 .sendpage = sock_no_sendpage,
3253}; 2514};
3254 2515
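
With .mmap now wired to sock_no_mmap and .poll to datagram_poll, the memory-mapped netlink rings are gone from the socket operations. A hedged user-space check of the visible effect (runnable on Linux; sock_no_mmap is expected to make mmap() fail with ENODEV, whereas kernels built with CONFIG_NETLINK_MMAP could allow it after configuring rings via NETLINK_RX_RING/NETLINK_TX_RING):

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/netlink.h>

int main(void)
{
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* Expected to fail (ENODEV) now that netlink_ops.mmap == sock_no_mmap. */
	void *ring = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			  MAP_SHARED, fd, 0);
	if (ring == MAP_FAILED)
		printf("mmap failed as expected: %s\n", strerror(errno));
	else
		printf("mmap unexpectedly succeeded\n");
	close(fd);
	return 0;
}
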
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 14437d9b1965..e68ef9ccd703 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -44,12 +44,6 @@ struct netlink_sock {
44 int (*netlink_bind)(struct net *net, int group); 44 int (*netlink_bind)(struct net *net, int group);
45 void (*netlink_unbind)(struct net *net, int group); 45 void (*netlink_unbind)(struct net *net, int group);
46 struct module *module; 46 struct module *module;
47#ifdef CONFIG_NETLINK_MMAP
48 struct mutex pg_vec_lock;
49 struct netlink_ring rx_ring;
50 struct netlink_ring tx_ring;
51 atomic_t mapped;
52#endif /* CONFIG_NETLINK_MMAP */
53 47
54 struct rhash_head node; 48 struct rhash_head node;
55 struct rcu_head rcu; 49 struct rcu_head rcu;
@@ -60,15 +54,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
60 return container_of(sk, struct netlink_sock, sk); 54 return container_of(sk, struct netlink_sock, sk);
61} 55}
62 56
63static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
64{
65#ifdef CONFIG_NETLINK_MMAP
66 return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
67#else
68 return false;
69#endif /* CONFIG_NETLINK_MMAP */
70}
71
72struct netlink_table { 57struct netlink_table {
73 struct rhashtable hash; 58 struct rhashtable hash;
74 struct hlist_head mc_list; 59 struct hlist_head mc_list;
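
The ring fields leave struct netlink_sock, but the nlk_sk() accessor kept as context above is the usual container_of pattern: the embedded struct sock is the first member, and the conversion subtracts the member offset from the member pointer. A small standalone model of that pattern (the struct layouts here are illustrative stand-ins, not the kernel definitions):

#include <stdio.h>
#include <stddef.h>

/* Standalone model of the kernel's container_of(): recover the enclosing
 * structure from a pointer to one of its members.
 */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct sock_model {		/* stand-in for struct sock */
	int protocol;
};

struct netlink_sock_model {	/* stand-in for struct netlink_sock */
	struct sock_model sk;	/* the member named in container_of() */
	unsigned int portid;
};

static struct netlink_sock_model *nlk_sk(struct sock_model *sk)
{
	return container_of(sk, struct netlink_sock_model, sk);
}

int main(void)
{
	struct netlink_sock_model nlk = { .sk = { .protocol = 16 }, .portid = 42 };
	struct sock_model *sk = &nlk.sk;

	printf("portid via container_of: %u\n", nlk_sk(sk)->portid);
	return 0;
}
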
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 3ee63a3cff30..8dd836a8dd60 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -8,41 +8,6 @@
8 8
9#include "af_netlink.h" 9#include "af_netlink.h"
10 10
11#ifdef CONFIG_NETLINK_MMAP
12static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
13 struct sk_buff *nlskb)
14{
15 struct netlink_diag_ring ndr;
16
17 ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
18 ndr.ndr_block_nr = ring->pg_vec_len;
19 ndr.ndr_frame_size = ring->frame_size;
20 ndr.ndr_frame_nr = ring->frame_max + 1;
21
22 return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
23}
24
25static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
26{
27 struct netlink_sock *nlk = nlk_sk(sk);
28 int ret;
29
30 mutex_lock(&nlk->pg_vec_lock);
31 ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
32 if (!ret)
33 ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
34 nlskb);
35 mutex_unlock(&nlk->pg_vec_lock);
36
37 return ret;
38}
39#else
40static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
41{
42 return 0;
43}
44#endif
45
46static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) 11static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
47{ 12{
48 struct netlink_sock *nlk = nlk_sk(sk); 13 struct netlink_sock *nlk = nlk_sk(sk);
@@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
87 sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) 52 sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
88 goto out_nlmsg_trim; 53 goto out_nlmsg_trim;
89 54
90 if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
91 sk_diag_put_rings_cfg(sk, skb))
92 goto out_nlmsg_trim;
93
94 nlmsg_end(skb, nlh); 55 nlmsg_end(skb, nlh);
95 return 0; 56 return 0;
96 57
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f830326b3b1d..a09132a69869 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -463,26 +463,6 @@ int genl_unregister_family(struct genl_family *family)
463EXPORT_SYMBOL(genl_unregister_family); 463EXPORT_SYMBOL(genl_unregister_family);
464 464
465/** 465/**
466 * genlmsg_new_unicast - Allocate generic netlink message for unicast
467 * @payload: size of the message payload
468 * @info: information on destination
469 * @flags: the type of memory to allocate
470 *
471 * Allocates a new sk_buff large enough to cover the specified payload
472 * plus required Netlink headers. Will check receiving socket for
473 * memory mapped i/o capability and use it if enabled. Will fall back
474 * to non-mapped skb if message size exceeds the frame size of the ring.
475 */
476struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info,
477 gfp_t flags)
478{
479 size_t len = nlmsg_total_size(genlmsg_total_size(payload));
480
481 return netlink_alloc_skb(info->dst_sk, len, info->snd_portid, flags);
482}
483EXPORT_SYMBOL_GPL(genlmsg_new_unicast);
484
485/**
486 * genlmsg_put - Add generic netlink header to netlink message 466 * genlmsg_put - Add generic netlink header to netlink message
487 * @skb: socket buffer holding the message 467 * @skb: socket buffer holding the message
488 * @portid: netlink portid the message is addressed to 468 * @portid: netlink portid the message is addressed to
@@ -580,6 +560,10 @@ static int genl_family_rcv_msg(struct genl_family *family,
580 !netlink_capable(skb, CAP_NET_ADMIN)) 560 !netlink_capable(skb, CAP_NET_ADMIN))
581 return -EPERM; 561 return -EPERM;
582 562
563 if ((ops->flags & GENL_UNS_ADMIN_PERM) &&
564 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
565 return -EPERM;
566
583 if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { 567 if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
584 int rc; 568 int rc;
585 569
@@ -638,7 +622,6 @@ static int genl_family_rcv_msg(struct genl_family *family,
638 info.genlhdr = nlmsg_data(nlh); 622 info.genlhdr = nlmsg_data(nlh);
639 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; 623 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
640 info.attrs = attrbuf; 624 info.attrs = attrbuf;
641 info.dst_sk = skb->sk;
642 genl_info_net_set(&info, net); 625 genl_info_net_set(&info, net);
643 memset(&info.user_ptr, 0, sizeof(info.user_ptr)); 626 memset(&info.user_ptr, 0, sizeof(info.user_ptr));
644 627
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 3621a902cb6e..3425532c39f7 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -663,7 +663,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
663 return -ENOBUFS; 663 return -ENOBUFS;
664 } 664 }
665 665
666 msg_data = kzalloc(len, GFP_KERNEL); 666 msg_data = kmalloc(len, GFP_USER | __GFP_NOWARN);
667 if (msg_data == NULL) 667 if (msg_data == NULL)
668 return -ENOMEM; 668 return -ENOMEM;
669 669
@@ -729,7 +729,7 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
729 if (local == NULL) 729 if (local == NULL)
730 return -ENODEV; 730 return -ENODEV;
731 731
732 msg_data = kzalloc(len, GFP_KERNEL); 732 msg_data = kmalloc(len, GFP_USER | __GFP_NOWARN);
733 if (msg_data == NULL) 733 if (msg_data == NULL)
734 return -ENOMEM; 734 return -ENOMEM;
735 735
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ecf0a0196f18..b9edf5fae6ae 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -509,6 +509,11 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
509 memset(llcp_addr, 0, sizeof(*llcp_addr)); 509 memset(llcp_addr, 0, sizeof(*llcp_addr));
510 *len = sizeof(struct sockaddr_nfc_llcp); 510 *len = sizeof(struct sockaddr_nfc_llcp);
511 511
512 lock_sock(sk);
513 if (!llcp_sock->dev) {
514 release_sock(sk);
515 return -EBADFD;
516 }
512 llcp_addr->sa_family = AF_NFC; 517 llcp_addr->sa_family = AF_NFC;
513 llcp_addr->dev_idx = llcp_sock->dev->idx; 518 llcp_addr->dev_idx = llcp_sock->dev->idx;
514 llcp_addr->target_idx = llcp_sock->target_idx; 519 llcp_addr->target_idx = llcp_sock->target_idx;
@@ -518,6 +523,7 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
518 llcp_addr->service_name_len = llcp_sock->service_name_len; 523 llcp_addr->service_name_len = llcp_sock->service_name_len;
519 memcpy(llcp_addr->service_name, llcp_sock->service_name, 524 memcpy(llcp_addr->service_name, llcp_sock->service_name,
520 llcp_addr->service_name_len); 525 llcp_addr->service_name_len);
526 release_sock(sk);
521 527
522 return 0; 528 return 0;
523} 529}
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 21d8875673a4..c468eabd6943 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -171,14 +171,7 @@ static int nci_uart_tty_open(struct tty_struct *tty)
171 tty->disc_data = NULL; 171 tty->disc_data = NULL;
172 tty->receive_room = 65536; 172 tty->receive_room = 65536;
173 173
174 /* Flush any pending characters in the driver and line discipline. */ 174 /* Flush any pending characters in the driver */
175
176 /* FIXME: why is this needed. Note don't use ldisc_ref here as the
177 * open path is before the ldisc is referencable.
178 */
179
180 if (tty->ldisc->ops->flush_buffer)
181 tty->ldisc->ops->flush_buffer(tty);
182 tty_driver_flush_buffer(tty); 175 tty_driver_flush_buffer(tty);
183 176
184 return 0; 177 return 0;
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index d143aa9f6654..234a73344c6e 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -6,10 +6,12 @@ config OPENVSWITCH
6 tristate "Open vSwitch" 6 tristate "Open vSwitch"
7 depends on INET 7 depends on INET
8 depends on !NF_CONNTRACK || \ 8 depends on !NF_CONNTRACK || \
9 (NF_CONNTRACK && (!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6)) 9 (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
10 (!NF_NAT || NF_NAT)))
10 select LIBCRC32C 11 select LIBCRC32C
11 select MPLS 12 select MPLS
12 select NET_MPLS_GSO 13 select NET_MPLS_GSO
14 select DST_CACHE
13 ---help--- 15 ---help---
14 Open vSwitch is a multilayer Ethernet switch targeted at virtualized 16 Open vSwitch is a multilayer Ethernet switch targeted at virtualized
15 environments. In addition to supporting a variety of features 17 environments. In addition to supporting a variety of features
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 2d59df521915..e9dd47b2a85b 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
158 new_mpls_lse = (__be32 *)skb_mpls_header(skb); 158 new_mpls_lse = (__be32 *)skb_mpls_header(skb);
159 *new_mpls_lse = mpls->mpls_lse; 159 *new_mpls_lse = mpls->mpls_lse;
160 160
161 if (skb->ip_summed == CHECKSUM_COMPLETE) 161 skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
162 skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
163 MPLS_HLEN, 0));
164 162
165 hdr = eth_hdr(skb); 163 hdr = eth_hdr(skb);
166 hdr->h_proto = mpls->mpls_ethertype; 164 hdr->h_proto = mpls->mpls_ethertype;
@@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
280 ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, 278 ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
281 mask->eth_dst); 279 mask->eth_dst);
282 280
283 ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); 281 skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
284 282
285 ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); 283 ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
286 ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); 284 ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
@@ -639,7 +637,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk
639 /* Reconstruct the MAC header. */ 637 /* Reconstruct the MAC header. */
640 skb_push(skb, data->l2_len); 638 skb_push(skb, data->l2_len);
641 memcpy(skb->data, &data->l2_data, data->l2_len); 639 memcpy(skb->data, &data->l2_data, data->l2_len);
642 ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len); 640 skb_postpush_rcsum(skb, skb->data, data->l2_len);
643 skb_reset_mac_header(skb); 641 skb_reset_mac_header(skb);
644 642
645 ovs_vport_send(vport, skb); 643 ovs_vport_send(vport, skb);
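
The actions.c hunks replace open-coded CHECKSUM_COMPLETE fixups with skb_postpush_rcsum(), which folds the one's-complement sum of the bytes just pushed in front of the packet into skb->csum instead of recomputing the whole sum. A standalone model (not the kernel helpers) of why the incremental update matches a full recomputation, simplified to even-length blocks with no odd-offset handling:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* 32-bit accumulator one's-complement sum over 16-bit words, roughly what
 * csum_partial() computes; 'len' is assumed even here for simplicity.
 */
static uint32_t csum_partial_model(const uint8_t *buf, size_t len, uint32_t sum)
{
	for (size_t i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)buf[i] << 8 | buf[i + 1];
	return sum;
}

static uint16_t csum_fold_model(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	uint8_t hdr[4]  = { 0x00, 0x01, 0x51, 0xff };	/* pushed header (even length) */
	uint8_t data[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };	/* bytes already covered by the sum */
	uint8_t whole[12];

	memcpy(whole, hdr, sizeof(hdr));
	memcpy(whole + sizeof(hdr), data, sizeof(data));

	/* "skb->csum" before the push covers only 'data'. */
	uint32_t csum = csum_partial_model(data, sizeof(data), 0);
	/* Post-push fixup: add the sum of the newly pushed header bytes. */
	csum = csum_partial_model(hdr, sizeof(hdr), csum);

	uint32_t full = csum_partial_model(whole, sizeof(whole), 0);
	printf("incremental: %04x, recomputed: %04x\n",
	       csum_fold_model(csum), csum_fold_model(full));
	return 0;
}
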
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index ee6ff8ffc12d..dc5eb29fe7d6 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -13,21 +13,31 @@
13 13
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/openvswitch.h> 15#include <linux/openvswitch.h>
16#include <linux/tcp.h>
17#include <linux/udp.h>
18#include <linux/sctp.h>
16#include <net/ip.h> 19#include <net/ip.h>
17#include <net/netfilter/nf_conntrack_core.h> 20#include <net/netfilter/nf_conntrack_core.h>
18#include <net/netfilter/nf_conntrack_helper.h> 21#include <net/netfilter/nf_conntrack_helper.h>
19#include <net/netfilter/nf_conntrack_labels.h> 22#include <net/netfilter/nf_conntrack_labels.h>
23#include <net/netfilter/nf_conntrack_seqadj.h>
20#include <net/netfilter/nf_conntrack_zones.h> 24#include <net/netfilter/nf_conntrack_zones.h>
21#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 25#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
22 26
27#ifdef CONFIG_NF_NAT_NEEDED
28#include <linux/netfilter/nf_nat.h>
29#include <net/netfilter/nf_nat_core.h>
30#include <net/netfilter/nf_nat_l3proto.h>
31#endif
32
23#include "datapath.h" 33#include "datapath.h"
24#include "conntrack.h" 34#include "conntrack.h"
25#include "flow.h" 35#include "flow.h"
26#include "flow_netlink.h" 36#include "flow_netlink.h"
27 37
28struct ovs_ct_len_tbl { 38struct ovs_ct_len_tbl {
29 size_t maxlen; 39 int maxlen;
30 size_t minlen; 40 int minlen;
31}; 41};
32 42
33/* Metadata mark for masked write to conntrack mark */ 43/* Metadata mark for masked write to conntrack mark */
@@ -42,15 +52,25 @@ struct md_labels {
42 struct ovs_key_ct_labels mask; 52 struct ovs_key_ct_labels mask;
43}; 53};
44 54
55enum ovs_ct_nat {
56 OVS_CT_NAT = 1 << 0, /* NAT for committed connections only. */
57 OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */
58 OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */
59};
60
45/* Conntrack action context for execution. */ 61/* Conntrack action context for execution. */
46struct ovs_conntrack_info { 62struct ovs_conntrack_info {
47 struct nf_conntrack_helper *helper; 63 struct nf_conntrack_helper *helper;
48 struct nf_conntrack_zone zone; 64 struct nf_conntrack_zone zone;
49 struct nf_conn *ct; 65 struct nf_conn *ct;
50 u8 commit : 1; 66 u8 commit : 1;
67 u8 nat : 3; /* enum ovs_ct_nat */
51 u16 family; 68 u16 family;
52 struct md_mark mark; 69 struct md_mark mark;
53 struct md_labels labels; 70 struct md_labels labels;
71#ifdef CONFIG_NF_NAT_NEEDED
72 struct nf_nat_range range; /* Only present for SRC NAT and DST NAT. */
73#endif
54}; 74};
55 75
56static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); 76static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
@@ -75,7 +95,6 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
75 switch (ctinfo) { 95 switch (ctinfo) {
76 case IP_CT_ESTABLISHED_REPLY: 96 case IP_CT_ESTABLISHED_REPLY:
77 case IP_CT_RELATED_REPLY: 97 case IP_CT_RELATED_REPLY:
78 case IP_CT_NEW_REPLY:
79 ct_state |= OVS_CS_F_REPLY_DIR; 98 ct_state |= OVS_CS_F_REPLY_DIR;
80 break; 99 break;
81 default: 100 default:
@@ -92,7 +111,6 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
92 ct_state |= OVS_CS_F_RELATED; 111 ct_state |= OVS_CS_F_RELATED;
93 break; 112 break;
94 case IP_CT_NEW: 113 case IP_CT_NEW:
95 case IP_CT_NEW_REPLY:
96 ct_state |= OVS_CS_F_NEW; 114 ct_state |= OVS_CS_F_NEW;
97 break; 115 break;
98 default: 116 default:
@@ -139,12 +157,15 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
139 ovs_ct_get_labels(ct, &key->ct.labels); 157 ovs_ct_get_labels(ct, &key->ct.labels);
140} 158}
141 159
142/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has 160/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
143 * previously sent the packet to conntrack via the ct action. 161 * previously sent the packet to conntrack via the ct action. If
162 * 'keep_nat_flags' is true, the existing NAT flags are retained, else they are
163 * initialized from the connection status.
144 */ 164 */
145static void ovs_ct_update_key(const struct sk_buff *skb, 165static void ovs_ct_update_key(const struct sk_buff *skb,
146 const struct ovs_conntrack_info *info, 166 const struct ovs_conntrack_info *info,
147 struct sw_flow_key *key, bool post_ct) 167 struct sw_flow_key *key, bool post_ct,
168 bool keep_nat_flags)
148{ 169{
149 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; 170 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
150 enum ip_conntrack_info ctinfo; 171 enum ip_conntrack_info ctinfo;
@@ -154,10 +175,22 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
154 ct = nf_ct_get(skb, &ctinfo); 175 ct = nf_ct_get(skb, &ctinfo);
155 if (ct) { 176 if (ct) {
156 state = ovs_ct_get_state(ctinfo); 177 state = ovs_ct_get_state(ctinfo);
178 /* All unconfirmed entries are NEW connections. */
157 if (!nf_ct_is_confirmed(ct)) 179 if (!nf_ct_is_confirmed(ct))
158 state |= OVS_CS_F_NEW; 180 state |= OVS_CS_F_NEW;
181 /* OVS persists the related flag for the duration of the
182 * connection.
183 */
159 if (ct->master) 184 if (ct->master)
160 state |= OVS_CS_F_RELATED; 185 state |= OVS_CS_F_RELATED;
186 if (keep_nat_flags) {
187 state |= key->ct.state & OVS_CS_F_NAT_MASK;
188 } else {
189 if (ct->status & IPS_SRC_NAT)
190 state |= OVS_CS_F_SRC_NAT;
191 if (ct->status & IPS_DST_NAT)
192 state |= OVS_CS_F_DST_NAT;
193 }
161 zone = nf_ct_zone(ct); 194 zone = nf_ct_zone(ct);
162 } else if (post_ct) { 195 } else if (post_ct) {
163 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; 196 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
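
The keep_nat_flags branch added to ovs_ct_update_key() above decides whether the NAT bits in key->ct.state are carried over (when NAT has already run for this packet) or re-derived from the conntrack status. A standalone sketch of just that decision (the flag values are illustrative, not the real uapi constants):

#include <stdio.h>
#include <stdint.h>

#define OVS_CS_F_SRC_NAT  0x40		/* illustrative values */
#define OVS_CS_F_DST_NAT  0x80
#define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)

#define IPS_SRC_NAT       0x1
#define IPS_DST_NAT       0x2

/* Either carry the NAT bits over from the previous key, or re-derive them
 * from the conntrack status bits.
 */
static uint8_t nat_state(uint8_t prev_key_state, unsigned long ct_status,
			 int keep_nat_flags)
{
	uint8_t state = 0;

	if (keep_nat_flags) {
		state |= prev_key_state & OVS_CS_F_NAT_MASK;
	} else {
		if (ct_status & IPS_SRC_NAT)
			state |= OVS_CS_F_SRC_NAT;
		if (ct_status & IPS_DST_NAT)
			state |= OVS_CS_F_DST_NAT;
	}
	return state;
}

int main(void)
{
	printf("derived from status: %#x\n", nat_state(0, IPS_DST_NAT, 0));
	printf("kept from old key:   %#x\n", nat_state(OVS_CS_F_SRC_NAT, 0, 1));
	return 0;
}
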
@@ -167,9 +200,12 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
167 __ovs_ct_update_key(key, state, zone, ct); 200 __ovs_ct_update_key(key, state, zone, ct);
168} 201}
169 202
203/* This is called to initialize CT key fields possibly coming in from the local
204 * stack.
205 */
170void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) 206void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
171{ 207{
172 ovs_ct_update_key(skb, NULL, key, false); 208 ovs_ct_update_key(skb, NULL, key, false, false);
173} 209}
174 210
175int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) 211int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
@@ -201,7 +237,6 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
201 struct nf_conn *ct; 237 struct nf_conn *ct;
202 u32 new_mark; 238 u32 new_mark;
203 239
204
205 /* The connection could be invalid, in which case set_mark is no-op. */ 240 /* The connection could be invalid, in which case set_mark is no-op. */
206 ct = nf_ct_get(skb, &ctinfo); 241 ct = nf_ct_get(skb, &ctinfo);
207 if (!ct) 242 if (!ct)
@@ -259,6 +294,7 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
259 enum ip_conntrack_info ctinfo; 294 enum ip_conntrack_info ctinfo;
260 unsigned int protoff; 295 unsigned int protoff;
261 struct nf_conn *ct; 296 struct nf_conn *ct;
297 int err;
262 298
263 ct = nf_ct_get(skb, &ctinfo); 299 ct = nf_ct_get(skb, &ctinfo);
264 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 300 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
@@ -295,7 +331,18 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
295 return NF_DROP; 331 return NF_DROP;
296 } 332 }
297 333
298 return helper->help(skb, protoff, ct, ctinfo); 334 err = helper->help(skb, protoff, ct, ctinfo);
335 if (err != NF_ACCEPT)
336 return err;
337
338 /* Adjust seqs after helper. This is needed due to some helpers (e.g.,
339 * FTP with NAT) adjusting the TCP payload size when mangling IP
340 * addresses and/or port numbers in the text-based control connection.
341 */
342 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
343 !nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
344 return NF_DROP;
345 return NF_ACCEPT;
299} 346}
300 347
301/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero 348/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
@@ -352,14 +399,101 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
352 return __nf_ct_expect_find(net, zone, &tuple); 399 return __nf_ct_expect_find(net, zone, &tuple);
353} 400}
354 401
402/* This replicates logic from nf_conntrack_core.c that is not exported. */
403static enum ip_conntrack_info
404ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
405{
406 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
407
408 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
409 return IP_CT_ESTABLISHED_REPLY;
410 /* Once we've had two way comms, always ESTABLISHED. */
411 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status))
412 return IP_CT_ESTABLISHED;
413 if (test_bit(IPS_EXPECTED_BIT, &ct->status))
414 return IP_CT_RELATED;
415 return IP_CT_NEW;
416}
417
418/* Find an existing connection which this packet belongs to without
419 * re-attributing statistics or modifying the connection state. This allows an
420 * skb->nfct lost due to an upcall to be recovered during actions execution.
421 *
422 * Must be called with rcu_read_lock.
423 *
424 * On success, populates skb->nfct and skb->nfctinfo, and returns the
425 * connection. Returns NULL if there is no existing entry.
426 */
427static struct nf_conn *
428ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
429 u8 l3num, struct sk_buff *skb)
430{
431 struct nf_conntrack_l3proto *l3proto;
432 struct nf_conntrack_l4proto *l4proto;
433 struct nf_conntrack_tuple tuple;
434 struct nf_conntrack_tuple_hash *h;
435 enum ip_conntrack_info ctinfo;
436 struct nf_conn *ct;
437 unsigned int dataoff;
438 u8 protonum;
439
440 l3proto = __nf_ct_l3proto_find(l3num);
441 if (!l3proto) {
442 pr_debug("ovs_ct_find_existing: Can't get l3proto\n");
443 return NULL;
444 }
445 if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
446 &protonum) <= 0) {
447 pr_debug("ovs_ct_find_existing: Can't get protonum\n");
448 return NULL;
449 }
450 l4proto = __nf_ct_l4proto_find(l3num, protonum);
451 if (!l4proto) {
452 pr_debug("ovs_ct_find_existing: Can't get l4proto\n");
453 return NULL;
454 }
455 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
456 protonum, net, &tuple, l3proto, l4proto)) {
457 pr_debug("ovs_ct_find_existing: Can't get tuple\n");
458 return NULL;
459 }
460
461 /* look for tuple match */
462 h = nf_conntrack_find_get(net, zone, &tuple);
463 if (!h)
464 return NULL; /* Not found. */
465
466 ct = nf_ct_tuplehash_to_ctrack(h);
467
468 ctinfo = ovs_ct_get_info(h);
469 if (ctinfo == IP_CT_NEW) {
470 /* This should not happen. */
471 WARN_ONCE(1, "ovs_ct_find_existing: new packet for %p\n", ct);
472 }
473 skb->nfct = &ct->ct_general;
474 skb->nfctinfo = ctinfo;
475 return ct;
476}
477
355/* Determine whether skb->nfct is equal to the result of conntrack lookup. */ 478/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
356static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb, 479static bool skb_nfct_cached(struct net *net,
357 const struct ovs_conntrack_info *info) 480 const struct sw_flow_key *key,
481 const struct ovs_conntrack_info *info,
482 struct sk_buff *skb)
358{ 483{
359 enum ip_conntrack_info ctinfo; 484 enum ip_conntrack_info ctinfo;
360 struct nf_conn *ct; 485 struct nf_conn *ct;
361 486
362 ct = nf_ct_get(skb, &ctinfo); 487 ct = nf_ct_get(skb, &ctinfo);
488 /* If no ct, check if we have evidence that an existing conntrack entry
489 * might be found for this skb. This happens when we lose a skb->nfct
490 * due to an upcall. If the connection was not confirmed, it is not
491 * cached and needs to be run through conntrack again.
492 */
493 if (!ct && key->ct.state & OVS_CS_F_TRACKED &&
494 !(key->ct.state & OVS_CS_F_INVALID) &&
495 key->ct.zone == info->zone.id)
496 ct = ovs_ct_find_existing(net, &info->zone, info->family, skb);
363 if (!ct) 497 if (!ct)
364 return false; 498 return false;
365 if (!net_eq(net, read_pnet(&ct->ct_net))) 499 if (!net_eq(net, read_pnet(&ct->ct_net)))
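
ovs_ct_find_existing() above recovers an skb->nfct lost in an upcall by looking the tuple up again, and ovs_ct_get_info() replicates the (unexported) classification logic from nf_conntrack_core. A standalone sketch of that classification order, with the direction and status bits reduced to plain ints:

#include <stdio.h>

enum ctinfo { CT_NEW, CT_RELATED, CT_ESTABLISHED, CT_ESTABLISHED_REPLY };

/* Classify a looked-up conntrack entry from the direction of the hash
 * entry and two status bits, in the same priority order as the patch.
 */
static enum ctinfo classify(int reply_dir, int seen_reply, int expected)
{
	if (reply_dir)
		return CT_ESTABLISHED_REPLY;
	/* Once we've had two-way communication, always established. */
	if (seen_reply)
		return CT_ESTABLISHED;
	if (expected)
		return CT_RELATED;
	return CT_NEW;
}

int main(void)
{
	printf("reply dir          -> %d\n", classify(1, 0, 0));
	printf("orig dir, replied  -> %d\n", classify(0, 1, 0));
	printf("orig dir, expected -> %d\n", classify(0, 0, 1));
	printf("orig dir, fresh    -> %d\n", classify(0, 0, 0));
	return 0;
}
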
@@ -377,6 +511,206 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
377 return true; 511 return true;
378} 512}
379 513
514#ifdef CONFIG_NF_NAT_NEEDED
515/* Modelled after nf_nat_ipv[46]_fn().
516 * range is only used for new, uninitialized NAT state.
517 * Returns either NF_ACCEPT or NF_DROP.
518 */
519static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
520 enum ip_conntrack_info ctinfo,
521 const struct nf_nat_range *range,
522 enum nf_nat_manip_type maniptype)
523{
524 int hooknum, nh_off, err = NF_ACCEPT;
525
526 nh_off = skb_network_offset(skb);
527 skb_pull(skb, nh_off);
528
529 /* See HOOK2MANIP(). */
530 if (maniptype == NF_NAT_MANIP_SRC)
531 hooknum = NF_INET_LOCAL_IN; /* Source NAT */
532 else
533 hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */
534
535 switch (ctinfo) {
536 case IP_CT_RELATED:
537 case IP_CT_RELATED_REPLY:
538 if (skb->protocol == htons(ETH_P_IP) &&
539 ip_hdr(skb)->protocol == IPPROTO_ICMP) {
540 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
541 hooknum))
542 err = NF_DROP;
543 goto push;
544#if IS_ENABLED(CONFIG_NF_NAT_IPV6)
545 } else if (skb->protocol == htons(ETH_P_IPV6)) {
546 __be16 frag_off;
547 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
548 int hdrlen = ipv6_skip_exthdr(skb,
549 sizeof(struct ipv6hdr),
550 &nexthdr, &frag_off);
551
552 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
553 if (!nf_nat_icmpv6_reply_translation(skb, ct,
554 ctinfo,
555 hooknum,
556 hdrlen))
557 err = NF_DROP;
558 goto push;
559 }
560#endif
561 }
562 /* Non-ICMP, fall thru to initialize if needed. */
563 case IP_CT_NEW:
564 /* Seen it before? This can happen for loopback, retrans,
565 * or local packets.
566 */
567 if (!nf_nat_initialized(ct, maniptype)) {
568 /* Initialize according to the NAT action. */
569 err = (range && range->flags & NF_NAT_RANGE_MAP_IPS)
570 /* Action is set up to establish a new
571 * mapping.
572 */
573 ? nf_nat_setup_info(ct, range, maniptype)
574 : nf_nat_alloc_null_binding(ct, hooknum);
575 if (err != NF_ACCEPT)
576 goto push;
577 }
578 break;
579
580 case IP_CT_ESTABLISHED:
581 case IP_CT_ESTABLISHED_REPLY:
582 break;
583
584 default:
585 err = NF_DROP;
586 goto push;
587 }
588
589 err = nf_nat_packet(ct, ctinfo, hooknum, skb);
590push:
591 skb_push(skb, nh_off);
592
593 return err;
594}
595
596static void ovs_nat_update_key(struct sw_flow_key *key,
597 const struct sk_buff *skb,
598 enum nf_nat_manip_type maniptype)
599{
600 if (maniptype == NF_NAT_MANIP_SRC) {
601 __be16 src;
602
603 key->ct.state |= OVS_CS_F_SRC_NAT;
604 if (key->eth.type == htons(ETH_P_IP))
605 key->ipv4.addr.src = ip_hdr(skb)->saddr;
606 else if (key->eth.type == htons(ETH_P_IPV6))
607 memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
608 sizeof(key->ipv6.addr.src));
609 else
610 return;
611
612 if (key->ip.proto == IPPROTO_UDP)
613 src = udp_hdr(skb)->source;
614 else if (key->ip.proto == IPPROTO_TCP)
615 src = tcp_hdr(skb)->source;
616 else if (key->ip.proto == IPPROTO_SCTP)
617 src = sctp_hdr(skb)->source;
618 else
619 return;
620
621 key->tp.src = src;
622 } else {
623 __be16 dst;
624
625 key->ct.state |= OVS_CS_F_DST_NAT;
626 if (key->eth.type == htons(ETH_P_IP))
627 key->ipv4.addr.dst = ip_hdr(skb)->daddr;
628 else if (key->eth.type == htons(ETH_P_IPV6))
629 memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
630 sizeof(key->ipv6.addr.dst));
631 else
632 return;
633
634 if (key->ip.proto == IPPROTO_UDP)
635 dst = udp_hdr(skb)->dest;
636 else if (key->ip.proto == IPPROTO_TCP)
637 dst = tcp_hdr(skb)->dest;
638 else if (key->ip.proto == IPPROTO_SCTP)
639 dst = sctp_hdr(skb)->dest;
640 else
641 return;
642
643 key->tp.dst = dst;
644 }
645}
646
647/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
648static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
649 const struct ovs_conntrack_info *info,
650 struct sk_buff *skb, struct nf_conn *ct,
651 enum ip_conntrack_info ctinfo)
652{
653 enum nf_nat_manip_type maniptype;
654 int err;
655
656 if (nf_ct_is_untracked(ct)) {
657 /* A NAT action may only be performed on tracked packets. */
658 return NF_ACCEPT;
659 }
660
661 /* Add NAT extension if not confirmed yet. */
662 if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct))
663 return NF_ACCEPT; /* Can't NAT. */
664
665 /* Determine NAT type.
666 * Check if the NAT type can be deduced from the tracked connection.
667 * Make sure expected traffic is NATted only when committing.
668 */
669 if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW &&
670 ct->status & IPS_NAT_MASK &&
671 (!(ct->status & IPS_EXPECTED_BIT) || info->commit)) {
672 /* NAT an established or related connection like before. */
673 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY)
674 /* This is the REPLY direction for a connection
675 * for which NAT was applied in the forward
676 * direction. Do the reverse NAT.
677 */
678 maniptype = ct->status & IPS_SRC_NAT
679 ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC;
680 else
681 maniptype = ct->status & IPS_SRC_NAT
682 ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST;
683 } else if (info->nat & OVS_CT_SRC_NAT) {
684 maniptype = NF_NAT_MANIP_SRC;
685 } else if (info->nat & OVS_CT_DST_NAT) {
686 maniptype = NF_NAT_MANIP_DST;
687 } else {
688 return NF_ACCEPT; /* Connection is not NATed. */
689 }
690 err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
691
692 /* Mark NAT done if successful and update the flow key. */
693 if (err == NF_ACCEPT)
694 ovs_nat_update_key(key, skb, maniptype);
695
696 return err;
697}
698#else /* !CONFIG_NF_NAT_NEEDED */
699static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
700 const struct ovs_conntrack_info *info,
701 struct sk_buff *skb, struct nf_conn *ct,
702 enum ip_conntrack_info ctinfo)
703{
704 return NF_ACCEPT;
705}
706#endif
707
708/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
709 * not done already. Update key with new CT state after passing the packet
710 * through conntrack.
711 * Note that if the packet is deemed invalid by conntrack, skb->nfct will be
712 * set to NULL and 0 will be returned.
713 */
380static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, 714static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
381 const struct ovs_conntrack_info *info, 715 const struct ovs_conntrack_info *info,
382 struct sk_buff *skb) 716 struct sk_buff *skb)
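
The core of the NAT support above is ovs_ct_nat() picking the manipulation to apply: connections that already carry NAT state are translated consistently with the original direction, so a reply packet gets the opposite manipulation, while otherwise the OVS_CT_SRC_NAT/OVS_CT_DST_NAT request from the action decides. A standalone sketch of that selection (conntrack flags reduced to plain ints, names illustrative):

#include <stdio.h>

enum manip { MANIP_NONE, MANIP_SRC, MANIP_DST };

/* Packets of an already-NATed connection are translated "like before";
 * in the reply direction that means applying the opposite manipulation.
 * Otherwise the CT action's own SRC/DST request applies.
 */
static enum manip pick_manip(int conn_already_natted, int conn_src_nat,
			     int reply_dir, int action_src, int action_dst)
{
	if (conn_already_natted) {
		if (reply_dir)
			return conn_src_nat ? MANIP_DST : MANIP_SRC;
		return conn_src_nat ? MANIP_SRC : MANIP_DST;
	}
	if (action_src)
		return MANIP_SRC;
	if (action_dst)
		return MANIP_DST;
	return MANIP_NONE;	/* connection is not NATed */
}

int main(void)
{
	/* SNATed connection, reply packet: undo by rewriting the destination. */
	printf("reply of SNAT conn -> %d\n", pick_manip(1, 1, 1, 0, 0));
	/* New connection, action requests source NAT. */
	printf("new conn, SRC NAT  -> %d\n", pick_manip(0, 0, 0, 1, 0));
	return 0;
}
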
@@ -386,8 +720,13 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
386 * actually run the packet through conntrack twice unless it's for a 720 * actually run the packet through conntrack twice unless it's for a
387 * different zone. 721 * different zone.
388 */ 722 */
389 if (!skb_nfct_cached(net, skb, info)) { 723 bool cached = skb_nfct_cached(net, key, info, skb);
724 enum ip_conntrack_info ctinfo;
725 struct nf_conn *ct;
726
727 if (!cached) {
390 struct nf_conn *tmpl = info->ct; 728 struct nf_conn *tmpl = info->ct;
729 int err;
391 730
392 /* Associate skb with specified zone. */ 731 /* Associate skb with specified zone. */
393 if (tmpl) { 732 if (tmpl) {
@@ -398,17 +737,53 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
398 skb->nfctinfo = IP_CT_NEW; 737 skb->nfctinfo = IP_CT_NEW;
399 } 738 }
400 739
401 if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING, 740 /* Repeat if requested, see nf_iterate(). */
402 skb) != NF_ACCEPT) 741 do {
742 err = nf_conntrack_in(net, info->family,
743 NF_INET_PRE_ROUTING, skb);
744 } while (err == NF_REPEAT);
745
746 if (err != NF_ACCEPT)
403 return -ENOENT; 747 return -ENOENT;
404 748
405 if (ovs_ct_helper(skb, info->family) != NF_ACCEPT) { 749 /* Clear CT state NAT flags to mark that we have not yet done
406 WARN_ONCE(1, "helper rejected packet"); 750 * NAT after the nf_conntrack_in() call. We can actually clear
751 * the whole state, as it will be re-initialized below.
752 */
753 key->ct.state = 0;
754
755 /* Update the key, but keep the NAT flags. */
756 ovs_ct_update_key(skb, info, key, true, true);
757 }
758
759 ct = nf_ct_get(skb, &ctinfo);
760 if (ct) {
761 /* Packets starting a new connection must be NATted before the
762 * helper, so that the helper knows about the NAT. We enforce
763 * this by delaying both NAT and helper calls for unconfirmed
764 * connections until the committing CT action. For later
765 * packets NAT and Helper may be called in either order.
766 *
767 * NAT will be done only if the CT action has NAT, and only
768 * once per packet (per zone), as guarded by the NAT bits in
769 * the key->ct.state.
770 */
771 if (info->nat && !(key->ct.state & OVS_CS_F_NAT_MASK) &&
772 (nf_ct_is_confirmed(ct) || info->commit) &&
773 ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
407 return -EINVAL; 774 return -EINVAL;
408 } 775 }
409 }
410 776
411 ovs_ct_update_key(skb, info, key, true); 777 /* Call the helper only if:
778 * - nf_conntrack_in() was executed above ("!cached") for a
779 * confirmed connection, or
780 * - When committing an unconfirmed connection.
781 */
782 if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) &&
783 ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
784 return -EINVAL;
785 }
786 }
412 787
413 return 0; 788 return 0;
414} 789}
@@ -420,19 +795,24 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
420{ 795{
421 struct nf_conntrack_expect *exp; 796 struct nf_conntrack_expect *exp;
422 797
798 /* If we pass an expected packet through nf_conntrack_in() the
799 * expectation is typically removed, but the packet could still be
800 * lost in upcall processing. To prevent this from happening we
801 * perform an explicit expectation lookup. Expected connections are
802 * always new, and will be passed through conntrack only when they are
803 * committed, as it is OK to remove the expectation at that time.
804 */
423 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb); 805 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
424 if (exp) { 806 if (exp) {
425 u8 state; 807 u8 state;
426 808
809 /* NOTE: New connections are NATted and Helped only when
810 * committed, so we are not calling into NAT here.
811 */
427 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; 812 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
428 __ovs_ct_update_key(key, state, &info->zone, exp->master); 813 __ovs_ct_update_key(key, state, &info->zone, exp->master);
429 } else { 814 } else
430 int err; 815 return __ovs_ct_lookup(net, key, info, skb);
431
432 err = __ovs_ct_lookup(net, key, info, skb);
433 if (err)
434 return err;
435 }
436 816
437 return 0; 817 return 0;
438} 818}
@@ -442,21 +822,12 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
442 const struct ovs_conntrack_info *info, 822 const struct ovs_conntrack_info *info,
443 struct sk_buff *skb) 823 struct sk_buff *skb)
444{ 824{
445 u8 state;
446 int err; 825 int err;
447 826
448 state = key->ct.state;
449 if (key->ct.zone == info->zone.id &&
450 ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) {
451 /* Previous lookup has shown that this connection is already
452 * tracked and committed. Skip committing.
453 */
454 return 0;
455 }
456
457 err = __ovs_ct_lookup(net, key, info, skb); 827 err = __ovs_ct_lookup(net, key, info, skb);
458 if (err) 828 if (err)
459 return err; 829 return err;
830 /* This is a no-op if the connection has already been confirmed. */
460 if (nf_conntrack_confirm(skb) != NF_ACCEPT) 831 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
461 return -EINVAL; 832 return -EINVAL;
462 833
@@ -541,6 +912,135 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
541 return 0; 912 return 0;
542} 913}
543 914
915#ifdef CONFIG_NF_NAT_NEEDED
916static int parse_nat(const struct nlattr *attr,
917 struct ovs_conntrack_info *info, bool log)
918{
919 struct nlattr *a;
920 int rem;
921 bool have_ip_max = false;
922 bool have_proto_max = false;
923 bool ip_vers = (info->family == NFPROTO_IPV6);
924
925 nla_for_each_nested(a, attr, rem) {
926 static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = {
927 [OVS_NAT_ATTR_SRC] = {0, 0},
928 [OVS_NAT_ATTR_DST] = {0, 0},
929 [OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr),
930 sizeof(struct in6_addr)},
931 [OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr),
932 sizeof(struct in6_addr)},
933 [OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)},
934 [OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)},
935 [OVS_NAT_ATTR_PERSISTENT] = {0, 0},
936 [OVS_NAT_ATTR_PROTO_HASH] = {0, 0},
937 [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0},
938 };
939 int type = nla_type(a);
940
941 if (type > OVS_NAT_ATTR_MAX) {
942 OVS_NLERR(log,
943 "Unknown NAT attribute (type=%d, max=%d).\n",
944 type, OVS_NAT_ATTR_MAX);
945 return -EINVAL;
946 }
947
948 if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) {
949 OVS_NLERR(log,
950 "NAT attribute type %d has unexpected length (%d != %d).\n",
951 type, nla_len(a),
952 ovs_nat_attr_lens[type][ip_vers]);
953 return -EINVAL;
954 }
955
956 switch (type) {
957 case OVS_NAT_ATTR_SRC:
958 case OVS_NAT_ATTR_DST:
959 if (info->nat) {
960 OVS_NLERR(log,
961 "Only one type of NAT may be specified.\n"
962 );
963 return -ERANGE;
964 }
965 info->nat |= OVS_CT_NAT;
966 info->nat |= ((type == OVS_NAT_ATTR_SRC)
967 ? OVS_CT_SRC_NAT : OVS_CT_DST_NAT);
968 break;
969
970 case OVS_NAT_ATTR_IP_MIN:
971 nla_memcpy(&info->range.min_addr, a, nla_len(a));
972 info->range.flags |= NF_NAT_RANGE_MAP_IPS;
973 break;
974
975 case OVS_NAT_ATTR_IP_MAX:
976 have_ip_max = true;
977 nla_memcpy(&info->range.max_addr, a,
978 sizeof(info->range.max_addr));
979 info->range.flags |= NF_NAT_RANGE_MAP_IPS;
980 break;
981
982 case OVS_NAT_ATTR_PROTO_MIN:
983 info->range.min_proto.all = htons(nla_get_u16(a));
984 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
985 break;
986
987 case OVS_NAT_ATTR_PROTO_MAX:
988 have_proto_max = true;
989 info->range.max_proto.all = htons(nla_get_u16(a));
990 info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
991 break;
992
993 case OVS_NAT_ATTR_PERSISTENT:
994 info->range.flags |= NF_NAT_RANGE_PERSISTENT;
995 break;
996
997 case OVS_NAT_ATTR_PROTO_HASH:
998 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM;
999 break;
1000
1001 case OVS_NAT_ATTR_PROTO_RANDOM:
1002 info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY;
1003 break;
1004
1005 default:
1006 OVS_NLERR(log, "Unknown nat attribute (%d).\n", type);
1007 return -EINVAL;
1008 }
1009 }
1010
1011 if (rem > 0) {
1012 OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem);
1013 return -EINVAL;
1014 }
1015 if (!info->nat) {
1016 /* Do not allow flags if no type is given. */
1017 if (info->range.flags) {
1018 OVS_NLERR(log,
1019 "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n"
1020 );
1021 return -EINVAL;
1022 }
1023 info->nat = OVS_CT_NAT; /* NAT existing connections. */
1024 } else if (!info->commit) {
1025 OVS_NLERR(log,
1026 "NAT attributes may be specified only when CT COMMIT flag is also specified.\n"
1027 );
1028 return -EINVAL;
1029 }
1030 /* Allow missing IP_MAX. */
1031 if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) {
1032 memcpy(&info->range.max_addr, &info->range.min_addr,
1033 sizeof(info->range.max_addr));
1034 }
1035 /* Allow missing PROTO_MAX. */
1036 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
1037 !have_proto_max) {
1038 info->range.max_proto.all = info->range.min_proto.all;
1039 }
1040 return 0;
1041}
1042#endif
1043
544static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { 1044static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
545 [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, 1045 [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 },
546 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), 1046 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
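
parse_nat() above validates each nested OVS_NAT_ATTR_* payload against a small table of expected lengths, indexed by attribute type and by whether the CT action is IPv6 (addresses are 4 vs 16 bytes). A standalone sketch of that per-attribute length check (constants and names illustrative, not the uapi definitions):

#include <stdio.h>

enum { ATTR_SRC, ATTR_DST, ATTR_IP_MIN, ATTR_IP_MAX, ATTR_MAX_ };

/* Expected payload size per attribute, per address family. */
static const int attr_lens[ATTR_MAX_][2] = {
	[ATTR_SRC]    = { 0, 0 },
	[ATTR_DST]    = { 0, 0 },
	[ATTR_IP_MIN] = { 4, 16 },	/* sizeof(struct in_addr) / in6_addr */
	[ATTR_IP_MAX] = { 4, 16 },
};

static int check_len(int type, int len, int is_ipv6)
{
	if (type < 0 || type >= ATTR_MAX_)
		return -1;		/* unknown attribute */
	if (len != attr_lens[type][is_ipv6])
		return -1;		/* unexpected length */
	return 0;
}

int main(void)
{
	printf("IPv4 IP_MIN, 4 bytes:  %d\n", check_len(ATTR_IP_MIN, 4, 0));
	printf("IPv6 IP_MIN, 4 bytes:  %d\n", check_len(ATTR_IP_MIN, 4, 1));
	printf("IPv6 IP_MIN, 16 bytes: %d\n", check_len(ATTR_IP_MIN, 16, 1));
	return 0;
}
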
@@ -550,7 +1050,11 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
550 [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), 1050 [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels),
551 .maxlen = sizeof(struct md_labels) }, 1051 .maxlen = sizeof(struct md_labels) },
552 [OVS_CT_ATTR_HELPER] = { .minlen = 1, 1052 [OVS_CT_ATTR_HELPER] = { .minlen = 1,
553 .maxlen = NF_CT_HELPER_NAME_LEN } 1053 .maxlen = NF_CT_HELPER_NAME_LEN },
1054#ifdef CONFIG_NF_NAT_NEEDED
1055 /* NAT length is checked when parsing the nested attributes. */
1056 [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX },
1057#endif
554}; 1058};
555 1059
556static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, 1060static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
@@ -617,6 +1121,15 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
617 return -EINVAL; 1121 return -EINVAL;
618 } 1122 }
619 break; 1123 break;
1124#ifdef CONFIG_NF_NAT_NEEDED
1125 case OVS_CT_ATTR_NAT: {
1126 int err = parse_nat(a, info, log);
1127
1128 if (err)
1129 return err;
1130 break;
1131 }
1132#endif
620 default: 1133 default:
621 OVS_NLERR(log, "Unknown conntrack attr (%d)", 1134 OVS_NLERR(log, "Unknown conntrack attr (%d)",
622 type); 1135 type);
@@ -704,6 +1217,74 @@ err_free_ct:
704 return err; 1217 return err;
705} 1218}
706 1219
1220#ifdef CONFIG_NF_NAT_NEEDED
1221static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info,
1222 struct sk_buff *skb)
1223{
1224 struct nlattr *start;
1225
1226 start = nla_nest_start(skb, OVS_CT_ATTR_NAT);
1227 if (!start)
1228 return false;
1229
1230 if (info->nat & OVS_CT_SRC_NAT) {
1231 if (nla_put_flag(skb, OVS_NAT_ATTR_SRC))
1232 return false;
1233 } else if (info->nat & OVS_CT_DST_NAT) {
1234 if (nla_put_flag(skb, OVS_NAT_ATTR_DST))
1235 return false;
1236 } else {
1237 goto out;
1238 }
1239
1240 if (info->range.flags & NF_NAT_RANGE_MAP_IPS) {
1241 if (info->family == NFPROTO_IPV4) {
1242 if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN,
1243 info->range.min_addr.ip) ||
1244 (info->range.max_addr.ip
1245 != info->range.min_addr.ip &&
1246 (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX,
1247 info->range.max_addr.ip))))
1248 return false;
1249#if IS_ENABLED(CONFIG_NF_NAT_IPV6)
1250 } else if (info->family == NFPROTO_IPV6) {
1251 if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN,
1252 &info->range.min_addr.in6) ||
1253 (memcmp(&info->range.max_addr.in6,
1254 &info->range.min_addr.in6,
1255 sizeof(info->range.max_addr.in6)) &&
1256 (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX,
1257 &info->range.max_addr.in6))))
1258 return false;
1259#endif
1260 } else {
1261 return false;
1262 }
1263 }
1264 if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED &&
1265 (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN,
1266 ntohs(info->range.min_proto.all)) ||
1267 (info->range.max_proto.all != info->range.min_proto.all &&
1268 nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX,
1269 ntohs(info->range.max_proto.all)))))
1270 return false;
1271
1272 if (info->range.flags & NF_NAT_RANGE_PERSISTENT &&
1273 nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT))
1274 return false;
1275 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM &&
1276 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH))
1277 return false;
1278 if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY &&
1279 nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM))
1280 return false;
1281out:
1282 nla_nest_end(skb, start);
1283
1284 return true;
1285}
1286#endif
1287
707int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, 1288int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
708 struct sk_buff *skb) 1289 struct sk_buff *skb)
709{ 1290{
@@ -732,7 +1313,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
732 ct_info->helper->name)) 1313 ct_info->helper->name))
733 return -EMSGSIZE; 1314 return -EMSGSIZE;
734 } 1315 }
735 1316#ifdef CONFIG_NF_NAT_NEEDED
1317 if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
1318 return -EMSGSIZE;
1319#endif
736 nla_nest_end(skb, start); 1320 nla_nest_end(skb, start);
737 1321
738 return 0; 1322 return 0;
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index a7544f405c16..8f6230bd6183 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -37,7 +37,8 @@ void ovs_ct_free_action(const struct nlattr *a);
37 37
38#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ 38#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
39 OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \ 39 OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \
40 OVS_CS_F_INVALID | OVS_CS_F_TRACKED) 40 OVS_CS_F_INVALID | OVS_CS_F_TRACKED | \
41 OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
41#else 42#else
42#include <linux/errno.h> 43#include <linux/errno.h>
43 44
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index deadfdab1bc3..0cc66a4e492d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -422,10 +422,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
422 struct sk_buff *nskb = NULL; 422 struct sk_buff *nskb = NULL;
423 struct sk_buff *user_skb = NULL; /* to be queued to userspace */ 423 struct sk_buff *user_skb = NULL; /* to be queued to userspace */
424 struct nlattr *nla; 424 struct nlattr *nla;
425 struct genl_info info = {
426 .dst_sk = ovs_dp_get_net(dp)->genl_sock,
427 .snd_portid = upcall_info->portid,
428 };
429 size_t len; 425 size_t len;
430 unsigned int hlen; 426 unsigned int hlen;
431 int err, dp_ifindex; 427 int err, dp_ifindex;
@@ -466,7 +462,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
466 hlen = skb->len; 462 hlen = skb->len;
467 463
468 len = upcall_msg_size(upcall_info, hlen); 464 len = upcall_msg_size(upcall_info, hlen);
469 user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); 465 user_skb = genlmsg_new(len, GFP_ATOMIC);
470 if (!user_skb) { 466 if (!user_skb) {
471 err = -ENOMEM; 467 err = -ENOMEM;
472 goto out; 468 goto out;
@@ -654,7 +650,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
654 650
655static const struct genl_ops dp_packet_genl_ops[] = { 651static const struct genl_ops dp_packet_genl_ops[] = {
656 { .cmd = OVS_PACKET_CMD_EXECUTE, 652 { .cmd = OVS_PACKET_CMD_EXECUTE,
657 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 653 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
658 .policy = packet_policy, 654 .policy = packet_policy,
659 .doit = ovs_packet_cmd_execute 655 .doit = ovs_packet_cmd_execute
660 } 656 }
@@ -876,7 +872,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
876 return NULL; 872 return NULL;
877 873
878 len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); 874 len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
879 skb = genlmsg_new_unicast(len, info, GFP_KERNEL); 875 skb = genlmsg_new(len, GFP_KERNEL);
880 if (!skb) 876 if (!skb)
881 return ERR_PTR(-ENOMEM); 877 return ERR_PTR(-ENOMEM);
882 878
@@ -1100,26 +1096,32 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1100 struct sw_flow_match match; 1096 struct sw_flow_match match;
1101 struct sw_flow_id sfid; 1097 struct sw_flow_id sfid;
1102 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); 1098 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1103 int error; 1099 int error = 0;
1104 bool log = !a[OVS_FLOW_ATTR_PROBE]; 1100 bool log = !a[OVS_FLOW_ATTR_PROBE];
1105 bool ufid_present; 1101 bool ufid_present;
1106 1102
1107 /* Extract key. */
1108 error = -EINVAL;
1109 if (!a[OVS_FLOW_ATTR_KEY]) {
1110 OVS_NLERR(log, "Flow key attribute not present in set flow.");
1111 goto error;
1112 }
1113
1114 ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); 1103 ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
1115 ovs_match_init(&match, &key, &mask); 1104 if (a[OVS_FLOW_ATTR_KEY]) {
1116 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], 1105 ovs_match_init(&match, &key, &mask);
1117 a[OVS_FLOW_ATTR_MASK], log); 1106 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1107 a[OVS_FLOW_ATTR_MASK], log);
1108 } else if (!ufid_present) {
1109 OVS_NLERR(log,
1110 "Flow set message rejected, Key attribute missing.");
1111 error = -EINVAL;
1112 }
1118 if (error) 1113 if (error)
1119 goto error; 1114 goto error;
1120 1115
1121 /* Validate actions. */ 1116 /* Validate actions. */
1122 if (a[OVS_FLOW_ATTR_ACTIONS]) { 1117 if (a[OVS_FLOW_ATTR_ACTIONS]) {
1118 if (!a[OVS_FLOW_ATTR_KEY]) {
1119 OVS_NLERR(log,
1120 "Flow key attribute not present in set flow.");
1121 error = -EINVAL;
1122 goto error;
1123 }
1124
1123 acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key, 1125 acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
1124 &mask, log); 1126 &mask, log);
1125 if (IS_ERR(acts)) { 1127 if (IS_ERR(acts)) {
@@ -1391,12 +1393,12 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1391 1393
1392static const struct genl_ops dp_flow_genl_ops[] = { 1394static const struct genl_ops dp_flow_genl_ops[] = {
1393 { .cmd = OVS_FLOW_CMD_NEW, 1395 { .cmd = OVS_FLOW_CMD_NEW,
1394 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1396 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1395 .policy = flow_policy, 1397 .policy = flow_policy,
1396 .doit = ovs_flow_cmd_new 1398 .doit = ovs_flow_cmd_new
1397 }, 1399 },
1398 { .cmd = OVS_FLOW_CMD_DEL, 1400 { .cmd = OVS_FLOW_CMD_DEL,
1399 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1401 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1400 .policy = flow_policy, 1402 .policy = flow_policy,
1401 .doit = ovs_flow_cmd_del 1403 .doit = ovs_flow_cmd_del
1402 }, 1404 },
@@ -1407,7 +1409,7 @@ static const struct genl_ops dp_flow_genl_ops[] = {
1407 .dumpit = ovs_flow_cmd_dump 1409 .dumpit = ovs_flow_cmd_dump
1408 }, 1410 },
1409 { .cmd = OVS_FLOW_CMD_SET, 1411 { .cmd = OVS_FLOW_CMD_SET,
1410 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1412 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1411 .policy = flow_policy, 1413 .policy = flow_policy,
1412 .doit = ovs_flow_cmd_set, 1414 .doit = ovs_flow_cmd_set,
1413 }, 1415 },
@@ -1481,9 +1483,9 @@ error:
1481 return -EMSGSIZE; 1483 return -EMSGSIZE;
1482} 1484}
1483 1485
1484static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) 1486static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1485{ 1487{
1486 return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); 1488 return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1487} 1489}
1488 1490
1489/* Called with rcu_read_lock or ovs_mutex. */ 1491/* Called with rcu_read_lock or ovs_mutex. */
@@ -1536,7 +1538,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1536 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1538 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1537 goto err; 1539 goto err;
1538 1540
1539 reply = ovs_dp_cmd_alloc_info(info); 1541 reply = ovs_dp_cmd_alloc_info();
1540 if (!reply) 1542 if (!reply)
1541 return -ENOMEM; 1543 return -ENOMEM;
1542 1544
@@ -1657,7 +1659,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1657 struct datapath *dp; 1659 struct datapath *dp;
1658 int err; 1660 int err;
1659 1661
1660 reply = ovs_dp_cmd_alloc_info(info); 1662 reply = ovs_dp_cmd_alloc_info();
1661 if (!reply) 1663 if (!reply)
1662 return -ENOMEM; 1664 return -ENOMEM;
1663 1665
@@ -1690,7 +1692,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1690 struct datapath *dp; 1692 struct datapath *dp;
1691 int err; 1693 int err;
1692 1694
1693 reply = ovs_dp_cmd_alloc_info(info); 1695 reply = ovs_dp_cmd_alloc_info();
1694 if (!reply) 1696 if (!reply)
1695 return -ENOMEM; 1697 return -ENOMEM;
1696 1698
@@ -1723,7 +1725,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1723 struct datapath *dp; 1725 struct datapath *dp;
1724 int err; 1726 int err;
1725 1727
1726 reply = ovs_dp_cmd_alloc_info(info); 1728 reply = ovs_dp_cmd_alloc_info();
1727 if (!reply) 1729 if (!reply)
1728 return -ENOMEM; 1730 return -ENOMEM;
1729 1731
@@ -1777,12 +1779,12 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1777 1779
1778static const struct genl_ops dp_datapath_genl_ops[] = { 1780static const struct genl_ops dp_datapath_genl_ops[] = {
1779 { .cmd = OVS_DP_CMD_NEW, 1781 { .cmd = OVS_DP_CMD_NEW,
1780 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1782 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1781 .policy = datapath_policy, 1783 .policy = datapath_policy,
1782 .doit = ovs_dp_cmd_new 1784 .doit = ovs_dp_cmd_new
1783 }, 1785 },
1784 { .cmd = OVS_DP_CMD_DEL, 1786 { .cmd = OVS_DP_CMD_DEL,
1785 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1787 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1786 .policy = datapath_policy, 1788 .policy = datapath_policy,
1787 .doit = ovs_dp_cmd_del 1789 .doit = ovs_dp_cmd_del
1788 }, 1790 },
@@ -1793,7 +1795,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
1793 .dumpit = ovs_dp_cmd_dump 1795 .dumpit = ovs_dp_cmd_dump
1794 }, 1796 },
1795 { .cmd = OVS_DP_CMD_SET, 1797 { .cmd = OVS_DP_CMD_SET,
1796 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 1798 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1797 .policy = datapath_policy, 1799 .policy = datapath_policy,
1798 .doit = ovs_dp_cmd_set, 1800 .doit = ovs_dp_cmd_set,
1799 }, 1801 },
@@ -1912,6 +1914,29 @@ static struct vport *lookup_vport(struct net *net,
1912 return ERR_PTR(-EINVAL); 1914 return ERR_PTR(-EINVAL);
1913} 1915}
1914 1916
1917/* Called with ovs_mutex */
1918static void update_headroom(struct datapath *dp)
1919{
1920 unsigned dev_headroom, max_headroom = 0;
1921 struct net_device *dev;
1922 struct vport *vport;
1923 int i;
1924
1925 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1926 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
1927 dev = vport->dev;
1928 dev_headroom = netdev_get_fwd_headroom(dev);
1929 if (dev_headroom > max_headroom)
1930 max_headroom = dev_headroom;
1931 }
1932 }
1933
1934 dp->max_headroom = max_headroom;
1935 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1936 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
1937 netdev_set_rx_headroom(vport->dev, max_headroom);
1938}
1939
1915static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) 1940static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1916{ 1941{
1917 struct nlattr **a = info->attrs; 1942 struct nlattr **a = info->attrs;
@@ -1977,6 +2002,12 @@ restart:
1977 2002
1978 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2003 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1979 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 2004 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2005
2006 if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
2007 update_headroom(dp);
2008 else
2009 netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2010
1980 BUG_ON(err < 0); 2011 BUG_ON(err < 0);
1981 ovs_unlock(); 2012 ovs_unlock();
1982 2013
@@ -2043,8 +2074,10 @@ exit_unlock_free:
2043 2074
2044static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) 2075static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2045{ 2076{
2077 bool must_update_headroom = false;
2046 struct nlattr **a = info->attrs; 2078 struct nlattr **a = info->attrs;
2047 struct sk_buff *reply; 2079 struct sk_buff *reply;
2080 struct datapath *dp;
2048 struct vport *vport; 2081 struct vport *vport;
2049 int err; 2082 int err;
2050 2083
@@ -2066,7 +2099,16 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2066 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 2099 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2067 info->snd_seq, 0, OVS_VPORT_CMD_DEL); 2100 info->snd_seq, 0, OVS_VPORT_CMD_DEL);
2068 BUG_ON(err < 0); 2101 BUG_ON(err < 0);
2102
2103 /* the vport deletion may trigger dp headroom update */
2104 dp = vport->dp;
2105 if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2106 must_update_headroom = true;
2107 netdev_reset_rx_headroom(vport->dev);
2069 ovs_dp_detach_port(vport); 2108 ovs_dp_detach_port(vport);
2109
2110 if (must_update_headroom)
2111 update_headroom(dp);
2070 ovs_unlock(); 2112 ovs_unlock();
2071 2113
2072 ovs_notify(&dp_vport_genl_family, reply, info); 2114 ovs_notify(&dp_vport_genl_family, reply, info);
@@ -2158,12 +2200,12 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2158 2200
2159static const struct genl_ops dp_vport_genl_ops[] = { 2201static const struct genl_ops dp_vport_genl_ops[] = {
2160 { .cmd = OVS_VPORT_CMD_NEW, 2202 { .cmd = OVS_VPORT_CMD_NEW,
2161 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2203 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2162 .policy = vport_policy, 2204 .policy = vport_policy,
2163 .doit = ovs_vport_cmd_new 2205 .doit = ovs_vport_cmd_new
2164 }, 2206 },
2165 { .cmd = OVS_VPORT_CMD_DEL, 2207 { .cmd = OVS_VPORT_CMD_DEL,
2166 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2208 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2167 .policy = vport_policy, 2209 .policy = vport_policy,
2168 .doit = ovs_vport_cmd_del 2210 .doit = ovs_vport_cmd_del
2169 }, 2211 },
@@ -2174,7 +2216,7 @@ static const struct genl_ops dp_vport_genl_ops[] = {
2174 .dumpit = ovs_vport_cmd_dump 2216 .dumpit = ovs_vport_cmd_dump
2175 }, 2217 },
2176 { .cmd = OVS_VPORT_CMD_SET, 2218 { .cmd = OVS_VPORT_CMD_SET,
2177 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ 2219 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2178 .policy = vport_policy, 2220 .policy = vport_policy,
2179 .doit = ovs_vport_cmd_set, 2221 .doit = ovs_vport_cmd_set,
2180 }, 2222 },
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 67bdecd9fdc1..427e39a045cf 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -68,6 +68,8 @@ struct dp_stats_percpu {
68 * ovs_mutex and RCU. 68 * ovs_mutex and RCU.
69 * @stats_percpu: Per-CPU datapath statistics. 69 * @stats_percpu: Per-CPU datapath statistics.
70 * @net: Reference to net namespace. 70 * @net: Reference to net namespace.
71 * @max_headroom: the maximum headroom of all vports in this datapath; it will
72 * be used by all the internal vports in this dp.
71 * 73 *
72 * Context: See the comment on locking at the top of datapath.c for additional 74 * Context: See the comment on locking at the top of datapath.c for additional
73 * locking information. 75 * locking information.
@@ -89,6 +91,8 @@ struct datapath {
89 possible_net_t net; 91 possible_net_t net;
90 92
91 u32 user_features; 93 u32 user_features;
94
95 u32 max_headroom;
92}; 96};
93 97
94/** 98/**
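
The new @max_headroom field caches the largest forwarding headroom needed by any vport; update_headroom() in datapath.c recomputes it when a port that could change the maximum is added or removed, and pushes the result to every port's RX headroom. The stand-alone sketch below uses hypothetical types to show the same recompute-then-propagate idea outside the kernel.

#include <stdio.h>

/* Hypothetical stand-in for a datapath with a few ports; the kernel code
 * walks a hash table of vports under ovs_mutex instead of a flat array. */
struct demo_port {
	unsigned int fwd_headroom;
	unsigned int rx_headroom;
};

static void demo_update_headroom(struct demo_port *ports, int n,
				 unsigned int *max_headroom)
{
	unsigned int max = 0;
	int i;

	for (i = 0; i < n; i++)		/* pass 1: find the new maximum */
		if (ports[i].fwd_headroom > max)
			max = ports[i].fwd_headroom;

	*max_headroom = max;
	for (i = 0; i < n; i++)		/* pass 2: propagate it to every port */
		ports[i].rx_headroom = max;
}

int main(void)
{
	struct demo_port ports[] = { { 16, 0 }, { 64, 0 }, { 32, 0 } };
	unsigned int max_headroom = 0;

	demo_update_headroom(ports, 3, &max_headroom);
	printf("datapath max_headroom = %u\n", max_headroom);
	return 0;
}
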
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 1d055c559eaf..03378e75a67c 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -55,7 +55,7 @@ struct ovs_tunnel_info {
55 FIELD_SIZEOF(struct sw_flow_key, recirc_id)) 55 FIELD_SIZEOF(struct sw_flow_key, recirc_id))
56 56
57struct sw_flow_key { 57struct sw_flow_key {
58 u8 tun_opts[255]; 58 u8 tun_opts[IP_TUNNEL_OPTS_MAX];
59 u8 tun_opts_len; 59 u8 tun_opts_len;
60 struct ip_tunnel_key tun_key; /* Encapsulating tunnel key. */ 60 struct ip_tunnel_key tun_key; /* Encapsulating tunnel key. */
61 struct { 61 struct {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d1bd4a45ca2d..689c17264221 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1959,6 +1959,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
1959 if (!tun_dst) 1959 if (!tun_dst)
1960 return -ENOMEM; 1960 return -ENOMEM;
1961 1961
1962 err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
1963 if (err) {
1964 dst_release((struct dst_entry *)tun_dst);
1965 return err;
1966 }
1967
1962 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, 1968 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
1963 sizeof(*ovs_tun), log); 1969 sizeof(*ovs_tun), log);
1964 if (IS_ERR(a)) { 1970 if (IS_ERR(a)) {
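
validate_and_copy_set_tun() now initialises the dst_cache embedded in the tunnel metadata and releases the metadata dst if that initialisation fails, since dst_cache_init() allocates per-CPU state and can return -ENOMEM. A minimal, hypothetical sketch of the init/destroy pairing of that API (demo_* names are invented, not from this patch):

#include <net/dst_cache.h>

/* Hypothetical usage sketch of the dst_cache lifecycle that the new
 * error handling above protects; not code from this patch. */
static int demo_cache_setup(struct dst_cache *cache)
{
	int err;

	err = dst_cache_init(cache, GFP_KERNEL);   /* allocates per-CPU state */
	if (err)
		return err;                        /* caller must not use the cache */
	return 0;
}

static void demo_cache_teardown(struct dst_cache *cache)
{
	dst_cache_destroy(cache);                  /* frees the per-CPU state */
}
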
@@ -2038,9 +2044,6 @@ static int validate_set(const struct nlattr *a,
2038 break; 2044 break;
2039 2045
2040 case OVS_KEY_ATTR_TUNNEL: 2046 case OVS_KEY_ATTR_TUNNEL:
2041 if (eth_p_mpls(eth_type))
2042 return -EINVAL;
2043
2044 if (masked) 2047 if (masked)
2045 return -EINVAL; /* Masked tunnel set not supported. */ 2048 return -EINVAL; /* Masked tunnel set not supported. */
2046 2049
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 30ab8e127288..1a1fcec88695 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -132,6 +132,6 @@ static void __exit ovs_geneve_tnl_exit(void)
132module_init(ovs_geneve_tnl_init); 132module_init(ovs_geneve_tnl_init);
133module_exit(ovs_geneve_tnl_exit); 133module_exit(ovs_geneve_tnl_exit);
134 134
135MODULE_DESCRIPTION("OVS: Geneve swiching port"); 135MODULE_DESCRIPTION("OVS: Geneve switching port");
136MODULE_LICENSE("GPL"); 136MODULE_LICENSE("GPL");
137MODULE_ALIAS("vport-type-5"); 137MODULE_ALIAS("vport-type-5");
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index ec76398a792f..7c8b90bf0e54 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -138,6 +138,11 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
138 return stats; 138 return stats;
139} 139}
140 140
141static void internal_set_rx_headroom(struct net_device *dev, int new_hr)
142{
143 dev->needed_headroom = new_hr;
144}
145
141static const struct net_device_ops internal_dev_netdev_ops = { 146static const struct net_device_ops internal_dev_netdev_ops = {
142 .ndo_open = internal_dev_open, 147 .ndo_open = internal_dev_open,
143 .ndo_stop = internal_dev_stop, 148 .ndo_stop = internal_dev_stop,
@@ -145,6 +150,7 @@ static const struct net_device_ops internal_dev_netdev_ops = {
145 .ndo_set_mac_address = eth_mac_addr, 150 .ndo_set_mac_address = eth_mac_addr,
146 .ndo_change_mtu = internal_dev_change_mtu, 151 .ndo_change_mtu = internal_dev_change_mtu,
147 .ndo_get_stats64 = internal_get_stats, 152 .ndo_get_stats64 = internal_get_stats,
153 .ndo_set_rx_headroom = internal_set_rx_headroom,
148}; 154};
149 155
150static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { 156static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -158,7 +164,8 @@ static void do_setup(struct net_device *netdev)
158 netdev->netdev_ops = &internal_dev_netdev_ops; 164 netdev->netdev_ops = &internal_dev_netdev_ops;
159 165
160 netdev->priv_flags &= ~IFF_TX_SKB_SHARING; 166 netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
161 netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH; 167 netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
168 IFF_PHONY_HEADROOM;
162 netdev->destructor = internal_dev_destructor; 169 netdev->destructor = internal_dev_destructor;
163 netdev->ethtool_ops = &internal_dev_ethtool_ops; 170 netdev->ethtool_ops = &internal_dev_ethtool_ops;
164 netdev->rtnl_link_ops = &internal_dev_link_ops; 171 netdev->rtnl_link_ops = &internal_dev_link_ops;
@@ -199,6 +206,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
199 err = -ENOMEM; 206 err = -ENOMEM;
200 goto error_free_netdev; 207 goto error_free_netdev;
201 } 208 }
209 vport->dev->needed_headroom = vport->dp->max_headroom;
202 210
203 dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); 211 dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
204 internal_dev = internal_dev_priv(vport->dev); 212 internal_dev = internal_dev_priv(vport->dev);
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 6a6adf314363..4e3972344aa6 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -58,7 +58,7 @@ static void netdev_port_receive(struct sk_buff *skb)
58 return; 58 return;
59 59
60 skb_push(skb, ETH_HLEN); 60 skb_push(skb, ETH_HLEN);
61 ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); 61 skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
62 ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); 62 ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
63 return; 63 return;
64error: 64error:
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 1605691d9414..5eb7694348b5 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -90,7 +90,9 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
90 int err; 90 int err;
91 struct vxlan_config conf = { 91 struct vxlan_config conf = {
92 .no_share = true, 92 .no_share = true,
93 .flags = VXLAN_F_COLLECT_METADATA, 93 .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX,
94 /* Don't restrict the packets that can be sent by MTU */
95 .mtu = IP_MAX_MTU,
94 }; 96 };
95 97
96 if (!options) { 98 if (!options) {
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index c10899cb9040..f01f28a567ad 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -185,13 +185,6 @@ static inline struct vport *vport_from_priv(void *priv)
185int ovs_vport_receive(struct vport *, struct sk_buff *, 185int ovs_vport_receive(struct vport *, struct sk_buff *,
186 const struct ip_tunnel_info *); 186 const struct ip_tunnel_info *);
187 187
188static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
189 const void *start, unsigned int len)
190{
191 if (skb->ip_summed == CHECKSUM_COMPLETE)
192 skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
193}
194
195static inline const char *ovs_vport_name(struct vport *vport) 188static inline const char *ovs_vport_name(struct vport *vport)
196{ 189{
197 return vport->dev->name; 190 return vport->dev->name;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 992396aa635c..1ecfa710ca98 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -557,9 +557,8 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po,
557{ 557{
558 struct net_device *dev; 558 struct net_device *dev;
559 unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; 559 unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
560 struct ethtool_cmd ecmd; 560 struct ethtool_link_ksettings ecmd;
561 int err; 561 int err;
562 u32 speed;
563 562
564 rtnl_lock(); 563 rtnl_lock();
565 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); 564 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
@@ -567,19 +566,19 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po,
567 rtnl_unlock(); 566 rtnl_unlock();
568 return DEFAULT_PRB_RETIRE_TOV; 567 return DEFAULT_PRB_RETIRE_TOV;
569 } 568 }
570 err = __ethtool_get_settings(dev, &ecmd); 569 err = __ethtool_get_link_ksettings(dev, &ecmd);
571 speed = ethtool_cmd_speed(&ecmd);
572 rtnl_unlock(); 570 rtnl_unlock();
573 if (!err) { 571 if (!err) {
574 /* 572 /*
575 * If the link speed is so slow you don't really 573 * If the link speed is so slow you don't really
576 * need to worry about perf anyways 574 * need to worry about perf anyways
577 */ 575 */
578 if (speed < SPEED_1000 || speed == SPEED_UNKNOWN) { 576 if (ecmd.base.speed < SPEED_1000 ||
577 ecmd.base.speed == SPEED_UNKNOWN) {
579 return DEFAULT_PRB_RETIRE_TOV; 578 return DEFAULT_PRB_RETIRE_TOV;
580 } else { 579 } else {
581 msec = 1; 580 msec = 1;
582 div = speed / 1000; 581 div = ecmd.base.speed / 1000;
583 } 582 }
584 } 583 }
585 584
@@ -1916,6 +1915,10 @@ retry:
1916 goto retry; 1915 goto retry;
1917 } 1916 }
1918 1917
1918 if (!dev_validate_header(dev, skb->data, len)) {
1919 err = -EINVAL;
1920 goto out_unlock;
1921 }
1919 if (len > (dev->mtu + dev->hard_header_len + extra_len) && 1922 if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
1920 !packet_extra_vlan_len_allowed(dev, skb)) { 1923 !packet_extra_vlan_len_allowed(dev, skb)) {
1921 err = -EMSGSIZE; 1924 err = -EMSGSIZE;
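
This hunk, together with the checks added to packet_snd() and tpacket_fill_skb() later in this file, replaces the old minimum-length test (ll_header_truncated()) with dev_validate_header(), so a link-layer header supplied by the application that does not validate against the device is normally rejected with -EINVAL before it reaches the driver. A minimal userspace sketch of a SOCK_RAW packet socket supplying a complete Ethernet header; the interface name and MAC addresses are placeholders and CAP_NET_RAW is required.

#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	/* "eth0" and both MAC addresses are placeholders. */
	unsigned char dst[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	unsigned char src[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	unsigned char frame[ETH_HLEN + 46] = { 0 };
	struct ethhdr *eh = (struct ethhdr *)frame;
	struct sockaddr_ll sll = { 0 };
	int fd;

	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* SOCK_RAW means the application supplies the full link-layer
	 * header; after this patch the kernel validates it with
	 * dev_validate_header() instead of only checking its length. */
	memcpy(eh->h_dest, dst, ETH_ALEN);
	memcpy(eh->h_source, src, ETH_ALEN);
	eh->h_proto = htons(0x88b5);	/* local experimental EtherType */

	sll.sll_family = AF_PACKET;
	sll.sll_ifindex = if_nametoindex("eth0");
	sll.sll_halen = ETH_ALEN;
	memcpy(sll.sll_addr, dst, ETH_ALEN);

	if (sendto(fd, frame, sizeof(frame), 0,
		   (struct sockaddr *)&sll, sizeof(sll)) < 0)
		perror("sendto");

	close(fd);
	return 0;
}
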
@@ -1960,6 +1963,64 @@ static unsigned int run_filter(struct sk_buff *skb,
1960 return res; 1963 return res;
1961} 1964}
1962 1965
1966static int __packet_rcv_vnet(const struct sk_buff *skb,
1967 struct virtio_net_hdr *vnet_hdr)
1968{
1969 *vnet_hdr = (const struct virtio_net_hdr) { 0 };
1970
1971 if (skb_is_gso(skb)) {
1972 struct skb_shared_info *sinfo = skb_shinfo(skb);
1973
1974 /* This is a hint as to how much should be linear. */
1975 vnet_hdr->hdr_len =
1976 __cpu_to_virtio16(vio_le(), skb_headlen(skb));
1977 vnet_hdr->gso_size =
1978 __cpu_to_virtio16(vio_le(), sinfo->gso_size);
1979
1980 if (sinfo->gso_type & SKB_GSO_TCPV4)
1981 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1982 else if (sinfo->gso_type & SKB_GSO_TCPV6)
1983 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1984 else if (sinfo->gso_type & SKB_GSO_UDP)
1985 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
1986 else if (sinfo->gso_type & SKB_GSO_FCOE)
1987 return -EINVAL;
1988 else
1989 BUG();
1990
1991 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
1992 vnet_hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1993 } else
1994 vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1995
1996 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1997 vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1998 vnet_hdr->csum_start = __cpu_to_virtio16(vio_le(),
1999 skb_checksum_start_offset(skb));
2000 vnet_hdr->csum_offset = __cpu_to_virtio16(vio_le(),
2001 skb->csum_offset);
2002 } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
2003 vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
2004 } /* else everything is zero */
2005
2006 return 0;
2007}
2008
2009static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
2010 size_t *len)
2011{
2012 struct virtio_net_hdr vnet_hdr;
2013
2014 if (*len < sizeof(vnet_hdr))
2015 return -EINVAL;
2016 *len -= sizeof(vnet_hdr);
2017
2018 if (__packet_rcv_vnet(skb, &vnet_hdr))
2019 return -EINVAL;
2020
2021 return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
2022}
2023
1963/* 2024/*
1964 * This function makes lazy skb cloning in hope that most of packets 2025 * This function makes lazy skb cloning in hope that most of packets
1965 * are discarded by BPF. 2026 * are discarded by BPF.
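
The __packet_rcv_vnet()/packet_rcv_vnet() helpers above factor the virtio_net_hdr construction out of packet_recvmsg() so the same code can fill headers for the RX ring (see the tpacket_rcv() hunks below); matching TX-side helpers are added further down, and the check that refused to set up a ring on a socket with PACKET_VNET_HDR enabled is dropped at the end of this file's diff. The userspace contract is unchanged: with the option set, each frame is preceded by a struct virtio_net_hdr describing its checksum and GSO state. A rough sketch of the plain (non-ring) receive path, which needs CAP_NET_RAW:

#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/virtio_net.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct virtio_net_hdr *vh;
	char buf[65536];
	int fd, on = 1;
	ssize_t n;

	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* Ask the kernel to prepend a virtio_net_hdr to every frame. */
	if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &on, sizeof(on)) < 0) {
		perror("PACKET_VNET_HDR");
		return 1;
	}

	n = recv(fd, buf, sizeof(buf), 0);
	if (n >= (ssize_t)sizeof(*vh)) {
		vh = (struct virtio_net_hdr *)buf;
		/* flags/gso_type describe checksum and GSO state; the
		 * actual frame starts right after the header. */
		printf("vnet flags=0x%x gso_type=0x%x frame_len=%zd\n",
		       vh->flags, vh->gso_type, n - (ssize_t)sizeof(*vh));
	}

	close(fd);
	return 0;
}
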
@@ -2148,7 +2209,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2148 unsigned int maclen = skb_network_offset(skb); 2209 unsigned int maclen = skb_network_offset(skb);
2149 netoff = TPACKET_ALIGN(po->tp_hdrlen + 2210 netoff = TPACKET_ALIGN(po->tp_hdrlen +
2150 (maclen < 16 ? 16 : maclen)) + 2211 (maclen < 16 ? 16 : maclen)) +
2151 po->tp_reserve; 2212 po->tp_reserve;
2213 if (po->has_vnet_hdr)
2214 netoff += sizeof(struct virtio_net_hdr);
2152 macoff = netoff - maclen; 2215 macoff = netoff - maclen;
2153 } 2216 }
2154 if (po->tp_version <= TPACKET_V2) { 2217 if (po->tp_version <= TPACKET_V2) {
@@ -2185,7 +2248,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2185 h.raw = packet_current_rx_frame(po, skb, 2248 h.raw = packet_current_rx_frame(po, skb,
2186 TP_STATUS_KERNEL, (macoff+snaplen)); 2249 TP_STATUS_KERNEL, (macoff+snaplen));
2187 if (!h.raw) 2250 if (!h.raw)
2188 goto ring_is_full; 2251 goto drop_n_account;
2189 if (po->tp_version <= TPACKET_V2) { 2252 if (po->tp_version <= TPACKET_V2) {
2190 packet_increment_rx_head(po, &po->rx_ring); 2253 packet_increment_rx_head(po, &po->rx_ring);
2191 /* 2254 /*
@@ -2204,6 +2267,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2204 } 2267 }
2205 spin_unlock(&sk->sk_receive_queue.lock); 2268 spin_unlock(&sk->sk_receive_queue.lock);
2206 2269
2270 if (po->has_vnet_hdr) {
2271 if (__packet_rcv_vnet(skb, h.raw + macoff -
2272 sizeof(struct virtio_net_hdr))) {
2273 spin_lock(&sk->sk_receive_queue.lock);
2274 goto drop_n_account;
2275 }
2276 }
2277
2207 skb_copy_bits(skb, 0, h.raw + macoff, snaplen); 2278 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
2208 2279
2209 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) 2280 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
@@ -2299,7 +2370,7 @@ drop:
2299 kfree_skb(skb); 2370 kfree_skb(skb);
2300 return 0; 2371 return 0;
2301 2372
2302ring_is_full: 2373drop_n_account:
2303 po->stats.stats1.tp_drops++; 2374 po->stats.stats1.tp_drops++;
2304 spin_unlock(&sk->sk_receive_queue.lock); 2375 spin_unlock(&sk->sk_receive_queue.lock);
2305 2376
@@ -2326,18 +2397,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
2326 sock_wfree(skb); 2397 sock_wfree(skb);
2327} 2398}
2328 2399
2329static bool ll_header_truncated(const struct net_device *dev, int len)
2330{
2331 /* net device doesn't like empty head */
2332 if (unlikely(len < dev->hard_header_len)) {
2333 net_warn_ratelimited("%s: packet size is too short (%d < %d)\n",
2334 current->comm, len, dev->hard_header_len);
2335 return true;
2336 }
2337
2338 return false;
2339}
2340
2341static void tpacket_set_protocol(const struct net_device *dev, 2400static void tpacket_set_protocol(const struct net_device *dev,
2342 struct sk_buff *skb) 2401 struct sk_buff *skb)
2343{ 2402{
@@ -2347,15 +2406,92 @@ static void tpacket_set_protocol(const struct net_device *dev,
2347 } 2406 }
2348} 2407}
2349 2408
2409static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
2410{
2411 unsigned short gso_type = 0;
2412
2413 if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2414 (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2415 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
2416 __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
2417 vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
2418 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2419 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
2420
2421 if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
2422 return -EINVAL;
2423
2424 if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2425 switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2426 case VIRTIO_NET_HDR_GSO_TCPV4:
2427 gso_type = SKB_GSO_TCPV4;
2428 break;
2429 case VIRTIO_NET_HDR_GSO_TCPV6:
2430 gso_type = SKB_GSO_TCPV6;
2431 break;
2432 case VIRTIO_NET_HDR_GSO_UDP:
2433 gso_type = SKB_GSO_UDP;
2434 break;
2435 default:
2436 return -EINVAL;
2437 }
2438
2439 if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
2440 gso_type |= SKB_GSO_TCP_ECN;
2441
2442 if (vnet_hdr->gso_size == 0)
2443 return -EINVAL;
2444 }
2445
2446 vnet_hdr->gso_type = gso_type; /* changes type, temporary storage */
2447 return 0;
2448}
2449
2450static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
2451 struct virtio_net_hdr *vnet_hdr)
2452{
2453 int n;
2454
2455 if (*len < sizeof(*vnet_hdr))
2456 return -EINVAL;
2457 *len -= sizeof(*vnet_hdr);
2458
2459 n = copy_from_iter(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter);
2460 if (n != sizeof(*vnet_hdr))
2461 return -EFAULT;
2462
2463 return __packet_snd_vnet_parse(vnet_hdr, *len);
2464}
2465
2466static int packet_snd_vnet_gso(struct sk_buff *skb,
2467 struct virtio_net_hdr *vnet_hdr)
2468{
2469 if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2470 u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start);
2471 u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset);
2472
2473 if (!skb_partial_csum_set(skb, s, o))
2474 return -EINVAL;
2475 }
2476
2477 skb_shinfo(skb)->gso_size =
2478 __virtio16_to_cpu(vio_le(), vnet_hdr->gso_size);
2479 skb_shinfo(skb)->gso_type = vnet_hdr->gso_type;
2480
2481 /* Header must be checked, and gso_segs computed. */
2482 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2483 skb_shinfo(skb)->gso_segs = 0;
2484 return 0;
2485}
2486
2350static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, 2487static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2351 void *frame, struct net_device *dev, int size_max, 2488 void *frame, struct net_device *dev, void *data, int tp_len,
2352 __be16 proto, unsigned char *addr, int hlen) 2489 __be16 proto, unsigned char *addr, int hlen, int copylen)
2353{ 2490{
2354 union tpacket_uhdr ph; 2491 union tpacket_uhdr ph;
2355 int to_write, offset, len, tp_len, nr_frags, len_max; 2492 int to_write, offset, len, nr_frags, len_max;
2356 struct socket *sock = po->sk.sk_socket; 2493 struct socket *sock = po->sk.sk_socket;
2357 struct page *page; 2494 struct page *page;
2358 void *data;
2359 int err; 2495 int err;
2360 2496
2361 ph.raw = frame; 2497 ph.raw = frame;
@@ -2367,51 +2503,9 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2367 sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags); 2503 sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
2368 skb_shinfo(skb)->destructor_arg = ph.raw; 2504 skb_shinfo(skb)->destructor_arg = ph.raw;
2369 2505
2370 switch (po->tp_version) {
2371 case TPACKET_V2:
2372 tp_len = ph.h2->tp_len;
2373 break;
2374 default:
2375 tp_len = ph.h1->tp_len;
2376 break;
2377 }
2378 if (unlikely(tp_len > size_max)) {
2379 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
2380 return -EMSGSIZE;
2381 }
2382
2383 skb_reserve(skb, hlen); 2506 skb_reserve(skb, hlen);
2384 skb_reset_network_header(skb); 2507 skb_reset_network_header(skb);
2385 2508
2386 if (unlikely(po->tp_tx_has_off)) {
2387 int off_min, off_max, off;
2388 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2389 off_max = po->tx_ring.frame_size - tp_len;
2390 if (sock->type == SOCK_DGRAM) {
2391 switch (po->tp_version) {
2392 case TPACKET_V2:
2393 off = ph.h2->tp_net;
2394 break;
2395 default:
2396 off = ph.h1->tp_net;
2397 break;
2398 }
2399 } else {
2400 switch (po->tp_version) {
2401 case TPACKET_V2:
2402 off = ph.h2->tp_mac;
2403 break;
2404 default:
2405 off = ph.h1->tp_mac;
2406 break;
2407 }
2408 }
2409 if (unlikely((off < off_min) || (off_max < off)))
2410 return -EINVAL;
2411 data = ph.raw + off;
2412 } else {
2413 data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
2414 }
2415 to_write = tp_len; 2509 to_write = tp_len;
2416 2510
2417 if (sock->type == SOCK_DGRAM) { 2511 if (sock->type == SOCK_DGRAM) {
@@ -2419,20 +2513,21 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2419 NULL, tp_len); 2513 NULL, tp_len);
2420 if (unlikely(err < 0)) 2514 if (unlikely(err < 0))
2421 return -EINVAL; 2515 return -EINVAL;
2422 } else if (dev->hard_header_len) { 2516 } else if (copylen) {
2423 if (ll_header_truncated(dev, tp_len)) 2517 int hdrlen = min_t(int, copylen, tp_len);
2424 return -EINVAL;
2425 2518
2426 skb_push(skb, dev->hard_header_len); 2519 skb_push(skb, dev->hard_header_len);
2427 err = skb_store_bits(skb, 0, data, 2520 skb_put(skb, copylen - dev->hard_header_len);
2428 dev->hard_header_len); 2521 err = skb_store_bits(skb, 0, data, hdrlen);
2429 if (unlikely(err)) 2522 if (unlikely(err))
2430 return err; 2523 return err;
2524 if (!dev_validate_header(dev, skb->data, hdrlen))
2525 return -EINVAL;
2431 if (!skb->protocol) 2526 if (!skb->protocol)
2432 tpacket_set_protocol(dev, skb); 2527 tpacket_set_protocol(dev, skb);
2433 2528
2434 data += dev->hard_header_len; 2529 data += hdrlen;
2435 to_write -= dev->hard_header_len; 2530 to_write -= hdrlen;
2436 } 2531 }
2437 2532
2438 offset = offset_in_page(data); 2533 offset = offset_in_page(data);
@@ -2469,10 +2564,66 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2469 return tp_len; 2564 return tp_len;
2470} 2565}
2471 2566
2567static int tpacket_parse_header(struct packet_sock *po, void *frame,
2568 int size_max, void **data)
2569{
2570 union tpacket_uhdr ph;
2571 int tp_len, off;
2572
2573 ph.raw = frame;
2574
2575 switch (po->tp_version) {
2576 case TPACKET_V2:
2577 tp_len = ph.h2->tp_len;
2578 break;
2579 default:
2580 tp_len = ph.h1->tp_len;
2581 break;
2582 }
2583 if (unlikely(tp_len > size_max)) {
2584 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
2585 return -EMSGSIZE;
2586 }
2587
2588 if (unlikely(po->tp_tx_has_off)) {
2589 int off_min, off_max;
2590
2591 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2592 off_max = po->tx_ring.frame_size - tp_len;
2593 if (po->sk.sk_type == SOCK_DGRAM) {
2594 switch (po->tp_version) {
2595 case TPACKET_V2:
2596 off = ph.h2->tp_net;
2597 break;
2598 default:
2599 off = ph.h1->tp_net;
2600 break;
2601 }
2602 } else {
2603 switch (po->tp_version) {
2604 case TPACKET_V2:
2605 off = ph.h2->tp_mac;
2606 break;
2607 default:
2608 off = ph.h1->tp_mac;
2609 break;
2610 }
2611 }
2612 if (unlikely((off < off_min) || (off_max < off)))
2613 return -EINVAL;
2614 } else {
2615 off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2616 }
2617
2618 *data = frame + off;
2619 return tp_len;
2620}
2621
2472static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) 2622static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2473{ 2623{
2474 struct sk_buff *skb; 2624 struct sk_buff *skb;
2475 struct net_device *dev; 2625 struct net_device *dev;
2626 struct virtio_net_hdr *vnet_hdr = NULL;
2476 __be16 proto; 2627 __be16 proto;
2477 int err, reserve = 0; 2628 int err, reserve = 0;
2478 void *ph; 2629 void *ph;
@@ -2480,9 +2631,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2480 bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); 2631 bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
2481 int tp_len, size_max; 2632 int tp_len, size_max;
2482 unsigned char *addr; 2633 unsigned char *addr;
2634 void *data;
2483 int len_sum = 0; 2635 int len_sum = 0;
2484 int status = TP_STATUS_AVAILABLE; 2636 int status = TP_STATUS_AVAILABLE;
2485 int hlen, tlen; 2637 int hlen, tlen, copylen = 0;
2486 2638
2487 mutex_lock(&po->pg_vec_lock); 2639 mutex_lock(&po->pg_vec_lock);
2488 2640
@@ -2515,7 +2667,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2515 size_max = po->tx_ring.frame_size 2667 size_max = po->tx_ring.frame_size
2516 - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); 2668 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2517 2669
2518 if (size_max > dev->mtu + reserve + VLAN_HLEN) 2670 if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
2519 size_max = dev->mtu + reserve + VLAN_HLEN; 2671 size_max = dev->mtu + reserve + VLAN_HLEN;
2520 2672
2521 do { 2673 do {
@@ -2527,11 +2679,30 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2527 continue; 2679 continue;
2528 } 2680 }
2529 2681
2682 skb = NULL;
2683 tp_len = tpacket_parse_header(po, ph, size_max, &data);
2684 if (tp_len < 0)
2685 goto tpacket_error;
2686
2530 status = TP_STATUS_SEND_REQUEST; 2687 status = TP_STATUS_SEND_REQUEST;
2531 hlen = LL_RESERVED_SPACE(dev); 2688 hlen = LL_RESERVED_SPACE(dev);
2532 tlen = dev->needed_tailroom; 2689 tlen = dev->needed_tailroom;
2690 if (po->has_vnet_hdr) {
2691 vnet_hdr = data;
2692 data += sizeof(*vnet_hdr);
2693 tp_len -= sizeof(*vnet_hdr);
2694 if (tp_len < 0 ||
2695 __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
2696 tp_len = -EINVAL;
2697 goto tpacket_error;
2698 }
2699 copylen = __virtio16_to_cpu(vio_le(),
2700 vnet_hdr->hdr_len);
2701 }
2702 copylen = max_t(int, copylen, dev->hard_header_len);
2533 skb = sock_alloc_send_skb(&po->sk, 2703 skb = sock_alloc_send_skb(&po->sk,
2534 hlen + tlen + sizeof(struct sockaddr_ll), 2704 hlen + tlen + sizeof(struct sockaddr_ll) +
2705 (copylen - dev->hard_header_len),
2535 !need_wait, &err); 2706 !need_wait, &err);
2536 2707
2537 if (unlikely(skb == NULL)) { 2708 if (unlikely(skb == NULL)) {
@@ -2540,14 +2711,16 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2540 err = len_sum; 2711 err = len_sum;
2541 goto out_status; 2712 goto out_status;
2542 } 2713 }
2543 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, 2714 tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
2544 addr, hlen); 2715 addr, hlen, copylen);
2545 if (likely(tp_len >= 0) && 2716 if (likely(tp_len >= 0) &&
2546 tp_len > dev->mtu + reserve && 2717 tp_len > dev->mtu + reserve &&
2718 !po->has_vnet_hdr &&
2547 !packet_extra_vlan_len_allowed(dev, skb)) 2719 !packet_extra_vlan_len_allowed(dev, skb))
2548 tp_len = -EMSGSIZE; 2720 tp_len = -EMSGSIZE;
2549 2721
2550 if (unlikely(tp_len < 0)) { 2722 if (unlikely(tp_len < 0)) {
2723tpacket_error:
2551 if (po->tp_loss) { 2724 if (po->tp_loss) {
2552 __packet_set_status(po, ph, 2725 __packet_set_status(po, ph,
2553 TP_STATUS_AVAILABLE); 2726 TP_STATUS_AVAILABLE);
@@ -2561,6 +2734,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2561 } 2734 }
2562 } 2735 }
2563 2736
2737 if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) {
2738 tp_len = -EINVAL;
2739 goto tpacket_error;
2740 }
2741
2564 packet_pick_tx_queue(dev, skb); 2742 packet_pick_tx_queue(dev, skb);
2565 2743
2566 skb->destructor = tpacket_destruct_skb; 2744 skb->destructor = tpacket_destruct_skb;
@@ -2643,12 +2821,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2643 struct sockcm_cookie sockc; 2821 struct sockcm_cookie sockc;
2644 struct virtio_net_hdr vnet_hdr = { 0 }; 2822 struct virtio_net_hdr vnet_hdr = { 0 };
2645 int offset = 0; 2823 int offset = 0;
2646 int vnet_hdr_len;
2647 struct packet_sock *po = pkt_sk(sk); 2824 struct packet_sock *po = pkt_sk(sk);
2648 unsigned short gso_type = 0;
2649 int hlen, tlen; 2825 int hlen, tlen;
2650 int extra_len = 0; 2826 int extra_len = 0;
2651 ssize_t n;
2652 2827
2653 /* 2828 /*
2654 * Get and verify the address. 2829 * Get and verify the address.
@@ -2686,53 +2861,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2686 if (sock->type == SOCK_RAW) 2861 if (sock->type == SOCK_RAW)
2687 reserve = dev->hard_header_len; 2862 reserve = dev->hard_header_len;
2688 if (po->has_vnet_hdr) { 2863 if (po->has_vnet_hdr) {
2689 vnet_hdr_len = sizeof(vnet_hdr); 2864 err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
2690 2865 if (err)
2691 err = -EINVAL;
2692 if (len < vnet_hdr_len)
2693 goto out_unlock;
2694
2695 len -= vnet_hdr_len;
2696
2697 err = -EFAULT;
2698 n = copy_from_iter(&vnet_hdr, vnet_hdr_len, &msg->msg_iter);
2699 if (n != vnet_hdr_len)
2700 goto out_unlock;
2701
2702 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2703 (__virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
2704 __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2 >
2705 __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len)))
2706 vnet_hdr.hdr_len = __cpu_to_virtio16(vio_le(),
2707 __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start) +
2708 __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset) + 2);
2709
2710 err = -EINVAL;
2711 if (__virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len) > len)
2712 goto out_unlock; 2866 goto out_unlock;
2713
2714 if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2715 switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2716 case VIRTIO_NET_HDR_GSO_TCPV4:
2717 gso_type = SKB_GSO_TCPV4;
2718 break;
2719 case VIRTIO_NET_HDR_GSO_TCPV6:
2720 gso_type = SKB_GSO_TCPV6;
2721 break;
2722 case VIRTIO_NET_HDR_GSO_UDP:
2723 gso_type = SKB_GSO_UDP;
2724 break;
2725 default:
2726 goto out_unlock;
2727 }
2728
2729 if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
2730 gso_type |= SKB_GSO_TCP_ECN;
2731
2732 if (vnet_hdr.gso_size == 0)
2733 goto out_unlock;
2734
2735 }
2736 } 2867 }
2737 2868
2738 if (unlikely(sock_flag(sk, SOCK_NOFCS))) { 2869 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2744,7 +2875,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2744 } 2875 }
2745 2876
2746 err = -EMSGSIZE; 2877 err = -EMSGSIZE;
2747 if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) 2878 if (!vnet_hdr.gso_type &&
2879 (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
2748 goto out_unlock; 2880 goto out_unlock;
2749 2881
2750 err = -ENOBUFS; 2882 err = -ENOBUFS;
@@ -2763,9 +2895,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2763 offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); 2895 offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
2764 if (unlikely(offset < 0)) 2896 if (unlikely(offset < 0))
2765 goto out_free; 2897 goto out_free;
2766 } else {
2767 if (ll_header_truncated(dev, len))
2768 goto out_free;
2769 } 2898 }
2770 2899
2771 /* Returns -EFAULT on error */ 2900 /* Returns -EFAULT on error */
@@ -2773,9 +2902,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2773 if (err) 2902 if (err)
2774 goto out_free; 2903 goto out_free;
2775 2904
2905 if (sock->type == SOCK_RAW &&
2906 !dev_validate_header(dev, skb->data, len)) {
2907 err = -EINVAL;
2908 goto out_free;
2909 }
2910
2776 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 2911 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
2777 2912
2778 if (!gso_type && (len > dev->mtu + reserve + extra_len) && 2913 if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
2779 !packet_extra_vlan_len_allowed(dev, skb)) { 2914 !packet_extra_vlan_len_allowed(dev, skb)) {
2780 err = -EMSGSIZE; 2915 err = -EMSGSIZE;
2781 goto out_free; 2916 goto out_free;
@@ -2789,24 +2924,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2789 packet_pick_tx_queue(dev, skb); 2924 packet_pick_tx_queue(dev, skb);
2790 2925
2791 if (po->has_vnet_hdr) { 2926 if (po->has_vnet_hdr) {
2792 if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { 2927 err = packet_snd_vnet_gso(skb, &vnet_hdr);
2793 u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_start); 2928 if (err)
2794 u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr.csum_offset); 2929 goto out_free;
2795 if (!skb_partial_csum_set(skb, s, o)) { 2930 len += sizeof(vnet_hdr);
2796 err = -EINVAL;
2797 goto out_free;
2798 }
2799 }
2800
2801 skb_shinfo(skb)->gso_size =
2802 __virtio16_to_cpu(vio_le(), vnet_hdr.gso_size);
2803 skb_shinfo(skb)->gso_type = gso_type;
2804
2805 /* Header must be checked, and gso_segs computed. */
2806 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2807 skb_shinfo(skb)->gso_segs = 0;
2808
2809 len += vnet_hdr_len;
2810 } 2931 }
2811 2932
2812 skb_probe_transport_header(skb, reserve); 2933 skb_probe_transport_header(skb, reserve);
@@ -3177,51 +3298,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
3177 packet_rcv_has_room(pkt_sk(sk), NULL); 3298 packet_rcv_has_room(pkt_sk(sk), NULL);
3178 3299
3179 if (pkt_sk(sk)->has_vnet_hdr) { 3300 if (pkt_sk(sk)->has_vnet_hdr) {
3180 struct virtio_net_hdr vnet_hdr = { 0 }; 3301 err = packet_rcv_vnet(msg, skb, &len);
3181 3302 if (err)
3182 err = -EINVAL;
3183 vnet_hdr_len = sizeof(vnet_hdr);
3184 if (len < vnet_hdr_len)
3185 goto out_free;
3186
3187 len -= vnet_hdr_len;
3188
3189 if (skb_is_gso(skb)) {
3190 struct skb_shared_info *sinfo = skb_shinfo(skb);
3191
3192 /* This is a hint as to how much should be linear. */
3193 vnet_hdr.hdr_len =
3194 __cpu_to_virtio16(vio_le(), skb_headlen(skb));
3195 vnet_hdr.gso_size =
3196 __cpu_to_virtio16(vio_le(), sinfo->gso_size);
3197 if (sinfo->gso_type & SKB_GSO_TCPV4)
3198 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
3199 else if (sinfo->gso_type & SKB_GSO_TCPV6)
3200 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
3201 else if (sinfo->gso_type & SKB_GSO_UDP)
3202 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
3203 else if (sinfo->gso_type & SKB_GSO_FCOE)
3204 goto out_free;
3205 else
3206 BUG();
3207 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
3208 vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
3209 } else
3210 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
3211
3212 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3213 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
3214 vnet_hdr.csum_start = __cpu_to_virtio16(vio_le(),
3215 skb_checksum_start_offset(skb));
3216 vnet_hdr.csum_offset = __cpu_to_virtio16(vio_le(),
3217 skb->csum_offset);
3218 } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
3219 vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
3220 } /* else everything is zero */
3221
3222 err = memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_len);
3223 if (err < 0)
3224 goto out_free; 3303 goto out_free;
3304 vnet_hdr_len = sizeof(struct virtio_net_hdr);
3225 } 3305 }
3226 3306
3227 /* You lose any data beyond the buffer you gave. If it worries 3307 /* You lose any data beyond the buffer you gave. If it worries
@@ -3552,8 +3632,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
3552 } 3632 }
3553 if (optlen < len) 3633 if (optlen < len)
3554 return -EINVAL; 3634 return -EINVAL;
3555 if (pkt_sk(sk)->has_vnet_hdr)
3556 return -EINVAL;
3557 if (copy_from_user(&req_u.req, optval, len)) 3635 if (copy_from_user(&req_u.req, optval, len))
3558 return -EFAULT; 3636 return -EFAULT;
3559 return packet_set_ring(sk, &req_u, 0, 3637 return packet_set_ring(sk, &req_u, 0,
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index d575ef4e9aa6..ffd5f2297584 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -140,13 +140,15 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
140 rcu_read_unlock(); 140 rcu_read_unlock();
141} 141}
142 142
143void pn_sock_hash(struct sock *sk) 143int pn_sock_hash(struct sock *sk)
144{ 144{
145 struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject); 145 struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject);
146 146
147 mutex_lock(&pnsocks.lock); 147 mutex_lock(&pnsocks.lock);
148 sk_add_node_rcu(sk, hlist); 148 sk_add_node_rcu(sk, hlist);
149 mutex_unlock(&pnsocks.lock); 149 mutex_unlock(&pnsocks.lock);
150
151 return 0;
150} 152}
151EXPORT_SYMBOL(pn_sock_hash); 153EXPORT_SYMBOL(pn_sock_hash);
152 154
@@ -200,7 +202,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
200 pn->resource = spn->spn_resource; 202 pn->resource = spn->spn_resource;
201 203
202 /* Enable RX on the socket */ 204 /* Enable RX on the socket */
203 sk->sk_prot->hash(sk); 205 err = sk->sk_prot->hash(sk);
204out_port: 206out_port:
205 mutex_unlock(&port_mutex); 207 mutex_unlock(&port_mutex);
206out: 208out:
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index f2c670ba7b9b..bffde4b46c5d 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -4,14 +4,13 @@ config RDS
4 depends on INET 4 depends on INET
5 ---help--- 5 ---help---
6 The RDS (Reliable Datagram Sockets) protocol provides reliable, 6 The RDS (Reliable Datagram Sockets) protocol provides reliable,
7 sequenced delivery of datagrams over Infiniband, iWARP, 7 sequenced delivery of datagrams over Infiniband or TCP.
8 or TCP.
9 8
10config RDS_RDMA 9config RDS_RDMA
11 tristate "RDS over Infiniband and iWARP" 10 tristate "RDS over Infiniband"
12 depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS 11 depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
13 ---help--- 12 ---help---
14 Allow RDS to use Infiniband and iWARP as a transport. 13 Allow RDS to use Infiniband as a transport.
15 This transport supports RDMA operations. 14 This transport supports RDMA operations.
16 15
17config RDS_TCP 16config RDS_TCP
diff --git a/net/rds/Makefile b/net/rds/Makefile
index 56d3f6023ced..0e72bec1529f 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -6,9 +6,7 @@ rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \
6obj-$(CONFIG_RDS_RDMA) += rds_rdma.o 6obj-$(CONFIG_RDS_RDMA) += rds_rdma.o
7rds_rdma-y := rdma_transport.o \ 7rds_rdma-y := rdma_transport.o \
8 ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \ 8 ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \
9 ib_sysctl.o ib_rdma.o \ 9 ib_sysctl.o ib_rdma.o ib_fmr.o ib_frmr.o
10 iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \
11 iw_sysctl.o iw_rdma.o
12 10
13 11
14obj-$(CONFIG_RDS_TCP) += rds_tcp.o 12obj-$(CONFIG_RDS_TCP) += rds_tcp.o
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index b5476aebd68d..6beaeb1138f3 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -277,6 +277,27 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval,
277 return rs->rs_transport ? 0 : -ENOPROTOOPT; 277 return rs->rs_transport ? 0 : -ENOPROTOOPT;
278} 278}
279 279
280static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
281 int optlen)
282{
283 int val, valbool;
284
285 if (optlen != sizeof(int))
286 return -EFAULT;
287
288 if (get_user(val, (int __user *)optval))
289 return -EFAULT;
290
291 valbool = val ? 1 : 0;
292
293 if (valbool)
294 sock_set_flag(sk, SOCK_RCVTSTAMP);
295 else
296 sock_reset_flag(sk, SOCK_RCVTSTAMP);
297
298 return 0;
299}
300
280static int rds_setsockopt(struct socket *sock, int level, int optname, 301static int rds_setsockopt(struct socket *sock, int level, int optname,
281 char __user *optval, unsigned int optlen) 302 char __user *optval, unsigned int optlen)
282{ 303{
@@ -312,6 +333,11 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
312 ret = rds_set_transport(rs, optval, optlen); 333 ret = rds_set_transport(rs, optval, optlen);
313 release_sock(sock->sk); 334 release_sock(sock->sk);
314 break; 335 break;
336 case SO_TIMESTAMP:
337 lock_sock(sock->sk);
338 ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
339 release_sock(sock->sk);
340 break;
315 default: 341 default:
316 ret = -ENOPROTOOPT; 342 ret = -ENOPROTOOPT;
317 } 343 }
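
rds_enable_recvtstamp() only sets or clears SOCK_RCVTSTAMP on the socket; combined with the receive-path changes elsewhere in this series, it lets an RDS receiver obtain a per-message receive timestamp through the usual SCM_TIMESTAMP control message. A rough userspace sketch follows; the bind address is a placeholder, the PF_RDS and SCM_TIMESTAMP fallback defines cover older headers, and an RDS-capable interface is assumed.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <unistd.h>

#ifndef PF_RDS
#define PF_RDS 21			/* AF_RDS; not all libcs define it */
#endif
#ifndef SCM_TIMESTAMP
#define SCM_TIMESTAMP SO_TIMESTAMP
#endif

int main(void)
{
	struct sockaddr_in sin = { .sin_family = AF_INET };
	union {				/* aligned control-message buffer */
		char buf[CMSG_SPACE(sizeof(struct timeval))];
		struct cmsghdr align;
	} ctrl;
	char data[1024];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = ctrl.buf, .msg_controllen = sizeof(ctrl.buf),
	};
	struct cmsghdr *cm;
	int fd, on = 1;

	fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* RDS sockets bind to an IP address owned by an RDS-capable
	 * interface; 192.0.2.1 is only a documentation placeholder. */
	sin.sin_addr.s_addr = inet_addr("192.0.2.1");
	sin.sin_port = htons(4000);
	if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
		perror("bind");
		return 1;
	}

	/* Request receive timestamps for incoming RDS messages. */
	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on)) < 0)
		perror("SO_TIMESTAMP");

	if (recvmsg(fd, &msg, 0) >= 0) {
		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
			if (cm->cmsg_level == SOL_SOCKET &&
			    cm->cmsg_type == SCM_TIMESTAMP) {
				struct timeval tv;

				memcpy(&tv, CMSG_DATA(cm), sizeof(tv));
				printf("message received at %ld.%06ld\n",
				       (long)tv.tv_sec, (long)tv.tv_usec);
			}
		}
	}

	close(fd);
	return 0;
}
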
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 9481d55ff6cb..b5342fddaf98 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -42,15 +42,16 @@
42 42
43#include "rds.h" 43#include "rds.h"
44#include "ib.h" 44#include "ib.h"
45#include "ib_mr.h"
45 46
46unsigned int rds_ib_fmr_1m_pool_size = RDS_FMR_1M_POOL_SIZE; 47unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
47unsigned int rds_ib_fmr_8k_pool_size = RDS_FMR_8K_POOL_SIZE; 48unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
48unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; 49unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
49 50
50module_param(rds_ib_fmr_1m_pool_size, int, 0444); 51module_param(rds_ib_mr_1m_pool_size, int, 0444);
51MODULE_PARM_DESC(rds_ib_fmr_1m_pool_size, " Max number of 1M fmr per HCA"); 52MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA");
52module_param(rds_ib_fmr_8k_pool_size, int, 0444); 53module_param(rds_ib_mr_8k_pool_size, int, 0444);
53MODULE_PARM_DESC(rds_ib_fmr_8k_pool_size, " Max number of 8K fmr per HCA"); 54MODULE_PARM_DESC(rds_ib_mr_8k_pool_size, " Max number of 8K mr per HCA");
54module_param(rds_ib_retry_count, int, 0444); 55module_param(rds_ib_retry_count, int, 0444);
55MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); 56MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
56 57
@@ -139,14 +140,20 @@ static void rds_ib_add_one(struct ib_device *device)
139 rds_ibdev->max_wrs = device->attrs.max_qp_wr; 140 rds_ibdev->max_wrs = device->attrs.max_qp_wr;
140 rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); 141 rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
141 142
143 rds_ibdev->has_fr = (device->attrs.device_cap_flags &
144 IB_DEVICE_MEM_MGT_EXTENSIONS);
145 rds_ibdev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
146 device->map_phys_fmr && device->unmap_fmr);
147 rds_ibdev->use_fastreg = (rds_ibdev->has_fr && !rds_ibdev->has_fmr);
148
142 rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; 149 rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
143 rds_ibdev->max_1m_fmrs = device->attrs.max_mr ? 150 rds_ibdev->max_1m_mrs = device->attrs.max_mr ?
144 min_t(unsigned int, (device->attrs.max_mr / 2), 151 min_t(unsigned int, (device->attrs.max_mr / 2),
145 rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size; 152 rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size;
146 153
147 rds_ibdev->max_8k_fmrs = device->attrs.max_mr ? 154 rds_ibdev->max_8k_mrs = device->attrs.max_mr ?
148 min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE), 155 min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE),
149 rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size; 156 rds_ib_mr_8k_pool_size) : rds_ib_mr_8k_pool_size;
150 157
151 rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom; 158 rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
152 rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; 159 rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
@@ -172,10 +179,14 @@ static void rds_ib_add_one(struct ib_device *device)
172 goto put_dev; 179 goto put_dev;
173 } 180 }
174 181
175 rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n", 182 rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_mrs = %d, max_8k_mrs = %d\n",
176 device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, 183 device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
177 rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs, 184 rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_mrs,
178 rds_ibdev->max_8k_fmrs); 185 rds_ibdev->max_8k_mrs);
186
187 pr_info("RDS/IB: %s: %s supported and preferred\n",
188 device->name,
189 rds_ibdev->use_fastreg ? "FRMR" : "FMR");
179 190
180 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); 191 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
181 INIT_LIST_HEAD(&rds_ibdev->conn_list); 192 INIT_LIST_HEAD(&rds_ibdev->conn_list);
@@ -364,7 +375,7 @@ void rds_ib_exit(void)
364 rds_ib_sysctl_exit(); 375 rds_ib_sysctl_exit();
365 rds_ib_recv_exit(); 376 rds_ib_recv_exit();
366 rds_trans_unregister(&rds_ib_transport); 377 rds_trans_unregister(&rds_ib_transport);
367 rds_ib_fmr_exit(); 378 rds_ib_mr_exit();
368} 379}
369 380
370struct rds_transport rds_ib_transport = { 381struct rds_transport rds_ib_transport = {
@@ -400,13 +411,13 @@ int rds_ib_init(void)
400 411
401 INIT_LIST_HEAD(&rds_ib_devices); 412 INIT_LIST_HEAD(&rds_ib_devices);
402 413
403 ret = rds_ib_fmr_init(); 414 ret = rds_ib_mr_init();
404 if (ret) 415 if (ret)
405 goto out; 416 goto out;
406 417
407 ret = ib_register_client(&rds_ib_client); 418 ret = ib_register_client(&rds_ib_client);
408 if (ret) 419 if (ret)
409 goto out_fmr_exit; 420 goto out_mr_exit;
410 421
411 ret = rds_ib_sysctl_init(); 422 ret = rds_ib_sysctl_init();
412 if (ret) 423 if (ret)
@@ -430,8 +441,8 @@ out_sysctl:
430 rds_ib_sysctl_exit(); 441 rds_ib_sysctl_exit();
431out_ibreg: 442out_ibreg:
432 rds_ib_unregister_client(); 443 rds_ib_unregister_client();
433out_fmr_exit: 444out_mr_exit:
434 rds_ib_fmr_exit(); 445 rds_ib_mr_exit();
435out: 446out:
436 return ret; 447 return ret;
437} 448}
diff --git a/net/rds/ib.h b/net/rds/ib.h
index b3fdebb57460..627fb79aee65 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -9,17 +9,12 @@
9#include "rds.h" 9#include "rds.h"
10#include "rdma_transport.h" 10#include "rdma_transport.h"
11 11
12#define RDS_FMR_1M_POOL_SIZE (8192 / 2)
13#define RDS_FMR_1M_MSG_SIZE 256
14#define RDS_FMR_8K_MSG_SIZE 2
15#define RDS_MR_8K_SCALE (256 / (RDS_FMR_8K_MSG_SIZE + 1))
16#define RDS_FMR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2))
17
18#define RDS_IB_MAX_SGE 8 12#define RDS_IB_MAX_SGE 8
19#define RDS_IB_RECV_SGE 2 13#define RDS_IB_RECV_SGE 2
20 14
21#define RDS_IB_DEFAULT_RECV_WR 1024 15#define RDS_IB_DEFAULT_RECV_WR 1024
22#define RDS_IB_DEFAULT_SEND_WR 256 16#define RDS_IB_DEFAULT_SEND_WR 256
17#define RDS_IB_DEFAULT_FR_WR 512
23 18
24#define RDS_IB_DEFAULT_RETRY_COUNT 2 19#define RDS_IB_DEFAULT_RETRY_COUNT 2
25 20
@@ -28,7 +23,6 @@
28#define RDS_IB_RECYCLE_BATCH_COUNT 32 23#define RDS_IB_RECYCLE_BATCH_COUNT 32
29 24
30#define RDS_IB_WC_MAX 32 25#define RDS_IB_WC_MAX 32
31#define RDS_IB_SEND_OP BIT_ULL(63)
32 26
33extern struct rw_semaphore rds_ib_devices_lock; 27extern struct rw_semaphore rds_ib_devices_lock;
34extern struct list_head rds_ib_devices; 28extern struct list_head rds_ib_devices;
@@ -129,6 +123,9 @@ struct rds_ib_connection {
129 struct ib_wc i_send_wc[RDS_IB_WC_MAX]; 123 struct ib_wc i_send_wc[RDS_IB_WC_MAX];
130 struct ib_wc i_recv_wc[RDS_IB_WC_MAX]; 124 struct ib_wc i_recv_wc[RDS_IB_WC_MAX];
131 125
126 /* To control the number of wrs from fastreg */
127 atomic_t i_fastreg_wrs;
128
132 /* interrupt handling */ 129 /* interrupt handling */
133 struct tasklet_struct i_send_tasklet; 130 struct tasklet_struct i_send_tasklet;
134 struct tasklet_struct i_recv_tasklet; 131 struct tasklet_struct i_recv_tasklet;
@@ -207,12 +204,16 @@ struct rds_ib_device {
207 struct list_head conn_list; 204 struct list_head conn_list;
208 struct ib_device *dev; 205 struct ib_device *dev;
209 struct ib_pd *pd; 206 struct ib_pd *pd;
210 unsigned int max_fmrs; 207 bool has_fmr;
208 bool has_fr;
209 bool use_fastreg;
210
211 unsigned int max_mrs;
211 struct rds_ib_mr_pool *mr_1m_pool; 212 struct rds_ib_mr_pool *mr_1m_pool;
212 struct rds_ib_mr_pool *mr_8k_pool; 213 struct rds_ib_mr_pool *mr_8k_pool;
213 unsigned int fmr_max_remaps; 214 unsigned int fmr_max_remaps;
214 unsigned int max_8k_fmrs; 215 unsigned int max_8k_mrs;
215 unsigned int max_1m_fmrs; 216 unsigned int max_1m_mrs;
216 int max_sge; 217 int max_sge;
217 unsigned int max_wrs; 218 unsigned int max_wrs;
218 unsigned int max_initiator_depth; 219 unsigned int max_initiator_depth;
@@ -266,6 +267,8 @@ struct rds_ib_statistics {
266 uint64_t s_ib_rdma_mr_1m_pool_flush; 267 uint64_t s_ib_rdma_mr_1m_pool_flush;
267 uint64_t s_ib_rdma_mr_1m_pool_wait; 268 uint64_t s_ib_rdma_mr_1m_pool_wait;
268 uint64_t s_ib_rdma_mr_1m_pool_depleted; 269 uint64_t s_ib_rdma_mr_1m_pool_depleted;
270 uint64_t s_ib_rdma_mr_8k_reused;
271 uint64_t s_ib_rdma_mr_1m_reused;
269 uint64_t s_ib_atomic_cswp; 272 uint64_t s_ib_atomic_cswp;
270 uint64_t s_ib_atomic_fadd; 273 uint64_t s_ib_atomic_fadd;
271}; 274};
@@ -317,8 +320,6 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
317void rds_ib_dev_put(struct rds_ib_device *rds_ibdev); 320void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
318extern struct ib_client rds_ib_client; 321extern struct ib_client rds_ib_client;
319 322
320extern unsigned int rds_ib_fmr_1m_pool_size;
321extern unsigned int rds_ib_fmr_8k_pool_size;
322extern unsigned int rds_ib_retry_count; 323extern unsigned int rds_ib_retry_count;
323 324
324extern spinlock_t ib_nodev_conns_lock; 325extern spinlock_t ib_nodev_conns_lock;
@@ -348,17 +349,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
348void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 349void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
349void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 350void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
350void rds_ib_destroy_nodev_conns(void); 351void rds_ib_destroy_nodev_conns(void);
351struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev, 352void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc);
352 int npages);
353void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
354void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
355void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
356 struct rds_sock *rs, u32 *key_ret);
357void rds_ib_sync_mr(void *trans_private, int dir);
358void rds_ib_free_mr(void *trans_private, int invalidate);
359void rds_ib_flush_mrs(void);
360int rds_ib_fmr_init(void);
361void rds_ib_fmr_exit(void);
362 353
363/* ib_recv.c */ 354/* ib_recv.c */
364int rds_ib_recv_init(void); 355int rds_ib_recv_init(void);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index da5a7fb98c77..8764970f0c24 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -236,12 +236,10 @@ static void rds_ib_cq_comp_handler_recv(struct ib_cq *cq, void *context)
236 tasklet_schedule(&ic->i_recv_tasklet); 236 tasklet_schedule(&ic->i_recv_tasklet);
237} 237}
238 238
239static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq, 239static void poll_scq(struct rds_ib_connection *ic, struct ib_cq *cq,
240 struct ib_wc *wcs, 240 struct ib_wc *wcs)
241 struct rds_ib_ack_state *ack_state)
242{ 241{
243 int nr; 242 int nr, i;
244 int i;
245 struct ib_wc *wc; 243 struct ib_wc *wc;
246 244
247 while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) { 245 while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
@@ -251,10 +249,12 @@ static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq,
251 (unsigned long long)wc->wr_id, wc->status, 249 (unsigned long long)wc->wr_id, wc->status,
252 wc->byte_len, be32_to_cpu(wc->ex.imm_data)); 250 wc->byte_len, be32_to_cpu(wc->ex.imm_data));
253 251
254 if (wc->wr_id & RDS_IB_SEND_OP) 252 if (wc->wr_id <= ic->i_send_ring.w_nr ||
253 wc->wr_id == RDS_IB_ACK_WR_ID)
255 rds_ib_send_cqe_handler(ic, wc); 254 rds_ib_send_cqe_handler(ic, wc);
256 else 255 else
257 rds_ib_recv_cqe_handler(ic, wc, ack_state); 256 rds_ib_mr_cqe_handler(ic, wc);
257
258 } 258 }
259 } 259 }
260} 260}
@@ -263,14 +263,12 @@ static void rds_ib_tasklet_fn_send(unsigned long data)
263{ 263{
264 struct rds_ib_connection *ic = (struct rds_ib_connection *)data; 264 struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
265 struct rds_connection *conn = ic->conn; 265 struct rds_connection *conn = ic->conn;
266 struct rds_ib_ack_state state;
267 266
268 rds_ib_stats_inc(s_ib_tasklet_call); 267 rds_ib_stats_inc(s_ib_tasklet_call);
269 268
270 memset(&state, 0, sizeof(state)); 269 poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
271 poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state);
272 ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); 270 ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
273 poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state); 271 poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
274 272
275 if (rds_conn_up(conn) && 273 if (rds_conn_up(conn) &&
276 (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) || 274 (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
@@ -278,6 +276,25 @@ static void rds_ib_tasklet_fn_send(unsigned long data)
278 rds_send_xmit(ic->conn); 276 rds_send_xmit(ic->conn);
279} 277}
280 278
279static void poll_rcq(struct rds_ib_connection *ic, struct ib_cq *cq,
280 struct ib_wc *wcs,
281 struct rds_ib_ack_state *ack_state)
282{
283 int nr, i;
284 struct ib_wc *wc;
285
286 while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
287 for (i = 0; i < nr; i++) {
288 wc = wcs + i;
289 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
290 (unsigned long long)wc->wr_id, wc->status,
291 wc->byte_len, be32_to_cpu(wc->ex.imm_data));
292
293 rds_ib_recv_cqe_handler(ic, wc, ack_state);
294 }
295 }
296}
297
281static void rds_ib_tasklet_fn_recv(unsigned long data) 298static void rds_ib_tasklet_fn_recv(unsigned long data)
282{ 299{
283 struct rds_ib_connection *ic = (struct rds_ib_connection *)data; 300 struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
@@ -291,9 +308,9 @@ static void rds_ib_tasklet_fn_recv(unsigned long data)
291 rds_ib_stats_inc(s_ib_tasklet_call); 308 rds_ib_stats_inc(s_ib_tasklet_call);
292 309
293 memset(&state, 0, sizeof(state)); 310 memset(&state, 0, sizeof(state));
294 poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); 311 poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
295 ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); 312 ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
296 poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); 313 poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
297 314
298 if (state.ack_next_valid) 315 if (state.ack_next_valid)
299 rds_ib_set_ack(ic, state.ack_next, state.ack_required); 316 rds_ib_set_ack(ic, state.ack_next, state.ack_required);
@@ -351,7 +368,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
351 struct ib_qp_init_attr attr; 368 struct ib_qp_init_attr attr;
352 struct ib_cq_init_attr cq_attr = {}; 369 struct ib_cq_init_attr cq_attr = {};
353 struct rds_ib_device *rds_ibdev; 370 struct rds_ib_device *rds_ibdev;
354 int ret; 371 int ret, fr_queue_space;
355 372
356 /* 373 /*
357 * It's normal to see a null device if an incoming connection races 374 * It's normal to see a null device if an incoming connection races
@@ -361,6 +378,12 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
361 if (!rds_ibdev) 378 if (!rds_ibdev)
362 return -EOPNOTSUPP; 379 return -EOPNOTSUPP;
363 380
 381 /* The fr_queue_space is currently set to 512, to add extra space on
 382 * the completion queue and send queue. This extra space is used for
 383 * FRMR registration and invalidation work requests.
 384 */
385 fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0);
386
364 /* add the conn now so that connection establishment has the dev */ 387 /* add the conn now so that connection establishment has the dev */
365 rds_ib_add_conn(rds_ibdev, conn); 388 rds_ib_add_conn(rds_ibdev, conn);
366 389
@@ -372,7 +395,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
372 /* Protection domain and memory range */ 395 /* Protection domain and memory range */
373 ic->i_pd = rds_ibdev->pd; 396 ic->i_pd = rds_ibdev->pd;
374 397
375 cq_attr.cqe = ic->i_send_ring.w_nr + 1; 398 cq_attr.cqe = ic->i_send_ring.w_nr + fr_queue_space + 1;
376 399
377 ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send, 400 ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send,
378 rds_ib_cq_event_handler, conn, 401 rds_ib_cq_event_handler, conn,
@@ -412,7 +435,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
412 attr.event_handler = rds_ib_qp_event_handler; 435 attr.event_handler = rds_ib_qp_event_handler;
413 attr.qp_context = conn; 436 attr.qp_context = conn;
414 /* + 1 to allow for the single ack message */ 437 /* + 1 to allow for the single ack message */
415 attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1; 438 attr.cap.max_send_wr = ic->i_send_ring.w_nr + fr_queue_space + 1;
416 attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; 439 attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1;
417 attr.cap.max_send_sge = rds_ibdev->max_sge; 440 attr.cap.max_send_sge = rds_ibdev->max_sge;
418 attr.cap.max_recv_sge = RDS_IB_RECV_SGE; 441 attr.cap.max_recv_sge = RDS_IB_RECV_SGE;
@@ -420,6 +443,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
420 attr.qp_type = IB_QPT_RC; 443 attr.qp_type = IB_QPT_RC;
421 attr.send_cq = ic->i_send_cq; 444 attr.send_cq = ic->i_send_cq;
422 attr.recv_cq = ic->i_recv_cq; 445 attr.recv_cq = ic->i_recv_cq;
446 atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
423 447
424 /* 448 /*
425 * XXX this can fail if max_*_wr is too large? Are we supposed 449 * XXX this can fail if max_*_wr is too large? Are we supposed
@@ -739,7 +763,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
739 */ 763 */
740 wait_event(rds_ib_ring_empty_wait, 764 wait_event(rds_ib_ring_empty_wait,
741 rds_ib_ring_empty(&ic->i_recv_ring) && 765 rds_ib_ring_empty(&ic->i_recv_ring) &&
742 (atomic_read(&ic->i_signaled_sends) == 0)); 766 (atomic_read(&ic->i_signaled_sends) == 0) &&
767 (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
743 tasklet_kill(&ic->i_send_tasklet); 768 tasklet_kill(&ic->i_send_tasklet);
744 tasklet_kill(&ic->i_recv_tasklet); 769 tasklet_kill(&ic->i_recv_tasklet);
745 770
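The sizing changes above grow the send CQ and the QP's send work-request limit by fr_queue_space so that FRMR registration and invalidation WRs do not compete with data sends for queue slots. A minimal user-space sketch of that arithmetic, assuming a stand-in constant FR_WR_BUDGET in place of the kernel's RDS_IB_DEFAULT_FR_WR:

/*
 * Standalone sketch (not kernel code) of the queue sizing used in
 * rds_ib_setup_qp(): when fast registration is in use, the send CQ and
 * the QP's max_send_wr are grown by a fixed budget reserved for MR
 * registration/invalidation work requests.
 */
#include <stdbool.h>
#include <stdio.h>

#define FR_WR_BUDGET 256	/* illustrative; the real value lives in ib_mr.h */

struct qp_sizes {
	unsigned int send_cqe;		/* completion queue entries for sends */
	unsigned int max_send_wr;	/* send work requests the QP may hold */
};

static struct qp_sizes size_send_queues(unsigned int ring_entries, bool use_fastreg)
{
	unsigned int fr_space = use_fastreg ? FR_WR_BUDGET : 0;
	struct qp_sizes s;

	/* + 1 leaves room for the single ack message, as in the patch */
	s.send_cqe = ring_entries + fr_space + 1;
	s.max_send_wr = ring_entries + fr_space + 1;
	return s;
}

int main(void)
{
	struct qp_sizes s = size_send_queues(1024, true);

	printf("send cqe=%u max_send_wr=%u\n", s.send_cqe, s.max_send_wr);
	return 0;
}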
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
new file mode 100644
index 000000000000..4fe8f4fec4ee
--- /dev/null
+++ b/net/rds/ib_fmr.c
@@ -0,0 +1,248 @@
1/*
2 * Copyright (c) 2016 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "ib_mr.h"
34
35struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
36{
37 struct rds_ib_mr_pool *pool;
38 struct rds_ib_mr *ibmr = NULL;
39 struct rds_ib_fmr *fmr;
40 int err = 0;
41
42 if (npages <= RDS_MR_8K_MSG_SIZE)
43 pool = rds_ibdev->mr_8k_pool;
44 else
45 pool = rds_ibdev->mr_1m_pool;
46
47 ibmr = rds_ib_try_reuse_ibmr(pool);
48 if (ibmr)
49 return ibmr;
50
51 ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
52 rdsibdev_to_node(rds_ibdev));
53 if (!ibmr) {
54 err = -ENOMEM;
55 goto out_no_cigar;
56 }
57
58 fmr = &ibmr->u.fmr;
59 fmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
60 (IB_ACCESS_LOCAL_WRITE |
61 IB_ACCESS_REMOTE_READ |
62 IB_ACCESS_REMOTE_WRITE |
63 IB_ACCESS_REMOTE_ATOMIC),
64 &pool->fmr_attr);
65 if (IS_ERR(fmr->fmr)) {
66 err = PTR_ERR(fmr->fmr);
67 fmr->fmr = NULL;
68 pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, err);
69 goto out_no_cigar;
70 }
71
72 ibmr->pool = pool;
73 if (pool->pool_type == RDS_IB_MR_8K_POOL)
74 rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
75 else
76 rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
77
78 return ibmr;
79
80out_no_cigar:
81 if (ibmr) {
82 if (fmr->fmr)
83 ib_dealloc_fmr(fmr->fmr);
84 kfree(ibmr);
85 }
86 atomic_dec(&pool->item_count);
87 return ERR_PTR(err);
88}
89
90int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
91 struct scatterlist *sg, unsigned int nents)
92{
93 struct ib_device *dev = rds_ibdev->dev;
94 struct rds_ib_fmr *fmr = &ibmr->u.fmr;
95 struct scatterlist *scat = sg;
96 u64 io_addr = 0;
97 u64 *dma_pages;
98 u32 len;
99 int page_cnt, sg_dma_len;
100 int i, j;
101 int ret;
102
103 sg_dma_len = ib_dma_map_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
104 if (unlikely(!sg_dma_len)) {
105 pr_warn("RDS/IB: %s failed!\n", __func__);
106 return -EBUSY;
107 }
108
109 len = 0;
110 page_cnt = 0;
111
112 for (i = 0; i < sg_dma_len; ++i) {
113 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
114 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
115
116 if (dma_addr & ~PAGE_MASK) {
117 if (i > 0)
118 return -EINVAL;
119 else
120 ++page_cnt;
121 }
122 if ((dma_addr + dma_len) & ~PAGE_MASK) {
123 if (i < sg_dma_len - 1)
124 return -EINVAL;
125 else
126 ++page_cnt;
127 }
128
129 len += dma_len;
130 }
131
132 page_cnt += len >> PAGE_SHIFT;
133 if (page_cnt > ibmr->pool->fmr_attr.max_pages)
134 return -EINVAL;
135
136 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
137 rdsibdev_to_node(rds_ibdev));
138 if (!dma_pages)
139 return -ENOMEM;
140
141 page_cnt = 0;
142 for (i = 0; i < sg_dma_len; ++i) {
143 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
144 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
145
146 for (j = 0; j < dma_len; j += PAGE_SIZE)
147 dma_pages[page_cnt++] =
148 (dma_addr & PAGE_MASK) + j;
149 }
150
151 ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr);
152 if (ret)
153 goto out;
154
155 /* Success - we successfully remapped the MR, so we can
156 * safely tear down the old mapping.
157 */
158 rds_ib_teardown_mr(ibmr);
159
160 ibmr->sg = scat;
161 ibmr->sg_len = nents;
162 ibmr->sg_dma_len = sg_dma_len;
163 ibmr->remap_count++;
164
165 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
166 rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
167 else
168 rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
169 ret = 0;
170
171out:
172 kfree(dma_pages);
173
174 return ret;
175}
176
177struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *rds_ibdev,
178 struct scatterlist *sg,
179 unsigned long nents,
180 u32 *key)
181{
182 struct rds_ib_mr *ibmr = NULL;
183 struct rds_ib_fmr *fmr;
184 int ret;
185
186 ibmr = rds_ib_alloc_fmr(rds_ibdev, nents);
187 if (IS_ERR(ibmr))
188 return ibmr;
189
190 ibmr->device = rds_ibdev;
191 fmr = &ibmr->u.fmr;
192 ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
193 if (ret == 0)
194 *key = fmr->fmr->rkey;
195 else
196 rds_ib_free_mr(ibmr, 0);
197
198 return ibmr;
199}
200
201void rds_ib_unreg_fmr(struct list_head *list, unsigned int *nfreed,
202 unsigned long *unpinned, unsigned int goal)
203{
204 struct rds_ib_mr *ibmr, *next;
205 struct rds_ib_fmr *fmr;
206 LIST_HEAD(fmr_list);
207 int ret = 0;
208 unsigned int freed = *nfreed;
209
210 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
211 list_for_each_entry(ibmr, list, unmap_list) {
212 fmr = &ibmr->u.fmr;
213 list_add(&fmr->fmr->list, &fmr_list);
214 }
215
216 ret = ib_unmap_fmr(&fmr_list);
217 if (ret)
218 pr_warn("RDS/IB: FMR invalidation failed (err=%d)\n", ret);
219
220 /* Now we can destroy the DMA mapping and unpin any pages */
221 list_for_each_entry_safe(ibmr, next, list, unmap_list) {
222 fmr = &ibmr->u.fmr;
223 *unpinned += ibmr->sg_len;
224 __rds_ib_teardown_mr(ibmr);
225 if (freed < goal ||
226 ibmr->remap_count >= ibmr->pool->fmr_attr.max_maps) {
227 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
228 rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
229 else
230 rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
231 list_del(&ibmr->unmap_list);
232 ib_dealloc_fmr(fmr->fmr);
233 kfree(ibmr);
234 freed++;
235 }
236 }
237 *nfreed = freed;
238}
239
240void rds_ib_free_fmr_list(struct rds_ib_mr *ibmr)
241{
242 struct rds_ib_mr_pool *pool = ibmr->pool;
243
244 if (ibmr->remap_count >= pool->fmr_attr.max_maps)
245 llist_add(&ibmr->llnode, &pool->drop_list);
246 else
247 llist_add(&ibmr->llnode, &pool->free_list);
248}
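rds_ib_map_fmr() above derives the FMR page-list length from the DMA segments and rejects interior misalignment. A standalone sketch of the same page-counting rule, using plain (addr, len) pairs instead of a kernel scatterlist:

#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SHIFT 12
#define SKETCH_PAGE_SIZE  (1UL << SKETCH_PAGE_SHIFT)
#define SKETCH_PAGE_MASK  (~(SKETCH_PAGE_SIZE - 1))

struct seg { uint64_t addr; uint64_t len; };

/*
 * Returns the number of page-list entries the mapping needs, or -1 if a
 * segment other than the first starts, or other than the last ends, off a
 * page boundary -- the same rule rds_ib_map_fmr() enforces above.
 */
static long count_fmr_pages(const struct seg *segs, int n)
{
	uint64_t total_len = 0;
	long page_cnt = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (segs[i].addr & ~SKETCH_PAGE_MASK) {
			if (i > 0)
				return -1;
			page_cnt++;	/* unaligned head of the first segment */
		}
		if ((segs[i].addr + segs[i].len) & ~SKETCH_PAGE_MASK) {
			if (i < n - 1)
				return -1;
			page_cnt++;	/* unaligned tail of the last segment */
		}
		total_len += segs[i].len;
	}
	return page_cnt + (long)(total_len >> SKETCH_PAGE_SHIFT);
}

int main(void)
{
	struct seg segs[] = {
		{ 0x100000, 4096 },
		{ 0x200000, 8192 },
	};

	printf("pages needed: %ld\n", count_fmr_pages(segs, 2));
	return 0;
}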
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
new file mode 100644
index 000000000000..93ff038ea9d1
--- /dev/null
+++ b/net/rds/ib_frmr.c
@@ -0,0 +1,376 @@
1/*
2 * Copyright (c) 2016 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "ib_mr.h"
34
35static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
36 int npages)
37{
38 struct rds_ib_mr_pool *pool;
39 struct rds_ib_mr *ibmr = NULL;
40 struct rds_ib_frmr *frmr;
41 int err = 0;
42
43 if (npages <= RDS_MR_8K_MSG_SIZE)
44 pool = rds_ibdev->mr_8k_pool;
45 else
46 pool = rds_ibdev->mr_1m_pool;
47
48 ibmr = rds_ib_try_reuse_ibmr(pool);
49 if (ibmr)
50 return ibmr;
51
52 ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
53 rdsibdev_to_node(rds_ibdev));
54 if (!ibmr) {
55 err = -ENOMEM;
56 goto out_no_cigar;
57 }
58
59 frmr = &ibmr->u.frmr;
60 frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
61 pool->fmr_attr.max_pages);
62 if (IS_ERR(frmr->mr)) {
63 pr_warn("RDS/IB: %s failed to allocate MR", __func__);
64 goto out_no_cigar;
65 }
66
67 ibmr->pool = pool;
68 if (pool->pool_type == RDS_IB_MR_8K_POOL)
69 rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
70 else
71 rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
72
73 if (atomic_read(&pool->item_count) > pool->max_items_soft)
74 pool->max_items_soft = pool->max_items;
75
76 frmr->fr_state = FRMR_IS_FREE;
77 return ibmr;
78
79out_no_cigar:
80 kfree(ibmr);
81 atomic_dec(&pool->item_count);
82 return ERR_PTR(err);
83}
84
85static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop)
86{
87 struct rds_ib_mr_pool *pool = ibmr->pool;
88
89 if (drop)
90 llist_add(&ibmr->llnode, &pool->drop_list);
91 else
92 llist_add(&ibmr->llnode, &pool->free_list);
93 atomic_add(ibmr->sg_len, &pool->free_pinned);
94 atomic_inc(&pool->dirty_count);
95
96 /* If we've pinned too many pages, request a flush */
97 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
98 atomic_read(&pool->dirty_count) >= pool->max_items / 5)
99 queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
100}
101
102static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
103{
104 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
105 struct ib_send_wr *failed_wr;
106 struct ib_reg_wr reg_wr;
107 int ret;
108
109 while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
110 atomic_inc(&ibmr->ic->i_fastreg_wrs);
111 cpu_relax();
112 }
113
114 ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, PAGE_SIZE);
115 if (unlikely(ret != ibmr->sg_len))
116 return ret < 0 ? ret : -EINVAL;
117
118 /* Perform a WR for the fast_reg_mr. Each individual page
119 * in the sg list is added to the fast reg page list and placed
 120 * inside the fast_reg_mr WR. The key used is a rolling 8-bit
121 * counter, which should guarantee uniqueness.
122 */
123 ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
124 frmr->fr_state = FRMR_IS_INUSE;
125
126 memset(&reg_wr, 0, sizeof(reg_wr));
127 reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
128 reg_wr.wr.opcode = IB_WR_REG_MR;
129 reg_wr.wr.num_sge = 0;
130 reg_wr.mr = frmr->mr;
131 reg_wr.key = frmr->mr->rkey;
132 reg_wr.access = IB_ACCESS_LOCAL_WRITE |
133 IB_ACCESS_REMOTE_READ |
134 IB_ACCESS_REMOTE_WRITE;
135 reg_wr.wr.send_flags = IB_SEND_SIGNALED;
136
137 failed_wr = &reg_wr.wr;
138 ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, &failed_wr);
139 WARN_ON(failed_wr != &reg_wr.wr);
140 if (unlikely(ret)) {
141 /* Failure here can be because of -ENOMEM as well */
142 frmr->fr_state = FRMR_IS_STALE;
143 atomic_inc(&ibmr->ic->i_fastreg_wrs);
144 if (printk_ratelimit())
145 pr_warn("RDS/IB: %s returned error(%d)\n",
146 __func__, ret);
147 }
148 return ret;
149}
150
151static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
152 struct rds_ib_mr_pool *pool,
153 struct rds_ib_mr *ibmr,
154 struct scatterlist *sg, unsigned int sg_len)
155{
156 struct ib_device *dev = rds_ibdev->dev;
157 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
158 int i;
159 u32 len;
160 int ret = 0;
161
 162 /* Tear down the old ibmr mapping here and fill it in with the
 163 * new sg values.
 164 */
165 rds_ib_teardown_mr(ibmr);
166
167 ibmr->sg = sg;
168 ibmr->sg_len = sg_len;
169 ibmr->sg_dma_len = 0;
170 frmr->sg_byte_len = 0;
171 WARN_ON(ibmr->sg_dma_len);
172 ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
173 DMA_BIDIRECTIONAL);
174 if (unlikely(!ibmr->sg_dma_len)) {
175 pr_warn("RDS/IB: %s failed!\n", __func__);
176 return -EBUSY;
177 }
178
179 frmr->sg_byte_len = 0;
180 frmr->dma_npages = 0;
181 len = 0;
182
183 ret = -EINVAL;
184 for (i = 0; i < ibmr->sg_dma_len; ++i) {
185 unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
186 u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);
187
188 frmr->sg_byte_len += dma_len;
189 if (dma_addr & ~PAGE_MASK) {
190 if (i > 0)
191 goto out_unmap;
192 else
193 ++frmr->dma_npages;
194 }
195
196 if ((dma_addr + dma_len) & ~PAGE_MASK) {
197 if (i < ibmr->sg_dma_len - 1)
198 goto out_unmap;
199 else
200 ++frmr->dma_npages;
201 }
202
203 len += dma_len;
204 }
205 frmr->dma_npages += len >> PAGE_SHIFT;
206
207 if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) {
208 ret = -EMSGSIZE;
209 goto out_unmap;
210 }
211
212 ret = rds_ib_post_reg_frmr(ibmr);
213 if (ret)
214 goto out_unmap;
215
216 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
217 rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
218 else
219 rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
220
221 return ret;
222
223out_unmap:
224 ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
225 DMA_BIDIRECTIONAL);
226 ibmr->sg_dma_len = 0;
227 return ret;
228}
229
230static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
231{
232 struct ib_send_wr *s_wr, *failed_wr;
233 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
234 struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
235 int ret = -EINVAL;
236
237 if (!i_cm_id || !i_cm_id->qp || !frmr->mr)
238 goto out;
239
240 if (frmr->fr_state != FRMR_IS_INUSE)
241 goto out;
242
243 while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
244 atomic_inc(&ibmr->ic->i_fastreg_wrs);
245 cpu_relax();
246 }
247
248 frmr->fr_inv = true;
249 s_wr = &frmr->fr_wr;
250
251 memset(s_wr, 0, sizeof(*s_wr));
252 s_wr->wr_id = (unsigned long)(void *)ibmr;
253 s_wr->opcode = IB_WR_LOCAL_INV;
254 s_wr->ex.invalidate_rkey = frmr->mr->rkey;
255 s_wr->send_flags = IB_SEND_SIGNALED;
256
257 failed_wr = s_wr;
258 ret = ib_post_send(i_cm_id->qp, s_wr, &failed_wr);
259 WARN_ON(failed_wr != s_wr);
260 if (unlikely(ret)) {
261 frmr->fr_state = FRMR_IS_STALE;
262 frmr->fr_inv = false;
263 atomic_inc(&ibmr->ic->i_fastreg_wrs);
264 pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
265 goto out;
266 }
267out:
268 return ret;
269}
270
271void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
272{
273 struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id;
274 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
275
276 if (wc->status != IB_WC_SUCCESS) {
277 frmr->fr_state = FRMR_IS_STALE;
278 if (rds_conn_up(ic->conn))
279 rds_ib_conn_error(ic->conn,
280 "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
281 &ic->conn->c_laddr,
282 &ic->conn->c_faddr,
283 wc->status,
284 ib_wc_status_msg(wc->status),
285 wc->vendor_err);
286 }
287
288 if (frmr->fr_inv) {
289 frmr->fr_state = FRMR_IS_FREE;
290 frmr->fr_inv = false;
291 }
292
293 atomic_inc(&ic->i_fastreg_wrs);
294}
295
296void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
297 unsigned long *unpinned, unsigned int goal)
298{
299 struct rds_ib_mr *ibmr, *next;
300 struct rds_ib_frmr *frmr;
301 int ret = 0;
302 unsigned int freed = *nfreed;
303
 304 /* Post a LOCAL_INV work request for each MR that is still mapped */
305 list_for_each_entry(ibmr, list, unmap_list) {
306 if (ibmr->sg_dma_len)
307 ret |= rds_ib_post_inv(ibmr);
308 }
309 if (ret)
310 pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
311
312 /* Now we can destroy the DMA mapping and unpin any pages */
313 list_for_each_entry_safe(ibmr, next, list, unmap_list) {
314 *unpinned += ibmr->sg_len;
315 frmr = &ibmr->u.frmr;
316 __rds_ib_teardown_mr(ibmr);
317 if (freed < goal || frmr->fr_state == FRMR_IS_STALE) {
318 /* Don't de-allocate if the MR is not free yet */
319 if (frmr->fr_state == FRMR_IS_INUSE)
320 continue;
321
322 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
323 rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
324 else
325 rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
326 list_del(&ibmr->unmap_list);
327 if (frmr->mr)
328 ib_dereg_mr(frmr->mr);
329 kfree(ibmr);
330 freed++;
331 }
332 }
333 *nfreed = freed;
334}
335
336struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
337 struct rds_ib_connection *ic,
338 struct scatterlist *sg,
339 unsigned long nents, u32 *key)
340{
341 struct rds_ib_mr *ibmr = NULL;
342 struct rds_ib_frmr *frmr;
343 int ret;
344
345 do {
346 if (ibmr)
347 rds_ib_free_frmr(ibmr, true);
348 ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
349 if (IS_ERR(ibmr))
350 return ibmr;
351 frmr = &ibmr->u.frmr;
352 } while (frmr->fr_state != FRMR_IS_FREE);
353
354 ibmr->ic = ic;
355 ibmr->device = rds_ibdev;
356 ret = rds_ib_map_frmr(rds_ibdev, ibmr->pool, ibmr, sg, nents);
357 if (ret == 0) {
358 *key = frmr->mr->rkey;
359 } else {
360 rds_ib_free_frmr(ibmr, false);
361 ibmr = ERR_PTR(ret);
362 }
363
364 return ibmr;
365}
366
367void rds_ib_free_frmr_list(struct rds_ib_mr *ibmr)
368{
369 struct rds_ib_mr_pool *pool = ibmr->pool;
370 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
371
372 if (frmr->fr_state == FRMR_IS_STALE)
373 llist_add(&ibmr->llnode, &pool->drop_list);
374 else
375 llist_add(&ibmr->llnode, &pool->free_list);
376}
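Both rds_ib_post_reg_frmr() and rds_ib_post_inv() above throttle themselves against i_fastreg_wrs, the budget of reserved registration/invalidation work requests that the MR completion handler replenishes. A user-space sketch of that pattern, with C11 atomics standing in for the kernel's atomic_t and an illustrative budget value:

#include <stdatomic.h>
#include <stdio.h>

#define FR_WR_BUDGET 4		/* illustrative budget */

static atomic_int fastreg_wrs = FR_WR_BUDGET;

static void claim_fastreg_slot(void)
{
	/* Mirror the kernel loop: take a slot, undo and retry if we overdrew. */
	while (atomic_fetch_sub(&fastreg_wrs, 1) - 1 <= 0)
		atomic_fetch_add(&fastreg_wrs, 1);	/* busy-wait; the kernel uses cpu_relax() */
}

static void release_fastreg_slot(void)
{
	atomic_fetch_add(&fastreg_wrs, 1);	/* completion handler gives the slot back */
}

int main(void)
{
	claim_fastreg_slot();
	printf("slots left after claim: %d\n", atomic_load(&fastreg_wrs));
	release_fastreg_slot();
	printf("slots left after release: %d\n", atomic_load(&fastreg_wrs));
	return 0;
}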
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
new file mode 100644
index 000000000000..1c754f4acbe5
--- /dev/null
+++ b/net/rds/ib_mr.h
@@ -0,0 +1,148 @@
1/*
2 * Copyright (c) 2016 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#ifndef _RDS_IB_MR_H
33#define _RDS_IB_MR_H
34
35#include <linux/kernel.h>
36
37#include "rds.h"
38#include "ib.h"
39
40#define RDS_MR_1M_POOL_SIZE (8192 / 2)
41#define RDS_MR_1M_MSG_SIZE 256
42#define RDS_MR_8K_MSG_SIZE 2
43#define RDS_MR_8K_SCALE (256 / (RDS_MR_8K_MSG_SIZE + 1))
44#define RDS_MR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2))
45
46struct rds_ib_fmr {
47 struct ib_fmr *fmr;
48 u64 *dma;
49};
50
51enum rds_ib_fr_state {
52 FRMR_IS_FREE, /* mr invalidated & ready for use */
53 FRMR_IS_INUSE, /* mr is in use or used & can be invalidated */
54 FRMR_IS_STALE, /* Stale MR and needs to be dropped */
55};
56
57struct rds_ib_frmr {
58 struct ib_mr *mr;
59 enum rds_ib_fr_state fr_state;
60 bool fr_inv;
61 struct ib_send_wr fr_wr;
62 unsigned int dma_npages;
63 unsigned int sg_byte_len;
64};
65
66/* This is stored as mr->r_trans_private. */
67struct rds_ib_mr {
68 struct rds_ib_device *device;
69 struct rds_ib_mr_pool *pool;
70 struct rds_ib_connection *ic;
71
72 struct llist_node llnode;
73
74 /* unmap_list is for freeing */
75 struct list_head unmap_list;
76 unsigned int remap_count;
77
78 struct scatterlist *sg;
79 unsigned int sg_len;
80 int sg_dma_len;
81
82 union {
83 struct rds_ib_fmr fmr;
84 struct rds_ib_frmr frmr;
85 } u;
86};
87
88/* Our own little MR pool */
89struct rds_ib_mr_pool {
90 unsigned int pool_type;
91 struct mutex flush_lock; /* serialize fmr invalidate */
92 struct delayed_work flush_worker; /* flush worker */
93
94 atomic_t item_count; /* total # of MRs */
 95 atomic_t dirty_count; /* # of dirty MRs */
96
 97 struct llist_head drop_list; /* MRs that have reached max_maps */
98 struct llist_head free_list; /* unused MRs */
99 struct llist_head clean_list; /* unused & unmapped MRs */
100 wait_queue_head_t flush_wait;
101
102 atomic_t free_pinned; /* memory pinned by free MRs */
103 unsigned long max_items;
104 unsigned long max_items_soft;
105 unsigned long max_free_pinned;
106 struct ib_fmr_attr fmr_attr;
107 bool use_fastreg;
108};
109
110extern struct workqueue_struct *rds_ib_mr_wq;
111extern unsigned int rds_ib_mr_1m_pool_size;
112extern unsigned int rds_ib_mr_8k_pool_size;
113extern bool prefer_frmr;
114
115struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev,
116 int npages);
117void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
118 struct rds_info_rdma_connection *iinfo);
119void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
120void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
121 struct rds_sock *rs, u32 *key_ret);
122void rds_ib_sync_mr(void *trans_private, int dir);
123void rds_ib_free_mr(void *trans_private, int invalidate);
124void rds_ib_flush_mrs(void);
125int rds_ib_mr_init(void);
126void rds_ib_mr_exit(void);
127
128void __rds_ib_teardown_mr(struct rds_ib_mr *);
129void rds_ib_teardown_mr(struct rds_ib_mr *);
130struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int);
131int rds_ib_map_fmr(struct rds_ib_device *, struct rds_ib_mr *,
132 struct scatterlist *, unsigned int);
133struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *);
134int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **);
135struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *,
136 unsigned long, u32 *);
137struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *);
138void rds_ib_unreg_fmr(struct list_head *, unsigned int *,
139 unsigned long *, unsigned int);
140void rds_ib_free_fmr_list(struct rds_ib_mr *);
141struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
142 struct rds_ib_connection *ic,
143 struct scatterlist *sg,
144 unsigned long nents, u32 *key);
145void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
146 unsigned long *unpinned, unsigned int goal);
147void rds_ib_free_frmr_list(struct rds_ib_mr *);
148#endif
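The pool-size macros at the top of the new ib_mr.h determine how many MRs each pool may hold and how many pages each MR may map. The following throwaway program simply evaluates those macros (copied from the header above) so the derived numbers are easy to check:

#include <stdio.h>

#define RDS_MR_1M_POOL_SIZE	(8192 / 2)
#define RDS_MR_1M_MSG_SIZE	256
#define RDS_MR_8K_MSG_SIZE	2
#define RDS_MR_8K_SCALE		(256 / (RDS_MR_8K_MSG_SIZE + 1))
#define RDS_MR_8K_POOL_SIZE	(RDS_MR_8K_SCALE * (8192 / 2))

int main(void)
{
	printf("1M pool: %d MRs, each mapping up to %d pages (+1 for unaligned MRs)\n",
	       RDS_MR_1M_POOL_SIZE, RDS_MR_1M_MSG_SIZE);
	printf("8K pool: scale %d -> %d MRs, each mapping up to %d pages\n",
	       RDS_MR_8K_SCALE, RDS_MR_8K_POOL_SIZE, RDS_MR_8K_MSG_SIZE);
	return 0;
}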
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index a2340748ec86..f7164ac1ffc1 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -35,78 +35,13 @@
35#include <linux/rculist.h> 35#include <linux/rculist.h>
36#include <linux/llist.h> 36#include <linux/llist.h>
37 37
38#include "rds.h" 38#include "ib_mr.h"
39#include "ib.h" 39
40struct workqueue_struct *rds_ib_mr_wq;
40 41
41static DEFINE_PER_CPU(unsigned long, clean_list_grace); 42static DEFINE_PER_CPU(unsigned long, clean_list_grace);
42#define CLEAN_LIST_BUSY_BIT 0 43#define CLEAN_LIST_BUSY_BIT 0
43 44
44/*
45 * This is stored as mr->r_trans_private.
46 */
47struct rds_ib_mr {
48 struct rds_ib_device *device;
49 struct rds_ib_mr_pool *pool;
50 struct ib_fmr *fmr;
51
52 struct llist_node llnode;
53
54 /* unmap_list is for freeing */
55 struct list_head unmap_list;
56 unsigned int remap_count;
57
58 struct scatterlist *sg;
59 unsigned int sg_len;
60 u64 *dma;
61 int sg_dma_len;
62};
63
64/*
65 * Our own little FMR pool
66 */
67struct rds_ib_mr_pool {
68 unsigned int pool_type;
69 struct mutex flush_lock; /* serialize fmr invalidate */
70 struct delayed_work flush_worker; /* flush worker */
71
72 atomic_t item_count; /* total # of MRs */
 73 atomic_t dirty_count; /* # of dirty MRs */
74
75 struct llist_head drop_list; /* MRs that have reached their max_maps limit */
76 struct llist_head free_list; /* unused MRs */
 77 struct llist_head clean_list; /* global unused & unmapped MRs */
78 wait_queue_head_t flush_wait;
79
80 atomic_t free_pinned; /* memory pinned by free MRs */
81 unsigned long max_items;
82 unsigned long max_items_soft;
83 unsigned long max_free_pinned;
84 struct ib_fmr_attr fmr_attr;
85};
86
87static struct workqueue_struct *rds_ib_fmr_wq;
88
89int rds_ib_fmr_init(void)
90{
91 rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
92 if (!rds_ib_fmr_wq)
93 return -ENOMEM;
94 return 0;
95}
96
97/* By the time this is called all the IB devices should have been torn down and
98 * had their pools freed. As each pool is freed its work struct is waited on,
99 * so the pool flushing work queue should be idle by the time we get here.
100 */
101void rds_ib_fmr_exit(void)
102{
103 destroy_workqueue(rds_ib_fmr_wq);
104}
105
106static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **);
107static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
108static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
109
110static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) 45static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
111{ 46{
112 struct rds_ib_device *rds_ibdev; 47 struct rds_ib_device *rds_ibdev;
@@ -235,41 +170,6 @@ void rds_ib_destroy_nodev_conns(void)
235 rds_conn_destroy(ic->conn); 170 rds_conn_destroy(ic->conn);
236} 171}
237 172
238struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
239 int pool_type)
240{
241 struct rds_ib_mr_pool *pool;
242
243 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
244 if (!pool)
245 return ERR_PTR(-ENOMEM);
246
247 pool->pool_type = pool_type;
248 init_llist_head(&pool->free_list);
249 init_llist_head(&pool->drop_list);
250 init_llist_head(&pool->clean_list);
251 mutex_init(&pool->flush_lock);
252 init_waitqueue_head(&pool->flush_wait);
253 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
254
255 if (pool_type == RDS_IB_MR_1M_POOL) {
256 /* +1 allows for unaligned MRs */
257 pool->fmr_attr.max_pages = RDS_FMR_1M_MSG_SIZE + 1;
258 pool->max_items = RDS_FMR_1M_POOL_SIZE;
259 } else {
260 /* pool_type == RDS_IB_MR_8K_POOL */
261 pool->fmr_attr.max_pages = RDS_FMR_8K_MSG_SIZE + 1;
262 pool->max_items = RDS_FMR_8K_POOL_SIZE;
263 }
264
265 pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
266 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
267 pool->fmr_attr.page_shift = PAGE_SHIFT;
268 pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4;
269
270 return pool;
271}
272
273void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo) 173void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo)
274{ 174{
275 struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; 175 struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
@@ -278,16 +178,7 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co
278 iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages; 178 iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages;
279} 179}
280 180
281void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) 181struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
282{
283 cancel_delayed_work_sync(&pool->flush_worker);
284 rds_ib_flush_mr_pool(pool, 1, NULL);
285 WARN_ON(atomic_read(&pool->item_count));
286 WARN_ON(atomic_read(&pool->free_pinned));
287 kfree(pool);
288}
289
290static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
291{ 182{
292 struct rds_ib_mr *ibmr = NULL; 183 struct rds_ib_mr *ibmr = NULL;
293 struct llist_node *ret; 184 struct llist_node *ret;
@@ -297,8 +188,13 @@ static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
297 flag = this_cpu_ptr(&clean_list_grace); 188 flag = this_cpu_ptr(&clean_list_grace);
298 set_bit(CLEAN_LIST_BUSY_BIT, flag); 189 set_bit(CLEAN_LIST_BUSY_BIT, flag);
299 ret = llist_del_first(&pool->clean_list); 190 ret = llist_del_first(&pool->clean_list);
300 if (ret) 191 if (ret) {
301 ibmr = llist_entry(ret, struct rds_ib_mr, llnode); 192 ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
193 if (pool->pool_type == RDS_IB_MR_8K_POOL)
194 rds_ib_stats_inc(s_ib_rdma_mr_8k_reused);
195 else
196 rds_ib_stats_inc(s_ib_rdma_mr_1m_reused);
197 }
302 198
303 clear_bit(CLEAN_LIST_BUSY_BIT, flag); 199 clear_bit(CLEAN_LIST_BUSY_BIT, flag);
304 preempt_enable(); 200 preempt_enable();
@@ -317,190 +213,6 @@ static inline void wait_clean_list_grace(void)
317 } 213 }
318} 214}
319 215
320static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev,
321 int npages)
322{
323 struct rds_ib_mr_pool *pool;
324 struct rds_ib_mr *ibmr = NULL;
325 int err = 0, iter = 0;
326
327 if (npages <= RDS_FMR_8K_MSG_SIZE)
328 pool = rds_ibdev->mr_8k_pool;
329 else
330 pool = rds_ibdev->mr_1m_pool;
331
332 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
333 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
334
335 /* Switch pools if one of the pool is reaching upper limit */
336 if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
337 if (pool->pool_type == RDS_IB_MR_8K_POOL)
338 pool = rds_ibdev->mr_1m_pool;
339 else
340 pool = rds_ibdev->mr_8k_pool;
341 }
342
343 while (1) {
344 ibmr = rds_ib_reuse_fmr(pool);
345 if (ibmr)
346 return ibmr;
347
348 /* No clean MRs - now we have the choice of either
349 * allocating a fresh MR up to the limit imposed by the
350 * driver, or flush any dirty unused MRs.
351 * We try to avoid stalling in the send path if possible,
352 * so we allocate as long as we're allowed to.
353 *
354 * We're fussy with enforcing the FMR limit, though. If the driver
355 * tells us we can't use more than N fmrs, we shouldn't start
356 * arguing with it */
357 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
358 break;
359
360 atomic_dec(&pool->item_count);
361
362 if (++iter > 2) {
363 if (pool->pool_type == RDS_IB_MR_8K_POOL)
364 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
365 else
366 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
367 return ERR_PTR(-EAGAIN);
368 }
369
370 /* We do have some empty MRs. Flush them out. */
371 if (pool->pool_type == RDS_IB_MR_8K_POOL)
372 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
373 else
374 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
375 rds_ib_flush_mr_pool(pool, 0, &ibmr);
376 if (ibmr)
377 return ibmr;
378 }
379
380 ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, rdsibdev_to_node(rds_ibdev));
381 if (!ibmr) {
382 err = -ENOMEM;
383 goto out_no_cigar;
384 }
385
386 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
387 (IB_ACCESS_LOCAL_WRITE |
388 IB_ACCESS_REMOTE_READ |
389 IB_ACCESS_REMOTE_WRITE|
390 IB_ACCESS_REMOTE_ATOMIC),
391 &pool->fmr_attr);
392 if (IS_ERR(ibmr->fmr)) {
393 err = PTR_ERR(ibmr->fmr);
394 ibmr->fmr = NULL;
395 printk(KERN_WARNING "RDS/IB: ib_alloc_fmr failed (err=%d)\n", err);
396 goto out_no_cigar;
397 }
398
399 ibmr->pool = pool;
400 if (pool->pool_type == RDS_IB_MR_8K_POOL)
401 rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
402 else
403 rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
404
405 return ibmr;
406
407out_no_cigar:
408 if (ibmr) {
409 if (ibmr->fmr)
410 ib_dealloc_fmr(ibmr->fmr);
411 kfree(ibmr);
412 }
413 atomic_dec(&pool->item_count);
414 return ERR_PTR(err);
415}
416
417static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
418 struct scatterlist *sg, unsigned int nents)
419{
420 struct ib_device *dev = rds_ibdev->dev;
421 struct scatterlist *scat = sg;
422 u64 io_addr = 0;
423 u64 *dma_pages;
424 u32 len;
425 int page_cnt, sg_dma_len;
426 int i, j;
427 int ret;
428
429 sg_dma_len = ib_dma_map_sg(dev, sg, nents,
430 DMA_BIDIRECTIONAL);
431 if (unlikely(!sg_dma_len)) {
432 printk(KERN_WARNING "RDS/IB: dma_map_sg failed!\n");
433 return -EBUSY;
434 }
435
436 len = 0;
437 page_cnt = 0;
438
439 for (i = 0; i < sg_dma_len; ++i) {
440 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
441 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
442
443 if (dma_addr & ~PAGE_MASK) {
444 if (i > 0)
445 return -EINVAL;
446 else
447 ++page_cnt;
448 }
449 if ((dma_addr + dma_len) & ~PAGE_MASK) {
450 if (i < sg_dma_len - 1)
451 return -EINVAL;
452 else
453 ++page_cnt;
454 }
455
456 len += dma_len;
457 }
458
459 page_cnt += len >> PAGE_SHIFT;
460 if (page_cnt > ibmr->pool->fmr_attr.max_pages)
461 return -EINVAL;
462
463 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
464 rdsibdev_to_node(rds_ibdev));
465 if (!dma_pages)
466 return -ENOMEM;
467
468 page_cnt = 0;
469 for (i = 0; i < sg_dma_len; ++i) {
470 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
471 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
472
473 for (j = 0; j < dma_len; j += PAGE_SIZE)
474 dma_pages[page_cnt++] =
475 (dma_addr & PAGE_MASK) + j;
476 }
477
478 ret = ib_map_phys_fmr(ibmr->fmr,
479 dma_pages, page_cnt, io_addr);
480 if (ret)
481 goto out;
482
483 /* Success - we successfully remapped the MR, so we can
484 * safely tear down the old mapping. */
485 rds_ib_teardown_mr(ibmr);
486
487 ibmr->sg = scat;
488 ibmr->sg_len = nents;
489 ibmr->sg_dma_len = sg_dma_len;
490 ibmr->remap_count++;
491
492 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
493 rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
494 else
495 rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
496 ret = 0;
497
498out:
499 kfree(dma_pages);
500
501 return ret;
502}
503
504void rds_ib_sync_mr(void *trans_private, int direction) 216void rds_ib_sync_mr(void *trans_private, int direction)
505{ 217{
506 struct rds_ib_mr *ibmr = trans_private; 218 struct rds_ib_mr *ibmr = trans_private;
@@ -518,7 +230,7 @@ void rds_ib_sync_mr(void *trans_private, int direction)
518 } 230 }
519} 231}
520 232
521static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) 233void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
522{ 234{
523 struct rds_ib_device *rds_ibdev = ibmr->device; 235 struct rds_ib_device *rds_ibdev = ibmr->device;
524 236
@@ -549,7 +261,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
549 } 261 }
550} 262}
551 263
552static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr) 264void rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
553{ 265{
554 unsigned int pinned = ibmr->sg_len; 266 unsigned int pinned = ibmr->sg_len;
555 267
@@ -623,17 +335,15 @@ static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
623 * If the number of MRs allocated exceeds the limit, we also try 335 * If the number of MRs allocated exceeds the limit, we also try
624 * to free as many MRs as needed to get back to this limit. 336 * to free as many MRs as needed to get back to this limit.
625 */ 337 */
626static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, 338int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
627 int free_all, struct rds_ib_mr **ibmr_ret) 339 int free_all, struct rds_ib_mr **ibmr_ret)
628{ 340{
629 struct rds_ib_mr *ibmr, *next; 341 struct rds_ib_mr *ibmr;
630 struct llist_node *clean_nodes; 342 struct llist_node *clean_nodes;
631 struct llist_node *clean_tail; 343 struct llist_node *clean_tail;
632 LIST_HEAD(unmap_list); 344 LIST_HEAD(unmap_list);
633 LIST_HEAD(fmr_list);
634 unsigned long unpinned = 0; 345 unsigned long unpinned = 0;
635 unsigned int nfreed = 0, dirty_to_clean = 0, free_goal; 346 unsigned int nfreed = 0, dirty_to_clean = 0, free_goal;
636 int ret = 0;
637 347
638 if (pool->pool_type == RDS_IB_MR_8K_POOL) 348 if (pool->pool_type == RDS_IB_MR_8K_POOL)
639 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush); 349 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush);
@@ -643,7 +353,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
643 if (ibmr_ret) { 353 if (ibmr_ret) {
644 DEFINE_WAIT(wait); 354 DEFINE_WAIT(wait);
645 while (!mutex_trylock(&pool->flush_lock)) { 355 while (!mutex_trylock(&pool->flush_lock)) {
646 ibmr = rds_ib_reuse_fmr(pool); 356 ibmr = rds_ib_reuse_mr(pool);
647 if (ibmr) { 357 if (ibmr) {
648 *ibmr_ret = ibmr; 358 *ibmr_ret = ibmr;
649 finish_wait(&pool->flush_wait, &wait); 359 finish_wait(&pool->flush_wait, &wait);
@@ -655,7 +365,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
655 if (llist_empty(&pool->clean_list)) 365 if (llist_empty(&pool->clean_list))
656 schedule(); 366 schedule();
657 367
658 ibmr = rds_ib_reuse_fmr(pool); 368 ibmr = rds_ib_reuse_mr(pool);
659 if (ibmr) { 369 if (ibmr) {
660 *ibmr_ret = ibmr; 370 *ibmr_ret = ibmr;
661 finish_wait(&pool->flush_wait, &wait); 371 finish_wait(&pool->flush_wait, &wait);
@@ -667,7 +377,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
667 mutex_lock(&pool->flush_lock); 377 mutex_lock(&pool->flush_lock);
668 378
669 if (ibmr_ret) { 379 if (ibmr_ret) {
670 ibmr = rds_ib_reuse_fmr(pool); 380 ibmr = rds_ib_reuse_mr(pool);
671 if (ibmr) { 381 if (ibmr) {
672 *ibmr_ret = ibmr; 382 *ibmr_ret = ibmr;
673 goto out; 383 goto out;
@@ -687,30 +397,10 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
687 if (list_empty(&unmap_list)) 397 if (list_empty(&unmap_list))
688 goto out; 398 goto out;
689 399
690 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ 400 if (pool->use_fastreg)
691 list_for_each_entry(ibmr, &unmap_list, unmap_list) 401 rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal);
692 list_add(&ibmr->fmr->list, &fmr_list); 402 else
693 403 rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal);
694 ret = ib_unmap_fmr(&fmr_list);
695 if (ret)
696 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
697
698 /* Now we can destroy the DMA mapping and unpin any pages */
699 list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
700 unpinned += ibmr->sg_len;
701 __rds_ib_teardown_mr(ibmr);
702 if (nfreed < free_goal ||
703 ibmr->remap_count >= pool->fmr_attr.max_maps) {
704 if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
705 rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
706 else
707 rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
708 list_del(&ibmr->unmap_list);
709 ib_dealloc_fmr(ibmr->fmr);
710 kfree(ibmr);
711 nfreed++;
712 }
713 }
714 404
715 if (!list_empty(&unmap_list)) { 405 if (!list_empty(&unmap_list)) {
716 /* we have to make sure that none of the things we're about 406 /* we have to make sure that none of the things we're about
@@ -743,7 +433,47 @@ out:
743 if (waitqueue_active(&pool->flush_wait)) 433 if (waitqueue_active(&pool->flush_wait))
744 wake_up(&pool->flush_wait); 434 wake_up(&pool->flush_wait);
745out_nolock: 435out_nolock:
746 return ret; 436 return 0;
437}
438
439struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
440{
441 struct rds_ib_mr *ibmr = NULL;
442 int iter = 0;
443
444 if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10)
445 queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
446
447 while (1) {
448 ibmr = rds_ib_reuse_mr(pool);
449 if (ibmr)
450 return ibmr;
451
452 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
453 break;
454
455 atomic_dec(&pool->item_count);
456
457 if (++iter > 2) {
458 if (pool->pool_type == RDS_IB_MR_8K_POOL)
459 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
460 else
461 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
462 return ERR_PTR(-EAGAIN);
463 }
464
465 /* We do have some empty MRs. Flush them out. */
466 if (pool->pool_type == RDS_IB_MR_8K_POOL)
467 rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
468 else
469 rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
470
471 rds_ib_flush_mr_pool(pool, 0, &ibmr);
472 if (ibmr)
473 return ibmr;
474 }
475
476 return ibmr;
747} 477}
748 478
749static void rds_ib_mr_pool_flush_worker(struct work_struct *work) 479static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
@@ -762,10 +492,10 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
762 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); 492 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
763 493
764 /* Return it to the pool's free list */ 494 /* Return it to the pool's free list */
765 if (ibmr->remap_count >= pool->fmr_attr.max_maps) 495 if (rds_ibdev->use_fastreg)
766 llist_add(&ibmr->llnode, &pool->drop_list); 496 rds_ib_free_frmr_list(ibmr);
767 else 497 else
768 llist_add(&ibmr->llnode, &pool->free_list); 498 rds_ib_free_fmr_list(ibmr);
769 499
770 atomic_add(ibmr->sg_len, &pool->free_pinned); 500 atomic_add(ibmr->sg_len, &pool->free_pinned);
771 atomic_inc(&pool->dirty_count); 501 atomic_inc(&pool->dirty_count);
@@ -773,7 +503,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
773 /* If we've pinned too many pages, request a flush */ 503 /* If we've pinned too many pages, request a flush */
774 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 504 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
775 atomic_read(&pool->dirty_count) >= pool->max_items / 5) 505 atomic_read(&pool->dirty_count) >= pool->max_items / 5)
776 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); 506 queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
777 507
778 if (invalidate) { 508 if (invalidate) {
779 if (likely(!in_interrupt())) { 509 if (likely(!in_interrupt())) {
@@ -782,7 +512,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
782 /* We get here if the user created a MR marked 512 /* We get here if the user created a MR marked
783 * as use_once and invalidate at the same time. 513 * as use_once and invalidate at the same time.
784 */ 514 */
785 queue_delayed_work(rds_ib_fmr_wq, 515 queue_delayed_work(rds_ib_mr_wq,
786 &pool->flush_worker, 10); 516 &pool->flush_worker, 10);
787 } 517 }
788 } 518 }
@@ -810,6 +540,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
810{ 540{
811 struct rds_ib_device *rds_ibdev; 541 struct rds_ib_device *rds_ibdev;
812 struct rds_ib_mr *ibmr = NULL; 542 struct rds_ib_mr *ibmr = NULL;
543 struct rds_ib_connection *ic = rs->rs_conn->c_transport_data;
813 int ret; 544 int ret;
814 545
815 rds_ibdev = rds_ib_get_device(rs->rs_bound_addr); 546 rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
@@ -823,29 +554,81 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
823 goto out; 554 goto out;
824 } 555 }
825 556
826 ibmr = rds_ib_alloc_fmr(rds_ibdev, nents); 557 if (rds_ibdev->use_fastreg)
827 if (IS_ERR(ibmr)) { 558 ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret);
828 rds_ib_dev_put(rds_ibdev);
829 return ibmr;
830 }
831
832 ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
833 if (ret == 0)
834 *key_ret = ibmr->fmr->rkey;
835 else 559 else
836 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret); 560 ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret);
837 561 if (ibmr)
838 ibmr->device = rds_ibdev; 562 rds_ibdev = NULL;
839 rds_ibdev = NULL;
840 563
841 out: 564 out:
842 if (ret) { 565 if (!ibmr)
843 if (ibmr) 566 pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret);
844 rds_ib_free_mr(ibmr, 0); 567
845 ibmr = ERR_PTR(ret);
846 }
847 if (rds_ibdev) 568 if (rds_ibdev)
848 rds_ib_dev_put(rds_ibdev); 569 rds_ib_dev_put(rds_ibdev);
570
849 return ibmr; 571 return ibmr;
850} 572}
851 573
574void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
575{
576 cancel_delayed_work_sync(&pool->flush_worker);
577 rds_ib_flush_mr_pool(pool, 1, NULL);
578 WARN_ON(atomic_read(&pool->item_count));
579 WARN_ON(atomic_read(&pool->free_pinned));
580 kfree(pool);
581}
582
583struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
584 int pool_type)
585{
586 struct rds_ib_mr_pool *pool;
587
588 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
589 if (!pool)
590 return ERR_PTR(-ENOMEM);
591
592 pool->pool_type = pool_type;
593 init_llist_head(&pool->free_list);
594 init_llist_head(&pool->drop_list);
595 init_llist_head(&pool->clean_list);
596 mutex_init(&pool->flush_lock);
597 init_waitqueue_head(&pool->flush_wait);
598 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
599
600 if (pool_type == RDS_IB_MR_1M_POOL) {
601 /* +1 allows for unaligned MRs */
602 pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1;
603 pool->max_items = RDS_MR_1M_POOL_SIZE;
604 } else {
605 /* pool_type == RDS_IB_MR_8K_POOL */
606 pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1;
607 pool->max_items = RDS_MR_8K_POOL_SIZE;
608 }
609
610 pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
611 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
612 pool->fmr_attr.page_shift = PAGE_SHIFT;
613 pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4;
614 pool->use_fastreg = rds_ibdev->use_fastreg;
615
616 return pool;
617}
618
619int rds_ib_mr_init(void)
620{
621 rds_ib_mr_wq = create_workqueue("rds_mr_flushd");
622 if (!rds_ib_mr_wq)
623 return -ENOMEM;
624 return 0;
625}
626
627/* By the time this is called all the IB devices should have been torn down and
628 * had their pools freed. As each pool is freed its work struct is waited on,
629 * so the pool flushing work queue should be idle by the time we get here.
630 */
631void rds_ib_mr_exit(void)
632{
633 destroy_workqueue(rds_ib_mr_wq);
634}
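rds_ib_try_reuse_ibmr() above encodes the allocation policy: recycle a clean MR if possible, allocate a new one while under the pool limit, otherwise flush dirty MRs and retry a bounded number of times before returning -EAGAIN. A standalone sketch of that control flow, with illustrative stand-in types rather than the kernel structures:

#include <errno.h>
#include <stdio.h>

struct sketch_pool {
	int item_count;		/* MRs currently allocated */
	int max_items;		/* hard pool limit */
	int clean_mrs;		/* MRs sitting on the clean list */
};

static int reuse_mr(struct sketch_pool *p)
{
	if (p->clean_mrs > 0) {
		p->clean_mrs--;
		return 1;
	}
	return 0;
}

static void flush_pool(struct sketch_pool *p)
{
	/* pretend a flush laundered one dirty MR onto the clean list */
	p->clean_mrs++;
}

static int try_get_mr(struct sketch_pool *p)
{
	int iter = 0;

	while (1) {
		if (reuse_mr(p))
			return 0;			/* recycled an MR */
		if (++p->item_count <= p->max_items)
			return 0;			/* allowed to allocate a new MR */
		p->item_count--;

		if (++iter > 2)
			return -EAGAIN;			/* pool depleted */

		flush_pool(p);				/* free up dirty MRs, then retry */
	}
}

int main(void)
{
	struct sketch_pool p = { .item_count = 4, .max_items = 4, .clean_mrs = 0 };

	printf("try_get_mr -> %d\n", try_get_mr(&p));
	return 0;
}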
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index eac30bf486d7..f27d2c82b036 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -195,7 +195,7 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
195 195
196 send->s_op = NULL; 196 send->s_op = NULL;
197 197
198 send->s_wr.wr_id = i | RDS_IB_SEND_OP; 198 send->s_wr.wr_id = i;
199 send->s_wr.sg_list = send->s_sge; 199 send->s_wr.sg_list = send->s_sge;
200 send->s_wr.ex.imm_data = 0; 200 send->s_wr.ex.imm_data = 0;
201 201
@@ -263,9 +263,7 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
263 263
264 oldest = rds_ib_ring_oldest(&ic->i_send_ring); 264 oldest = rds_ib_ring_oldest(&ic->i_send_ring);
265 265
266 completed = rds_ib_ring_completed(&ic->i_send_ring, 266 completed = rds_ib_ring_completed(&ic->i_send_ring, wc->wr_id, oldest);
267 (wc->wr_id & ~RDS_IB_SEND_OP),
268 oldest);
269 267
270 for (i = 0; i < completed; i++) { 268 for (i = 0; i < completed; i++) {
271 send = &ic->i_sends[oldest]; 269 send = &ic->i_sends[oldest];
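The ib_send.c hunks above drop the RDS_IB_SEND_OP tag that used to be OR'ed into wr_id; with dedicated send and receive completion handlers the tag is no longer needed to classify completions. For reference, a minimal sketch of that kind of tag-bit encoding (illustrative names, not the kernel's):

#include <stdint.h>
#include <stdio.h>

#define SEND_OP_FLAG	(1ULL << 63)	/* high bit marks a send work request */

static uint64_t make_send_wr_id(uint32_t ring_index)
{
	return (uint64_t)ring_index | SEND_OP_FLAG;
}

static uint32_t wr_id_to_index(uint64_t wr_id)
{
	return (uint32_t)(wr_id & ~SEND_OP_FLAG);
}

int main(void)
{
	uint64_t id = make_send_wr_id(42);

	printf("is send: %d, index: %u\n",
	       (id & SEND_OP_FLAG) != 0, wr_id_to_index(id));
	return 0;
}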
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index d77e04473056..7e78dca1f252 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -73,6 +73,8 @@ static const char *const rds_ib_stat_names[] = {
73 "ib_rdma_mr_1m_pool_flush", 73 "ib_rdma_mr_1m_pool_flush",
74 "ib_rdma_mr_1m_pool_wait", 74 "ib_rdma_mr_1m_pool_wait",
75 "ib_rdma_mr_1m_pool_depleted", 75 "ib_rdma_mr_1m_pool_depleted",
76 "ib_rdma_mr_8k_reused",
77 "ib_rdma_mr_1m_reused",
76 "ib_atomic_cswp", 78 "ib_atomic_cswp",
77 "ib_atomic_fadd", 79 "ib_atomic_fadd",
78}; 80};
diff --git a/net/rds/iw.c b/net/rds/iw.c
deleted file mode 100644
index f4a9fff829e0..000000000000
--- a/net/rds/iw.c
+++ /dev/null
@@ -1,312 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/if.h>
36#include <linux/netdevice.h>
37#include <linux/inetdevice.h>
38#include <linux/if_arp.h>
39#include <linux/delay.h>
40#include <linux/slab.h>
41#include <linux/module.h>
42
43#include "rds.h"
44#include "iw.h"
45
46unsigned int fastreg_pool_size = RDS_FASTREG_POOL_SIZE;
47unsigned int fastreg_message_size = RDS_FASTREG_SIZE + 1; /* +1 allows for unaligned MRs */
48
49module_param(fastreg_pool_size, int, 0444);
50MODULE_PARM_DESC(fastreg_pool_size, " Max number of fastreg MRs per device");
51module_param(fastreg_message_size, int, 0444);
52MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MRs)");
53
54struct list_head rds_iw_devices;
55
56/* NOTE: if also grabbing iwdev lock, grab this first */
57DEFINE_SPINLOCK(iw_nodev_conns_lock);
58LIST_HEAD(iw_nodev_conns);
59
60static void rds_iw_add_one(struct ib_device *device)
61{
62 struct rds_iw_device *rds_iwdev;
63
64 /* Only handle iwarp devices */
65 if (device->node_type != RDMA_NODE_RNIC)
66 return;
67
68 rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
69 if (!rds_iwdev)
70 return;
71
72 spin_lock_init(&rds_iwdev->spinlock);
73
74 rds_iwdev->dma_local_lkey = !!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
75 rds_iwdev->max_wrs = device->attrs.max_qp_wr;
76 rds_iwdev->max_sge = min(device->attrs.max_sge, RDS_IW_MAX_SGE);
77
78 rds_iwdev->dev = device;
79 rds_iwdev->pd = ib_alloc_pd(device);
80 if (IS_ERR(rds_iwdev->pd))
81 goto free_dev;
82
83 if (!rds_iwdev->dma_local_lkey) {
84 rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd,
85 IB_ACCESS_REMOTE_READ |
86 IB_ACCESS_REMOTE_WRITE |
87 IB_ACCESS_LOCAL_WRITE);
88 if (IS_ERR(rds_iwdev->mr))
89 goto err_pd;
90 } else
91 rds_iwdev->mr = NULL;
92
93 rds_iwdev->mr_pool = rds_iw_create_mr_pool(rds_iwdev);
94 if (IS_ERR(rds_iwdev->mr_pool)) {
95 rds_iwdev->mr_pool = NULL;
96 goto err_mr;
97 }
98
99 INIT_LIST_HEAD(&rds_iwdev->cm_id_list);
100 INIT_LIST_HEAD(&rds_iwdev->conn_list);
101 list_add_tail(&rds_iwdev->list, &rds_iw_devices);
102
103 ib_set_client_data(device, &rds_iw_client, rds_iwdev);
104 return;
105
106err_mr:
107 if (rds_iwdev->mr)
108 ib_dereg_mr(rds_iwdev->mr);
109err_pd:
110 ib_dealloc_pd(rds_iwdev->pd);
111free_dev:
112 kfree(rds_iwdev);
113}
114
115static void rds_iw_remove_one(struct ib_device *device, void *client_data)
116{
117 struct rds_iw_device *rds_iwdev = client_data;
118 struct rds_iw_cm_id *i_cm_id, *next;
119
120 if (!rds_iwdev)
121 return;
122
123 spin_lock_irq(&rds_iwdev->spinlock);
124 list_for_each_entry_safe(i_cm_id, next, &rds_iwdev->cm_id_list, list) {
125 list_del(&i_cm_id->list);
126 kfree(i_cm_id);
127 }
128 spin_unlock_irq(&rds_iwdev->spinlock);
129
130 rds_iw_destroy_conns(rds_iwdev);
131
132 if (rds_iwdev->mr_pool)
133 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool);
134
135 if (rds_iwdev->mr)
136 ib_dereg_mr(rds_iwdev->mr);
137
138 ib_dealloc_pd(rds_iwdev->pd);
139
140 list_del(&rds_iwdev->list);
141 kfree(rds_iwdev);
142}
143
144struct ib_client rds_iw_client = {
145 .name = "rds_iw",
146 .add = rds_iw_add_one,
147 .remove = rds_iw_remove_one
148};
149
150static int rds_iw_conn_info_visitor(struct rds_connection *conn,
151 void *buffer)
152{
153 struct rds_info_rdma_connection *iinfo = buffer;
154 struct rds_iw_connection *ic;
155
156 /* We will only ever look at IB transports */
157 if (conn->c_trans != &rds_iw_transport)
158 return 0;
159
160 iinfo->src_addr = conn->c_laddr;
161 iinfo->dst_addr = conn->c_faddr;
162
163 memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
164 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
165 if (rds_conn_state(conn) == RDS_CONN_UP) {
166 struct rds_iw_device *rds_iwdev;
167 struct rdma_dev_addr *dev_addr;
168
169 ic = conn->c_transport_data;
170 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
171
172 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
173 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
174
175 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
176 iinfo->max_send_wr = ic->i_send_ring.w_nr;
177 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
178 iinfo->max_send_sge = rds_iwdev->max_sge;
179 rds_iw_get_mr_info(rds_iwdev, iinfo);
180 }
181 return 1;
182}
183
184static void rds_iw_ic_info(struct socket *sock, unsigned int len,
185 struct rds_info_iterator *iter,
186 struct rds_info_lengths *lens)
187{
188 rds_for_each_conn_info(sock, len, iter, lens,
189 rds_iw_conn_info_visitor,
190 sizeof(struct rds_info_rdma_connection));
191}
192
193
194/*
195 * Early RDS/IB was built to only bind to an address if there is an IPoIB
196 * device with that address set.
197 *
198 * If it were me, I'd advocate for something more flexible. Sending and
199 * receiving should be device-agnostic. Transports would try and maintain
200 * connections between peers who have messages queued. Userspace would be
201 * allowed to influence which paths have priority. We could call userspace
202 * asserting this policy "routing".
203 */
204static int rds_iw_laddr_check(struct net *net, __be32 addr)
205{
206 int ret;
207 struct rdma_cm_id *cm_id;
208 struct sockaddr_in sin;
209
210 /* Create a CMA ID and try to bind it. This catches both
211 * IB and iWARP capable NICs.
212 */
213 cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
214 if (IS_ERR(cm_id))
215 return PTR_ERR(cm_id);
216
217 memset(&sin, 0, sizeof(sin));
218 sin.sin_family = AF_INET;
219 sin.sin_addr.s_addr = addr;
220
221 /* rdma_bind_addr will only succeed for IB & iWARP devices */
222 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
223 /* due to this, we will claim to support IB devices unless we
224 check node_type. */
225 if (ret || !cm_id->device ||
226 cm_id->device->node_type != RDMA_NODE_RNIC)
227 ret = -EADDRNOTAVAIL;
228
229 rdsdebug("addr %pI4 ret %d node type %d\n",
230 &addr, ret,
231 cm_id->device ? cm_id->device->node_type : -1);
232
233 rdma_destroy_id(cm_id);
234
235 return ret;
236}
237
238void rds_iw_exit(void)
239{
240 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
241 rds_iw_destroy_nodev_conns();
242 ib_unregister_client(&rds_iw_client);
243 rds_iw_sysctl_exit();
244 rds_iw_recv_exit();
245 rds_trans_unregister(&rds_iw_transport);
246}
247
248struct rds_transport rds_iw_transport = {
249 .laddr_check = rds_iw_laddr_check,
250 .xmit_complete = rds_iw_xmit_complete,
251 .xmit = rds_iw_xmit,
252 .xmit_rdma = rds_iw_xmit_rdma,
253 .recv = rds_iw_recv,
254 .conn_alloc = rds_iw_conn_alloc,
255 .conn_free = rds_iw_conn_free,
256 .conn_connect = rds_iw_conn_connect,
257 .conn_shutdown = rds_iw_conn_shutdown,
258 .inc_copy_to_user = rds_iw_inc_copy_to_user,
259 .inc_free = rds_iw_inc_free,
260 .cm_initiate_connect = rds_iw_cm_initiate_connect,
261 .cm_handle_connect = rds_iw_cm_handle_connect,
262 .cm_connect_complete = rds_iw_cm_connect_complete,
263 .stats_info_copy = rds_iw_stats_info_copy,
264 .exit = rds_iw_exit,
265 .get_mr = rds_iw_get_mr,
266 .sync_mr = rds_iw_sync_mr,
267 .free_mr = rds_iw_free_mr,
268 .flush_mrs = rds_iw_flush_mrs,
269 .t_owner = THIS_MODULE,
270 .t_name = "iwarp",
271 .t_type = RDS_TRANS_IWARP,
272 .t_prefer_loopback = 1,
273};
274
275int rds_iw_init(void)
276{
277 int ret;
278
279 INIT_LIST_HEAD(&rds_iw_devices);
280
281 ret = ib_register_client(&rds_iw_client);
282 if (ret)
283 goto out;
284
285 ret = rds_iw_sysctl_init();
286 if (ret)
287 goto out_ibreg;
288
289 ret = rds_iw_recv_init();
290 if (ret)
291 goto out_sysctl;
292
293 ret = rds_trans_register(&rds_iw_transport);
294 if (ret)
295 goto out_recv;
296
297 rds_info_register_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
298
299 goto out;
300
301out_recv:
302 rds_iw_recv_exit();
303out_sysctl:
304 rds_iw_sysctl_exit();
305out_ibreg:
306 ib_unregister_client(&rds_iw_client);
307out:
308 return ret;
309}
310
311MODULE_LICENSE("GPL");
312
diff --git a/net/rds/iw.h b/net/rds/iw.h
deleted file mode 100644
index 5af01d1758b3..000000000000
--- a/net/rds/iw.h
+++ /dev/null
@@ -1,398 +0,0 @@
1#ifndef _RDS_IW_H
2#define _RDS_IW_H
3
4#include <linux/interrupt.h>
5#include <rdma/ib_verbs.h>
6#include <rdma/rdma_cm.h>
7#include "rds.h"
8#include "rdma_transport.h"
9
10#define RDS_FASTREG_SIZE 20
11#define RDS_FASTREG_POOL_SIZE 2048
12
13#define RDS_IW_MAX_SGE 8
14#define RDS_IW_RECV_SGE 2
15
16#define RDS_IW_DEFAULT_RECV_WR 1024
17#define RDS_IW_DEFAULT_SEND_WR 256
18
19#define RDS_IW_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
20
21extern struct list_head rds_iw_devices;
22
23/*
24 * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to
25 * try and minimize the amount of memory tied up in both the device and
26 * socket receive queues.
27 */
28/* page offset of the final full frag that fits in the page */
29#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
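A worked example of the RDS_PAGE_LAST_OFF arithmetic above (editorial sketch only; the numbers assume a 4096-byte PAGE_SIZE and a hypothetical 1024-byte RDS_FRAG_SIZE, neither of which is defined in this header):

/*
 * Assumed PAGE_SIZE = 4096, RDS_FRAG_SIZE = 1024 (illustrative values only):
 *   fragments per page = 4096 / 1024    = 4
 *   RDS_PAGE_LAST_OFF  = (4 - 1) * 1024 = 3072
 * i.e. the last full fragment that still fits in a page starts at byte
 * offset 3072.
 */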
30struct rds_page_frag {
31 struct list_head f_item;
32 struct page *f_page;
33 unsigned long f_offset;
34 dma_addr_t f_mapped;
35};
36
37struct rds_iw_incoming {
38 struct list_head ii_frags;
39 struct rds_incoming ii_inc;
40};
41
42struct rds_iw_connect_private {
43 /* Add new fields at the end, and don't permute existing fields. */
44 __be32 dp_saddr;
45 __be32 dp_daddr;
46 u8 dp_protocol_major;
47 u8 dp_protocol_minor;
48 __be16 dp_protocol_minor_mask; /* bitmask */
49 __be32 dp_reserved1;
50 __be64 dp_ack_seq;
51 __be32 dp_credit; /* non-zero enables flow ctl */
52};
53
54struct rds_iw_scatterlist {
55 struct scatterlist *list;
56 unsigned int len;
57 int dma_len;
58 unsigned int dma_npages;
59 unsigned int bytes;
60};
61
62struct rds_iw_mapping {
63 spinlock_t m_lock; /* protect the mapping struct */
64 struct list_head m_list;
65 struct rds_iw_mr *m_mr;
66 uint32_t m_rkey;
67 struct rds_iw_scatterlist m_sg;
68};
69
70struct rds_iw_send_work {
71 struct rds_message *s_rm;
72
73 /* We should really put these into a union: */
74 struct rm_rdma_op *s_op;
75 struct rds_iw_mapping *s_mapping;
76 struct ib_mr *s_mr;
77 unsigned char s_remap_count;
78
79 union {
80 struct ib_send_wr s_send_wr;
81 struct ib_rdma_wr s_rdma_wr;
82 struct ib_reg_wr s_reg_wr;
83 };
84 struct ib_sge s_sge[RDS_IW_MAX_SGE];
85 unsigned long s_queued;
86};
87
88struct rds_iw_recv_work {
89 struct rds_iw_incoming *r_iwinc;
90 struct rds_page_frag *r_frag;
91 struct ib_recv_wr r_wr;
92 struct ib_sge r_sge[2];
93};
94
95struct rds_iw_work_ring {
96 u32 w_nr;
97 u32 w_alloc_ptr;
98 u32 w_alloc_ctr;
99 u32 w_free_ptr;
100 atomic_t w_free_ctr;
101};
102
103struct rds_iw_device;
104
105struct rds_iw_connection {
106
107 struct list_head iw_node;
108 struct rds_iw_device *rds_iwdev;
109 struct rds_connection *conn;
110
111 /* alphabet soup, IBTA style */
112 struct rdma_cm_id *i_cm_id;
113 struct ib_pd *i_pd;
114 struct ib_mr *i_mr;
115 struct ib_cq *i_send_cq;
116 struct ib_cq *i_recv_cq;
117
118 /* tx */
119 struct rds_iw_work_ring i_send_ring;
120 struct rds_message *i_rm;
121 struct rds_header *i_send_hdrs;
122 u64 i_send_hdrs_dma;
123 struct rds_iw_send_work *i_sends;
124
125 /* rx */
126 struct tasklet_struct i_recv_tasklet;
127 struct mutex i_recv_mutex;
128 struct rds_iw_work_ring i_recv_ring;
129 struct rds_iw_incoming *i_iwinc;
130 u32 i_recv_data_rem;
131 struct rds_header *i_recv_hdrs;
132 u64 i_recv_hdrs_dma;
133 struct rds_iw_recv_work *i_recvs;
134 struct rds_page_frag i_frag;
135 u64 i_ack_recv; /* last ACK received */
136
137 /* sending acks */
138 unsigned long i_ack_flags;
139#ifdef KERNEL_HAS_ATOMIC64
140 atomic64_t i_ack_next; /* next ACK to send */
141#else
142 spinlock_t i_ack_lock; /* protect i_ack_next */
143 u64 i_ack_next; /* next ACK to send */
144#endif
145 struct rds_header *i_ack;
146 struct ib_send_wr i_ack_wr;
147 struct ib_sge i_ack_sge;
148 u64 i_ack_dma;
149 unsigned long i_ack_queued;
150
151 /* Flow control related information
152 *
153 * Our algorithm uses a pair of variables that we need to access
154 * atomically - one for the send credits, and one for the posted
155 * recv credits we need to transfer to the remote.
156 * Rather than protect them using a slow spinlock, we put both into
157 * a single atomic_t and update it using cmpxchg
158 */
159 atomic_t i_credits;
160
161 /* Protocol version specific information */
162 unsigned int i_flowctl:1; /* enable/disable flow ctl */
163 unsigned int i_dma_local_lkey:1;
164 unsigned int i_fastreg_posted:1; /* fastreg posted on this connection */
165 /* Batched completions */
166 unsigned int i_unsignaled_wrs;
167 long i_unsignaled_bytes;
168};
169
170/* This assumes that atomic_t is at least 32 bits */
171#define IB_GET_SEND_CREDITS(v) ((v) & 0xffff)
172#define IB_GET_POST_CREDITS(v) ((v) >> 16)
173#define IB_SET_SEND_CREDITS(v) ((v) & 0xffff)
174#define IB_SET_POST_CREDITS(v) ((v) << 16)
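To make the credit packing above concrete, here is a minimal userspace sketch (not part of the RDS sources; the GET_/SET_ macros and the add_credits() helper are illustrative stand-ins for the IB_* macros and the real send-path code) showing two 16-bit counters sharing one atomic word and being updated with a compare-and-swap loop:

/* Illustrative userspace sketch only. */
#include <stdatomic.h>
#include <stdio.h>

#define GET_SEND_CREDITS(v)	((v) & 0xffff)
#define GET_POST_CREDITS(v)	((v) >> 16)
#define SET_POST_CREDITS(v)	((v) << 16)

/* Atomically grant 'send' send credits and advertise 'post' newly posted
 * receive credits, mirroring the single-word cmpxchg scheme described above. */
static void add_credits(atomic_uint *credits, unsigned int send, unsigned int post)
{
	unsigned int old = atomic_load(credits);
	unsigned int new;

	do {
		new = old + send + SET_POST_CREDITS(post);
	} while (!atomic_compare_exchange_weak(credits, &old, new));
}

int main(void)
{
	atomic_uint credits = 0;

	add_credits(&credits, 3, 2);
	printf("send=%u post=%u\n",
	       GET_SEND_CREDITS(atomic_load(&credits)),
	       GET_POST_CREDITS(atomic_load(&credits)));
	return 0;
}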
175
176struct rds_iw_cm_id {
177 struct list_head list;
178 struct rdma_cm_id *cm_id;
179};
180
181struct rds_iw_device {
182 struct list_head list;
183 struct list_head cm_id_list;
184 struct list_head conn_list;
185 struct ib_device *dev;
186 struct ib_pd *pd;
187 struct ib_mr *mr;
188 struct rds_iw_mr_pool *mr_pool;
189 int max_sge;
190 unsigned int max_wrs;
191 unsigned int dma_local_lkey:1;
192 spinlock_t spinlock; /* protect the above */
193};
194
195/* bits for i_ack_flags */
196#define IB_ACK_IN_FLIGHT 0
197#define IB_ACK_REQUESTED 1
198
199/* Magic WR_ID for ACKs */
200#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL)
201#define RDS_IW_REG_WR_ID ((u64)0xefefefefefefefefULL)
202#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL)
203
204struct rds_iw_statistics {
205 uint64_t s_iw_connect_raced;
206 uint64_t s_iw_listen_closed_stale;
207 uint64_t s_iw_tx_cq_call;
208 uint64_t s_iw_tx_cq_event;
209 uint64_t s_iw_tx_ring_full;
210 uint64_t s_iw_tx_throttle;
211 uint64_t s_iw_tx_sg_mapping_failure;
212 uint64_t s_iw_tx_stalled;
213 uint64_t s_iw_tx_credit_updates;
214 uint64_t s_iw_rx_cq_call;
215 uint64_t s_iw_rx_cq_event;
216 uint64_t s_iw_rx_ring_empty;
217 uint64_t s_iw_rx_refill_from_cq;
218 uint64_t s_iw_rx_refill_from_thread;
219 uint64_t s_iw_rx_alloc_limit;
220 uint64_t s_iw_rx_credit_updates;
221 uint64_t s_iw_ack_sent;
222 uint64_t s_iw_ack_send_failure;
223 uint64_t s_iw_ack_send_delayed;
224 uint64_t s_iw_ack_send_piggybacked;
225 uint64_t s_iw_ack_received;
226 uint64_t s_iw_rdma_mr_alloc;
227 uint64_t s_iw_rdma_mr_free;
228 uint64_t s_iw_rdma_mr_used;
229 uint64_t s_iw_rdma_mr_pool_flush;
230 uint64_t s_iw_rdma_mr_pool_wait;
231 uint64_t s_iw_rdma_mr_pool_depleted;
232};
233
234extern struct workqueue_struct *rds_iw_wq;
235
236/*
237 * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h
238 * doesn't define it.
239 */
240static inline void rds_iw_dma_sync_sg_for_cpu(struct ib_device *dev,
241 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
242{
243 unsigned int i;
244
245 for (i = 0; i < sg_dma_len; ++i) {
246 ib_dma_sync_single_for_cpu(dev,
247 ib_sg_dma_address(dev, &sg[i]),
248 ib_sg_dma_len(dev, &sg[i]),
249 direction);
250 }
251}
252#define ib_dma_sync_sg_for_cpu rds_iw_dma_sync_sg_for_cpu
253
254static inline void rds_iw_dma_sync_sg_for_device(struct ib_device *dev,
255 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
256{
257 unsigned int i;
258
259 for (i = 0; i < sg_dma_len; ++i) {
260 ib_dma_sync_single_for_device(dev,
261 ib_sg_dma_address(dev, &sg[i]),
262 ib_sg_dma_len(dev, &sg[i]),
263 direction);
264 }
265}
266#define ib_dma_sync_sg_for_device rds_iw_dma_sync_sg_for_device
267
268static inline u32 rds_iw_local_dma_lkey(struct rds_iw_connection *ic)
269{
270 return ic->i_dma_local_lkey ? ic->i_cm_id->device->local_dma_lkey : ic->i_mr->lkey;
271}
272
273/* ib.c */
274extern struct rds_transport rds_iw_transport;
275extern struct ib_client rds_iw_client;
276
277extern unsigned int fastreg_pool_size;
278extern unsigned int fastreg_message_size;
279
280extern spinlock_t iw_nodev_conns_lock;
281extern struct list_head iw_nodev_conns;
282
283/* ib_cm.c */
284int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp);
285void rds_iw_conn_free(void *arg);
286int rds_iw_conn_connect(struct rds_connection *conn);
287void rds_iw_conn_shutdown(struct rds_connection *conn);
288void rds_iw_state_change(struct sock *sk);
289int rds_iw_listen_init(void);
290void rds_iw_listen_stop(void);
291void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...);
292int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
293 struct rdma_cm_event *event);
294int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id);
295void rds_iw_cm_connect_complete(struct rds_connection *conn,
296 struct rdma_cm_event *event);
297
298
299#define rds_iw_conn_error(conn, fmt...) \
300 __rds_iw_conn_error(conn, KERN_WARNING "RDS/IW: " fmt)
301
302/* ib_rdma.c */
303int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
304void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
305void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
306void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock);
307static inline void rds_iw_destroy_nodev_conns(void)
308{
309 __rds_iw_destroy_conns(&iw_nodev_conns, &iw_nodev_conns_lock);
310}
311static inline void rds_iw_destroy_conns(struct rds_iw_device *rds_iwdev)
312{
313 __rds_iw_destroy_conns(&rds_iwdev->conn_list, &rds_iwdev->spinlock);
314}
315struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *);
316void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo);
317void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *);
318void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
319 struct rds_sock *rs, u32 *key_ret);
320void rds_iw_sync_mr(void *trans_private, int dir);
321void rds_iw_free_mr(void *trans_private, int invalidate);
322void rds_iw_flush_mrs(void);
323
324/* ib_recv.c */
325int rds_iw_recv_init(void);
326void rds_iw_recv_exit(void);
327int rds_iw_recv(struct rds_connection *conn);
328int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
329 gfp_t page_gfp, int prefill);
330void rds_iw_inc_free(struct rds_incoming *inc);
331int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
332void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context);
333void rds_iw_recv_tasklet_fn(unsigned long data);
334void rds_iw_recv_init_ring(struct rds_iw_connection *ic);
335void rds_iw_recv_clear_ring(struct rds_iw_connection *ic);
336void rds_iw_recv_init_ack(struct rds_iw_connection *ic);
337void rds_iw_attempt_ack(struct rds_iw_connection *ic);
338void rds_iw_ack_send_complete(struct rds_iw_connection *ic);
339u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic);
340
341/* ib_ring.c */
342void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr);
343void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr);
344u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos);
345void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val);
346void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val);
347int rds_iw_ring_empty(struct rds_iw_work_ring *ring);
348int rds_iw_ring_low(struct rds_iw_work_ring *ring);
349u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring);
350u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest);
351extern wait_queue_head_t rds_iw_ring_empty_wait;
352
353/* ib_send.c */
354void rds_iw_xmit_complete(struct rds_connection *conn);
355int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
356 unsigned int hdr_off, unsigned int sg, unsigned int off);
357void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context);
358void rds_iw_send_init_ring(struct rds_iw_connection *ic);
359void rds_iw_send_clear_ring(struct rds_iw_connection *ic);
360int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
361void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits);
362void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted);
363int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted,
364 u32 *adv_credits, int need_posted, int max_posted);
365
366/* ib_stats.c */
367DECLARE_PER_CPU(struct rds_iw_statistics, rds_iw_stats);
368#define rds_iw_stats_inc(member) rds_stats_inc_which(rds_iw_stats, member)
369unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
370 unsigned int avail);
371
372/* ib_sysctl.c */
373int rds_iw_sysctl_init(void);
374void rds_iw_sysctl_exit(void);
375extern unsigned long rds_iw_sysctl_max_send_wr;
376extern unsigned long rds_iw_sysctl_max_recv_wr;
377extern unsigned long rds_iw_sysctl_max_unsig_wrs;
378extern unsigned long rds_iw_sysctl_max_unsig_bytes;
379extern unsigned long rds_iw_sysctl_max_recv_allocation;
380extern unsigned int rds_iw_sysctl_flow_control;
381
382/*
383 * Helper functions for getting/setting the header and data SGEs in
384 * RDS packets (not RDMA)
385 */
386static inline struct ib_sge *
387rds_iw_header_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
388{
389 return &sge[0];
390}
391
392static inline struct ib_sge *
393rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
394{
395 return &sge[1];
396}
397
398#endif
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
deleted file mode 100644
index aea4c911bc76..000000000000
--- a/net/rds/iw_cm.c
+++ /dev/null
@@ -1,769 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <linux/ratelimit.h>
38
39#include "rds.h"
40#include "iw.h"
41
42/*
43 * Set the selected protocol version
44 */
45static void rds_iw_set_protocol(struct rds_connection *conn, unsigned int version)
46{
47 conn->c_version = version;
48}
49
50/*
51 * Set up flow control
52 */
53static void rds_iw_set_flow_control(struct rds_connection *conn, u32 credits)
54{
55 struct rds_iw_connection *ic = conn->c_transport_data;
56
57 if (rds_iw_sysctl_flow_control && credits != 0) {
58 /* We're doing flow control */
59 ic->i_flowctl = 1;
60 rds_iw_send_add_credits(conn, credits);
61 } else {
62 ic->i_flowctl = 0;
63 }
64}
65
66/*
67 * Connection established.
68 * We get here for both outgoing and incoming connection.
69 */
70void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
71{
72 const struct rds_iw_connect_private *dp = NULL;
73 struct rds_iw_connection *ic = conn->c_transport_data;
74 struct rds_iw_device *rds_iwdev;
75 int err;
76
77 if (event->param.conn.private_data_len) {
78 dp = event->param.conn.private_data;
79
80 rds_iw_set_protocol(conn,
81 RDS_PROTOCOL(dp->dp_protocol_major,
82 dp->dp_protocol_minor));
83 rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
84 }
85
86 /* update ib_device with this local ipaddr & conn */
87 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
88 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
89 if (err)
90 printk(KERN_ERR "rds_iw_update_cm_id failed (%d)\n", err);
91 rds_iw_add_conn(rds_iwdev, conn);
92
93 /* If the peer gave us the last packet it saw, process this as if
94 * we had received a regular ACK. */
95 if (dp && dp->dp_ack_seq)
96 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
97
98 printk(KERN_NOTICE "RDS/IW: connected to %pI4<->%pI4 version %u.%u%s\n",
99 &conn->c_laddr, &conn->c_faddr,
100 RDS_PROTOCOL_MAJOR(conn->c_version),
101 RDS_PROTOCOL_MINOR(conn->c_version),
102 ic->i_flowctl ? ", flow control" : "");
103
104 rds_connect_complete(conn);
105}
106
107static void rds_iw_cm_fill_conn_param(struct rds_connection *conn,
108 struct rdma_conn_param *conn_param,
109 struct rds_iw_connect_private *dp,
110 u32 protocol_version)
111{
112 struct rds_iw_connection *ic = conn->c_transport_data;
113
114 memset(conn_param, 0, sizeof(struct rdma_conn_param));
115 /* XXX tune these? */
116 conn_param->responder_resources = 1;
117 conn_param->initiator_depth = 1;
118
119 if (dp) {
120 memset(dp, 0, sizeof(*dp));
121 dp->dp_saddr = conn->c_laddr;
122 dp->dp_daddr = conn->c_faddr;
123 dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
124 dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
125 dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IW_SUPPORTED_PROTOCOLS);
126 dp->dp_ack_seq = rds_iw_piggyb_ack(ic);
127
128 /* Advertise flow control */
129 if (ic->i_flowctl) {
130 unsigned int credits;
131
132 credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
133 dp->dp_credit = cpu_to_be32(credits);
134 atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits);
135 }
136
137 conn_param->private_data = dp;
138 conn_param->private_data_len = sizeof(*dp);
139 }
140}
141
142static void rds_iw_cq_event_handler(struct ib_event *event, void *data)
143{
144 rdsdebug("event %u data %p\n", event->event, data);
145}
146
147static void rds_iw_qp_event_handler(struct ib_event *event, void *data)
148{
149 struct rds_connection *conn = data;
150 struct rds_iw_connection *ic = conn->c_transport_data;
151
152 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event);
153
154 switch (event->event) {
155 case IB_EVENT_COMM_EST:
156 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
157 break;
158 case IB_EVENT_QP_REQ_ERR:
159 case IB_EVENT_QP_FATAL:
160 default:
161 rdsdebug("Fatal QP Event %u "
162 "- connection %pI4->%pI4, reconnecting\n",
163 event->event, &conn->c_laddr,
164 &conn->c_faddr);
165 rds_conn_drop(conn);
166 break;
167 }
168}
169
170/*
171 * Create a QP
172 */
173static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
174 struct rds_iw_device *rds_iwdev,
175 struct rds_iw_work_ring *send_ring,
176 void (*send_cq_handler)(struct ib_cq *, void *),
177 struct rds_iw_work_ring *recv_ring,
178 void (*recv_cq_handler)(struct ib_cq *, void *),
179 void *context)
180{
181 struct ib_device *dev = rds_iwdev->dev;
182 struct ib_cq_init_attr cq_attr = {};
183 unsigned int send_size, recv_size;
184 int ret;
185
186 /* The offset of 1 is to accommodate the additional ACK WR. */
187 send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1);
188 recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1);
189 rds_iw_ring_resize(send_ring, send_size - 1);
190 rds_iw_ring_resize(recv_ring, recv_size - 1);
191
192 memset(attr, 0, sizeof(*attr));
193 attr->event_handler = rds_iw_qp_event_handler;
194 attr->qp_context = context;
195 attr->cap.max_send_wr = send_size;
196 attr->cap.max_recv_wr = recv_size;
197 attr->cap.max_send_sge = rds_iwdev->max_sge;
198 attr->cap.max_recv_sge = RDS_IW_RECV_SGE;
199 attr->sq_sig_type = IB_SIGNAL_REQ_WR;
200 attr->qp_type = IB_QPT_RC;
201
202 cq_attr.cqe = send_size;
203 attr->send_cq = ib_create_cq(dev, send_cq_handler,
204 rds_iw_cq_event_handler,
205 context, &cq_attr);
206 if (IS_ERR(attr->send_cq)) {
207 ret = PTR_ERR(attr->send_cq);
208 attr->send_cq = NULL;
209 rdsdebug("ib_create_cq send failed: %d\n", ret);
210 goto out;
211 }
212
213 cq_attr.cqe = recv_size;
214 attr->recv_cq = ib_create_cq(dev, recv_cq_handler,
215 rds_iw_cq_event_handler,
216 context, &cq_attr);
217 if (IS_ERR(attr->recv_cq)) {
218 ret = PTR_ERR(attr->recv_cq);
219 attr->recv_cq = NULL;
220 rdsdebug("ib_create_cq recv failed: %d\n", ret);
221 goto out;
222 }
223
224 ret = ib_req_notify_cq(attr->send_cq, IB_CQ_NEXT_COMP);
225 if (ret) {
226 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
227 goto out;
228 }
229
230 ret = ib_req_notify_cq(attr->recv_cq, IB_CQ_SOLICITED);
231 if (ret) {
232 rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
233 goto out;
234 }
235
236out:
237 if (ret) {
238 if (attr->send_cq)
239 ib_destroy_cq(attr->send_cq);
240 if (attr->recv_cq)
241 ib_destroy_cq(attr->recv_cq);
242 }
243 return ret;
244}
245
246/*
247 * This needs to be very careful to not leave IS_ERR pointers around for
248 * cleanup to trip over.
249 */
250static int rds_iw_setup_qp(struct rds_connection *conn)
251{
252 struct rds_iw_connection *ic = conn->c_transport_data;
253 struct ib_device *dev = ic->i_cm_id->device;
254 struct ib_qp_init_attr attr;
255 struct rds_iw_device *rds_iwdev;
256 int ret;
257
258 /* rds_iw_add_one creates a rds_iw_device object per IB device,
259 * and allocates a protection domain, memory range and MR pool
260 * for each. If that fails for any reason, it will not register
261 * the rds_iwdev at all.
262 */
263 rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
264 if (!rds_iwdev) {
265 printk_ratelimited(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
266 dev->name);
267 return -EOPNOTSUPP;
268 }
269
270 /* Protection domain and memory range */
271 ic->i_pd = rds_iwdev->pd;
272 ic->i_mr = rds_iwdev->mr;
273
274 ret = rds_iw_init_qp_attrs(&attr, rds_iwdev,
275 &ic->i_send_ring, rds_iw_send_cq_comp_handler,
276 &ic->i_recv_ring, rds_iw_recv_cq_comp_handler,
277 conn);
278 if (ret < 0)
279 goto out;
280
281 ic->i_send_cq = attr.send_cq;
282 ic->i_recv_cq = attr.recv_cq;
283
284 /*
285 * XXX this can fail if max_*_wr is too large? Are we supposed
286 * to back off until we get a value that the hardware can support?
287 */
288 ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
289 if (ret) {
290 rdsdebug("rdma_create_qp failed: %d\n", ret);
291 goto out;
292 }
293
294 ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
295 ic->i_send_ring.w_nr *
296 sizeof(struct rds_header),
297 &ic->i_send_hdrs_dma, GFP_KERNEL);
298 if (!ic->i_send_hdrs) {
299 ret = -ENOMEM;
300 rdsdebug("ib_dma_alloc_coherent send failed\n");
301 goto out;
302 }
303
304 ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
305 ic->i_recv_ring.w_nr *
306 sizeof(struct rds_header),
307 &ic->i_recv_hdrs_dma, GFP_KERNEL);
308 if (!ic->i_recv_hdrs) {
309 ret = -ENOMEM;
310 rdsdebug("ib_dma_alloc_coherent recv failed\n");
311 goto out;
312 }
313
314 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
315 &ic->i_ack_dma, GFP_KERNEL);
316 if (!ic->i_ack) {
317 ret = -ENOMEM;
318 rdsdebug("ib_dma_alloc_coherent ack failed\n");
319 goto out;
320 }
321
322 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
323 if (!ic->i_sends) {
324 ret = -ENOMEM;
325 rdsdebug("send allocation failed\n");
326 goto out;
327 }
328 rds_iw_send_init_ring(ic);
329
330 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
331 if (!ic->i_recvs) {
332 ret = -ENOMEM;
333 rdsdebug("recv allocation failed\n");
334 goto out;
335 }
336
337 rds_iw_recv_init_ring(ic);
338 rds_iw_recv_init_ack(ic);
339
340 /* Post receive buffers - as a side effect, this will update
341 * the posted credit count. */
342 rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);
343
344 rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
345 ic->i_send_cq, ic->i_recv_cq);
346
347out:
348 return ret;
349}
350
351static u32 rds_iw_protocol_compatible(const struct rds_iw_connect_private *dp)
352{
353 u16 common;
354 u32 version = 0;
355
356 /* rdma_cm private data is odd - when there is any private data in the
357 * request, we will be given a pretty large buffer without being told the
358 * original size. The only way to tell the difference is by looking at
359 * the contents, which are initialized to zero.
360 * If the protocol version fields aren't set, this is a connection attempt
361 * from an older version. This could be 3.0 or 2.0 - we can't tell.
362 * We really should have changed this for OFED 1.3 :-( */
363 if (dp->dp_protocol_major == 0)
364 return RDS_PROTOCOL_3_0;
365
366 common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IW_SUPPORTED_PROTOCOLS;
367 if (dp->dp_protocol_major == 3 && common) {
368 version = RDS_PROTOCOL_3_0;
369 while ((common >>= 1) != 0)
370 version++;
371 }
372 printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using "
373 "incompatible protocol version %u.%u\n",
374 &dp->dp_saddr,
375 dp->dp_protocol_major,
376 dp->dp_protocol_minor);
377 return version;
378}
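A worked example of the minor-version negotiation above (editorial illustration only; the advertised mask value is made up):

/*
 * Illustrative walk-through, not part of the original file.
 * Suppose the peer sends dp_protocol_major = 3 and advertises
 * dp_protocol_minor_mask = cpu_to_be16(0x0003), i.e. minor versions 0 and 1.
 *
 *   common  = 0x0003 & RDS_IW_SUPPORTED_PROTOCOLS (0x0003) = 0x0003
 *   version = RDS_PROTOCOL_3_0
 *   common >>= 1  ->  0x0001 (non-zero), so version++  (minor version 1)
 *   common >>= 1  ->  0x0000, loop ends
 *
 * and the highest minor version supported by both ends is selected.
 */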
379
380int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
381 struct rdma_cm_event *event)
382{
383 const struct rds_iw_connect_private *dp = event->param.conn.private_data;
384 struct rds_iw_connect_private dp_rep;
385 struct rds_connection *conn = NULL;
386 struct rds_iw_connection *ic = NULL;
387 struct rdma_conn_param conn_param;
388 struct rds_iw_device *rds_iwdev;
389 u32 version;
390 int err, destroy = 1;
391
392 /* Check whether the remote protocol version matches ours. */
393 version = rds_iw_protocol_compatible(dp);
394 if (!version)
395 goto out;
396
397 rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u\n",
398 &dp->dp_saddr, &dp->dp_daddr,
399 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version));
400
401 /* RDS/IW is not currently netns aware, thus init_net */
402 conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
403 &rds_iw_transport, GFP_KERNEL);
404 if (IS_ERR(conn)) {
405 rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
406 conn = NULL;
407 goto out;
408 }
409
410 /*
411 * The connection request may occur while the
412 * previous connection exists, e.g. in case of failover.
413 * But as connections may be initiated simultaneously
414 * by both hosts, we have a random backoff mechanism -
415 * see the comment above rds_queue_reconnect()
416 */
417 mutex_lock(&conn->c_cm_lock);
418 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
419 if (rds_conn_state(conn) == RDS_CONN_UP) {
420 rdsdebug("incoming connect while connecting\n");
421 rds_conn_drop(conn);
422 rds_iw_stats_inc(s_iw_listen_closed_stale);
423 } else
424 if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
425 /* Wait and see - our connect may still be succeeding */
426 rds_iw_stats_inc(s_iw_connect_raced);
427 }
428 mutex_unlock(&conn->c_cm_lock);
429 goto out;
430 }
431
432 ic = conn->c_transport_data;
433
434 rds_iw_set_protocol(conn, version);
435 rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
436
437 /* If the peer gave us the last packet it saw, process this as if
438 * we had received a regular ACK. */
439 if (dp->dp_ack_seq)
440 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
441
442 BUG_ON(cm_id->context);
443 BUG_ON(ic->i_cm_id);
444
445 ic->i_cm_id = cm_id;
446 cm_id->context = conn;
447
448 rds_iwdev = ib_get_client_data(cm_id->device, &rds_iw_client);
449 ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey;
450
451 /* We got halfway through setting up the ib_connection; if we
452 * fail now, we have to take the long route out of this mess. */
453 destroy = 0;
454
455 err = rds_iw_setup_qp(conn);
456 if (err) {
457 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err);
458 mutex_unlock(&conn->c_cm_lock);
459 goto out;
460 }
461
462 rds_iw_cm_fill_conn_param(conn, &conn_param, &dp_rep, version);
463
464 /* rdma_accept() calls rdma_reject() internally if it fails */
465 err = rdma_accept(cm_id, &conn_param);
466 mutex_unlock(&conn->c_cm_lock);
467 if (err) {
468 rds_iw_conn_error(conn, "rdma_accept failed (%d)\n", err);
469 goto out;
470 }
471
472 return 0;
473
474out:
475 rdma_reject(cm_id, NULL, 0);
476 return destroy;
477}
478
479
480int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id)
481{
482 struct rds_connection *conn = cm_id->context;
483 struct rds_iw_connection *ic = conn->c_transport_data;
484 struct rdma_conn_param conn_param;
485 struct rds_iw_connect_private dp;
486 int ret;
487
488 /* If the peer doesn't do protocol negotiation, we must
489 * default to RDSv3.0 */
490 rds_iw_set_protocol(conn, RDS_PROTOCOL_3_0);
491 ic->i_flowctl = rds_iw_sysctl_flow_control; /* advertise flow control */
492
493 ret = rds_iw_setup_qp(conn);
494 if (ret) {
495 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", ret);
496 goto out;
497 }
498
499 rds_iw_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION);
500
501 ret = rdma_connect(cm_id, &conn_param);
502 if (ret)
503 rds_iw_conn_error(conn, "rdma_connect failed (%d)\n", ret);
504
505out:
506 /* Beware - returning non-zero tells the rdma_cm to destroy
507 * the cm_id. We should certainly not do it as long as we still
508 * "own" the cm_id. */
509 if (ret) {
510 struct rds_iw_connection *ic = conn->c_transport_data;
511
512 if (ic->i_cm_id == cm_id)
513 ret = 0;
514 }
515 return ret;
516}
517
518int rds_iw_conn_connect(struct rds_connection *conn)
519{
520 struct rds_iw_connection *ic = conn->c_transport_data;
521 struct rds_iw_device *rds_iwdev;
522 struct sockaddr_in src, dest;
523 int ret;
524
525 /* XXX I wonder what effect the port space has */
526 /* delegate cm event handler to rdma_transport */
527 ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
528 RDMA_PS_TCP, IB_QPT_RC);
529 if (IS_ERR(ic->i_cm_id)) {
530 ret = PTR_ERR(ic->i_cm_id);
531 ic->i_cm_id = NULL;
532 rdsdebug("rdma_create_id() failed: %d\n", ret);
533 goto out;
534 }
535
536 rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
537
538 src.sin_family = AF_INET;
539 src.sin_addr.s_addr = (__force u32)conn->c_laddr;
540 src.sin_port = (__force u16)htons(0);
541
542 /* First, bind to the local address and device. */
543 ret = rdma_bind_addr(ic->i_cm_id, (struct sockaddr *) &src);
544 if (ret) {
545 rdsdebug("rdma_bind_addr(%pI4) failed: %d\n",
546 &conn->c_laddr, ret);
547 rdma_destroy_id(ic->i_cm_id);
548 ic->i_cm_id = NULL;
549 goto out;
550 }
551
552 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
553 ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey;
554
555 dest.sin_family = AF_INET;
556 dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
557 dest.sin_port = (__force u16)htons(RDS_PORT);
558
559 ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
560 (struct sockaddr *)&dest,
561 RDS_RDMA_RESOLVE_TIMEOUT_MS);
562 if (ret) {
563 rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id,
564 ret);
565 rdma_destroy_id(ic->i_cm_id);
566 ic->i_cm_id = NULL;
567 }
568
569out:
570 return ret;
571}
572
573/*
574 * This is so careful about only cleaning up resources that were built up
575 * so that it can be called at any point during startup. In fact it
576 * can be called multiple times for a given connection.
577 */
578void rds_iw_conn_shutdown(struct rds_connection *conn)
579{
580 struct rds_iw_connection *ic = conn->c_transport_data;
581 int err = 0;
582 struct ib_qp_attr qp_attr;
583
584 rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id,
585 ic->i_pd, ic->i_send_cq, ic->i_recv_cq,
586 ic->i_cm_id ? ic->i_cm_id->qp : NULL);
587
588 if (ic->i_cm_id) {
589 struct ib_device *dev = ic->i_cm_id->device;
590
591 rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
592 err = rdma_disconnect(ic->i_cm_id);
593 if (err) {
594 /* Actually this may happen quite frequently, when
595 * an outgoing connect raced with an incoming connect.
596 */
597 rdsdebug("failed to disconnect, cm: %p err %d\n",
598 ic->i_cm_id, err);
599 }
600
601 if (ic->i_cm_id->qp) {
602 qp_attr.qp_state = IB_QPS_ERR;
603 ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
604 }
605
606 wait_event(rds_iw_ring_empty_wait,
607 rds_iw_ring_empty(&ic->i_send_ring) &&
608 rds_iw_ring_empty(&ic->i_recv_ring));
609
610 if (ic->i_send_hdrs)
611 ib_dma_free_coherent(dev,
612 ic->i_send_ring.w_nr *
613 sizeof(struct rds_header),
614 ic->i_send_hdrs,
615 ic->i_send_hdrs_dma);
616
617 if (ic->i_recv_hdrs)
618 ib_dma_free_coherent(dev,
619 ic->i_recv_ring.w_nr *
620 sizeof(struct rds_header),
621 ic->i_recv_hdrs,
622 ic->i_recv_hdrs_dma);
623
624 if (ic->i_ack)
625 ib_dma_free_coherent(dev, sizeof(struct rds_header),
626 ic->i_ack, ic->i_ack_dma);
627
628 if (ic->i_sends)
629 rds_iw_send_clear_ring(ic);
630 if (ic->i_recvs)
631 rds_iw_recv_clear_ring(ic);
632
633 if (ic->i_cm_id->qp)
634 rdma_destroy_qp(ic->i_cm_id);
635 if (ic->i_send_cq)
636 ib_destroy_cq(ic->i_send_cq);
637 if (ic->i_recv_cq)
638 ib_destroy_cq(ic->i_recv_cq);
639
640 /*
641 * If associated with an rds_iw_device:
642 * Move connection back to the nodev list.
643 * Remove cm_id from the device cm_id list.
644 */
645 if (ic->rds_iwdev)
646 rds_iw_remove_conn(ic->rds_iwdev, conn);
647
648 rdma_destroy_id(ic->i_cm_id);
649
650 ic->i_cm_id = NULL;
651 ic->i_pd = NULL;
652 ic->i_mr = NULL;
653 ic->i_send_cq = NULL;
654 ic->i_recv_cq = NULL;
655 ic->i_send_hdrs = NULL;
656 ic->i_recv_hdrs = NULL;
657 ic->i_ack = NULL;
658 }
659 BUG_ON(ic->rds_iwdev);
660
661 /* Clear pending transmit */
662 if (ic->i_rm) {
663 rds_message_put(ic->i_rm);
664 ic->i_rm = NULL;
665 }
666
667 /* Clear the ACK state */
668 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
669#ifdef KERNEL_HAS_ATOMIC64
670 atomic64_set(&ic->i_ack_next, 0);
671#else
672 ic->i_ack_next = 0;
673#endif
674 ic->i_ack_recv = 0;
675
676 /* Clear flow control state */
677 ic->i_flowctl = 0;
678 atomic_set(&ic->i_credits, 0);
679
680 rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr);
681 rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr);
682
683 if (ic->i_iwinc) {
684 rds_inc_put(&ic->i_iwinc->ii_inc);
685 ic->i_iwinc = NULL;
686 }
687
688 vfree(ic->i_sends);
689 ic->i_sends = NULL;
690 vfree(ic->i_recvs);
691 ic->i_recvs = NULL;
692 rdsdebug("shutdown complete\n");
693}
694
695int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
696{
697 struct rds_iw_connection *ic;
698 unsigned long flags;
699
700 /* XXX too lazy? */
701 ic = kzalloc(sizeof(struct rds_iw_connection), gfp);
702 if (!ic)
703 return -ENOMEM;
704
705 INIT_LIST_HEAD(&ic->iw_node);
706 tasklet_init(&ic->i_recv_tasklet, rds_iw_recv_tasklet_fn,
707 (unsigned long) ic);
708 mutex_init(&ic->i_recv_mutex);
709#ifndef KERNEL_HAS_ATOMIC64
710 spin_lock_init(&ic->i_ack_lock);
711#endif
712
713 /*
714 * rds_iw_conn_shutdown() waits for these to be emptied so they
715 * must be initialized before it can be called.
716 */
717 rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr);
718 rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr);
719
720 ic->conn = conn;
721 conn->c_transport_data = ic;
722
723 spin_lock_irqsave(&iw_nodev_conns_lock, flags);
724 list_add_tail(&ic->iw_node, &iw_nodev_conns);
725 spin_unlock_irqrestore(&iw_nodev_conns_lock, flags);
726
727
728 rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data);
729 return 0;
730}
731
732/*
733 * Free a connection. Connection must be shut down and not set for reconnect.
734 */
735void rds_iw_conn_free(void *arg)
736{
737 struct rds_iw_connection *ic = arg;
738 spinlock_t *lock_ptr;
739
740 rdsdebug("ic %p\n", ic);
741
742 /*
743 * Conn is either on a dev's list or on the nodev list.
744 * A race with shutdown() or connect() would cause problems
745 * (since rds_iwdev would change) but that should never happen.
746 */
747 lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock;
748
749 spin_lock_irq(lock_ptr);
750 list_del(&ic->iw_node);
751 spin_unlock_irq(lock_ptr);
752
753 kfree(ic);
754}
755
756/*
757 * An error occurred on the connection
758 */
759void
760__rds_iw_conn_error(struct rds_connection *conn, const char *fmt, ...)
761{
762 va_list ap;
763
764 rds_conn_drop(conn);
765
766 va_start(ap, fmt);
767 vprintk(fmt, ap);
768 va_end(ap);
769}
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
deleted file mode 100644
index b09a40c1adce..000000000000
--- a/net/rds/iw_rdma.c
+++ /dev/null
@@ -1,837 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/slab.h>
35#include <linux/ratelimit.h>
36
37#include "rds.h"
38#include "iw.h"
39
40
41/*
42 * This is stored as mr->r_trans_private.
43 */
44struct rds_iw_mr {
45 struct rds_iw_device *device;
46 struct rds_iw_mr_pool *pool;
47 struct rdma_cm_id *cm_id;
48
49 struct ib_mr *mr;
50
51 struct rds_iw_mapping mapping;
52 unsigned char remap_count;
53};
54
55/*
56 * Our own little MR pool
57 */
58struct rds_iw_mr_pool {
59 struct rds_iw_device *device; /* back ptr to the device that owns us */
60
61 struct mutex flush_lock; /* serialize fmr invalidate */
62 struct work_struct flush_worker; /* flush worker */
63
64 spinlock_t list_lock; /* protect variables below */
65 atomic_t item_count; /* total # of MRs */
66 atomic_t dirty_count; /* # dirty of MRs */
67 struct list_head dirty_list; /* dirty mappings */
68 struct list_head clean_list; /* unused & unmapped MRs */
69 atomic_t free_pinned; /* memory pinned by free MRs */
70 unsigned long max_message_size; /* in pages */
71 unsigned long max_items;
72 unsigned long max_items_soft;
73 unsigned long max_free_pinned;
74 int max_pages;
75};
76
77static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
78static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
79static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
80static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
81 struct rds_iw_mr *ibmr,
82 struct scatterlist *sg, unsigned int nents);
83static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
84static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
85 struct list_head *unmap_list,
86 struct list_head *kill_list,
87 int *unpinned);
88static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
89
90static int rds_iw_get_device(struct sockaddr_in *src, struct sockaddr_in *dst,
91 struct rds_iw_device **rds_iwdev,
92 struct rdma_cm_id **cm_id)
93{
94 struct rds_iw_device *iwdev;
95 struct rds_iw_cm_id *i_cm_id;
96
97 *rds_iwdev = NULL;
98 *cm_id = NULL;
99
100 list_for_each_entry(iwdev, &rds_iw_devices, list) {
101 spin_lock_irq(&iwdev->spinlock);
102 list_for_each_entry(i_cm_id, &iwdev->cm_id_list, list) {
103 struct sockaddr_in *src_addr, *dst_addr;
104
105 src_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.src_addr;
106 dst_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.dst_addr;
107
108 rdsdebug("local ipaddr = %x port %d, "
109 "remote ipaddr = %x port %d, "
110 "looking for %x port %d, "
111 "remote ipaddr = %x port %d\n",
112 src_addr->sin_addr.s_addr,
113 src_addr->sin_port,
114 dst_addr->sin_addr.s_addr,
115 dst_addr->sin_port,
116 src->sin_addr.s_addr,
117 src->sin_port,
118 dst->sin_addr.s_addr,
119 dst->sin_port);
120#ifdef WORKING_TUPLE_DETECTION
121 if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr &&
122 src_addr->sin_port == src->sin_port &&
123 dst_addr->sin_addr.s_addr == dst->sin_addr.s_addr &&
124 dst_addr->sin_port == dst->sin_port) {
125#else
126 /* FIXME - needs to compare the local and remote
127 * ipaddr/port tuple, but the ipaddr is the only
128 * available information in the rds_sock (as the rest are
129 * zero'ed). It doesn't appear to be properly populated
130 * during connection setup...
131 */
132 if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr) {
133#endif
134 spin_unlock_irq(&iwdev->spinlock);
135 *rds_iwdev = iwdev;
136 *cm_id = i_cm_id->cm_id;
137 return 0;
138 }
139 }
140 spin_unlock_irq(&iwdev->spinlock);
141 }
142
143 return 1;
144}
145
146static int rds_iw_add_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id)
147{
148 struct rds_iw_cm_id *i_cm_id;
149
150 i_cm_id = kmalloc(sizeof *i_cm_id, GFP_KERNEL);
151 if (!i_cm_id)
152 return -ENOMEM;
153
154 i_cm_id->cm_id = cm_id;
155
156 spin_lock_irq(&rds_iwdev->spinlock);
157 list_add_tail(&i_cm_id->list, &rds_iwdev->cm_id_list);
158 spin_unlock_irq(&rds_iwdev->spinlock);
159
160 return 0;
161}
162
163static void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev,
164 struct rdma_cm_id *cm_id)
165{
166 struct rds_iw_cm_id *i_cm_id;
167
168 spin_lock_irq(&rds_iwdev->spinlock);
169 list_for_each_entry(i_cm_id, &rds_iwdev->cm_id_list, list) {
170 if (i_cm_id->cm_id == cm_id) {
171 list_del(&i_cm_id->list);
172 kfree(i_cm_id);
173 break;
174 }
175 }
176 spin_unlock_irq(&rds_iwdev->spinlock);
177}
178
179
180int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id)
181{
182 struct sockaddr_in *src_addr, *dst_addr;
183 struct rds_iw_device *rds_iwdev_old;
184 struct rdma_cm_id *pcm_id;
185 int rc;
186
187 src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
188 dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
189
190 rc = rds_iw_get_device(src_addr, dst_addr, &rds_iwdev_old, &pcm_id);
191 if (rc)
192 rds_iw_remove_cm_id(rds_iwdev, cm_id);
193
194 return rds_iw_add_cm_id(rds_iwdev, cm_id);
195}
196
197void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
198{
199 struct rds_iw_connection *ic = conn->c_transport_data;
200
201 /* conn was previously on the nodev_conns_list */
202 spin_lock_irq(&iw_nodev_conns_lock);
203 BUG_ON(list_empty(&iw_nodev_conns));
204 BUG_ON(list_empty(&ic->iw_node));
205 list_del(&ic->iw_node);
206
207 spin_lock(&rds_iwdev->spinlock);
208 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
209 spin_unlock(&rds_iwdev->spinlock);
210 spin_unlock_irq(&iw_nodev_conns_lock);
211
212 ic->rds_iwdev = rds_iwdev;
213}
214
215void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
216{
217 struct rds_iw_connection *ic = conn->c_transport_data;
218
219 /* place conn on nodev_conns_list */
220 spin_lock(&iw_nodev_conns_lock);
221
222 spin_lock_irq(&rds_iwdev->spinlock);
223 BUG_ON(list_empty(&ic->iw_node));
224 list_del(&ic->iw_node);
225 spin_unlock_irq(&rds_iwdev->spinlock);
226
227 list_add_tail(&ic->iw_node, &iw_nodev_conns);
228
229 spin_unlock(&iw_nodev_conns_lock);
230
231 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
232 ic->rds_iwdev = NULL;
233}
234
235void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock)
236{
237 struct rds_iw_connection *ic, *_ic;
238 LIST_HEAD(tmp_list);
239
240 /* avoid calling conn_destroy with irqs off */
241 spin_lock_irq(list_lock);
242 list_splice(list, &tmp_list);
243 INIT_LIST_HEAD(list);
244 spin_unlock_irq(list_lock);
245
246 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node)
247 rds_conn_destroy(ic->conn);
248}
249
250static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg,
251 struct scatterlist *list, unsigned int sg_len)
252{
253 sg->list = list;
254 sg->len = sg_len;
255 sg->dma_len = 0;
256 sg->dma_npages = 0;
257 sg->bytes = 0;
258}
259
260static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
261 struct rds_iw_scatterlist *sg)
262{
263 struct ib_device *dev = rds_iwdev->dev;
264 int i, ret;
265
266 WARN_ON(sg->dma_len);
267
268 sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
269 if (unlikely(!sg->dma_len)) {
270 printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n");
271 return -EBUSY;
272 }
273
274 sg->bytes = 0;
275 sg->dma_npages = 0;
276
277 ret = -EINVAL;
278 for (i = 0; i < sg->dma_len; ++i) {
279 unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
280 u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
281 u64 end_addr;
282
283 sg->bytes += dma_len;
284
285 end_addr = dma_addr + dma_len;
286 if (dma_addr & PAGE_MASK) {
287 if (i > 0)
288 goto out_unmap;
289 dma_addr &= ~PAGE_MASK;
290 }
291 if (end_addr & PAGE_MASK) {
292 if (i < sg->dma_len - 1)
293 goto out_unmap;
294 end_addr = (end_addr + PAGE_MASK) & ~PAGE_MASK;
295 }
296
297 sg->dma_npages += (end_addr - dma_addr) >> PAGE_SHIFT;
298 }
299
300 /* Now gather the dma addrs into one list */
301 if (sg->dma_npages > fastreg_message_size)
302 goto out_unmap;
303
304
305
306 return 0;
307
308out_unmap:
309 ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
310 sg->dma_len = 0;
311 return ret;
312}
313
314
315struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *rds_iwdev)
316{
317 struct rds_iw_mr_pool *pool;
318
319 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
320 if (!pool) {
321 printk(KERN_WARNING "RDS/IW: rds_iw_create_mr_pool alloc error\n");
322 return ERR_PTR(-ENOMEM);
323 }
324
325 pool->device = rds_iwdev;
326 INIT_LIST_HEAD(&pool->dirty_list);
327 INIT_LIST_HEAD(&pool->clean_list);
328 mutex_init(&pool->flush_lock);
329 spin_lock_init(&pool->list_lock);
330 INIT_WORK(&pool->flush_worker, rds_iw_mr_pool_flush_worker);
331
332 pool->max_message_size = fastreg_message_size;
333 pool->max_items = fastreg_pool_size;
334 pool->max_free_pinned = pool->max_items * pool->max_message_size / 4;
335 pool->max_pages = fastreg_message_size;
336
337 /* We never allow more than max_items MRs to be allocated.
338 * When we exceed max_items_soft, we start freeing
339 * items more aggressively.
340 * Make sure that max_items > max_items_soft > max_items / 2
341 */
342 pool->max_items_soft = pool->max_items * 3 / 4;
343
344 return pool;
345}
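As a quick check of the sizing rule in the comment above (illustrative numbers; fastreg_pool_size is a module tunable whose default is taken here to be RDS_FASTREG_POOL_SIZE, i.e. 2048):

/*
 * With an assumed pool size of 2048:
 *   max_items      = 2048
 *   max_items_soft = 2048 * 3 / 4 = 1536
 *   max_items / 2  = 1024
 * so the required ordering max_items > max_items_soft > max_items / 2
 * (2048 > 1536 > 1024) holds.
 */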
346
347void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo)
348{
349 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
350
351 iinfo->rdma_mr_max = pool->max_items;
352 iinfo->rdma_mr_size = pool->max_pages;
353}
354
355void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *pool)
356{
357 flush_workqueue(rds_wq);
358 rds_iw_flush_mr_pool(pool, 1);
359 BUG_ON(atomic_read(&pool->item_count));
360 BUG_ON(atomic_read(&pool->free_pinned));
361 kfree(pool);
362}
363
364static inline struct rds_iw_mr *rds_iw_reuse_fmr(struct rds_iw_mr_pool *pool)
365{
366 struct rds_iw_mr *ibmr = NULL;
367 unsigned long flags;
368
369 spin_lock_irqsave(&pool->list_lock, flags);
370 if (!list_empty(&pool->clean_list)) {
371 ibmr = list_entry(pool->clean_list.next, struct rds_iw_mr, mapping.m_list);
372 list_del_init(&ibmr->mapping.m_list);
373 }
374 spin_unlock_irqrestore(&pool->list_lock, flags);
375
376 return ibmr;
377}
378
379static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev)
380{
381 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
382 struct rds_iw_mr *ibmr = NULL;
383 int err = 0, iter = 0;
384
385 while (1) {
386 ibmr = rds_iw_reuse_fmr(pool);
387 if (ibmr)
388 return ibmr;
389
390 /* No clean MRs - now we have the choice of either
391 * allocating a fresh MR up to the limit imposed by the
392 * driver, or flushing any dirty unused MRs.
393 * We try to avoid stalling in the send path if possible,
394 * so we allocate as long as we're allowed to.
395 *
396 * We're fussy with enforcing the FMR limit, though. If the driver
397 * tells us we can't use more than N fmrs, we shouldn't start
398 * arguing with it */
399 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
400 break;
401
402 atomic_dec(&pool->item_count);
403
404 if (++iter > 2) {
405 rds_iw_stats_inc(s_iw_rdma_mr_pool_depleted);
406 return ERR_PTR(-EAGAIN);
407 }
408
409 /* We do have some empty MRs. Flush them out. */
410 rds_iw_stats_inc(s_iw_rdma_mr_pool_wait);
411 rds_iw_flush_mr_pool(pool, 0);
412 }
413
414 ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL);
415 if (!ibmr) {
416 err = -ENOMEM;
417 goto out_no_cigar;
418 }
419
420 spin_lock_init(&ibmr->mapping.m_lock);
421 INIT_LIST_HEAD(&ibmr->mapping.m_list);
422 ibmr->mapping.m_mr = ibmr;
423
424 err = rds_iw_init_reg(pool, ibmr);
425 if (err)
426 goto out_no_cigar;
427
428 rds_iw_stats_inc(s_iw_rdma_mr_alloc);
429 return ibmr;
430
431out_no_cigar:
432 if (ibmr) {
433 rds_iw_destroy_fastreg(pool, ibmr);
434 kfree(ibmr);
435 }
436 atomic_dec(&pool->item_count);
437 return ERR_PTR(err);
438}
439
440void rds_iw_sync_mr(void *trans_private, int direction)
441{
442 struct rds_iw_mr *ibmr = trans_private;
443 struct rds_iw_device *rds_iwdev = ibmr->device;
444
445 switch (direction) {
446 case DMA_FROM_DEVICE:
447 ib_dma_sync_sg_for_cpu(rds_iwdev->dev, ibmr->mapping.m_sg.list,
448 ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL);
449 break;
450 case DMA_TO_DEVICE:
451 ib_dma_sync_sg_for_device(rds_iwdev->dev, ibmr->mapping.m_sg.list,
452 ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL);
453 break;
454 }
455}
456
457/*
458 * Flush our pool of MRs.
459 * At a minimum, all currently unused MRs are unmapped.
460 * If the number of MRs allocated exceeds the limit, we also try
461 * to free as many MRs as needed to get back to this limit.
462 */
463static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
464{
465 struct rds_iw_mr *ibmr, *next;
466 LIST_HEAD(unmap_list);
467 LIST_HEAD(kill_list);
468 unsigned long flags;
469 unsigned int nfreed = 0, ncleaned = 0, unpinned = 0;
470
471 rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
472
473 mutex_lock(&pool->flush_lock);
474
475 spin_lock_irqsave(&pool->list_lock, flags);
476 /* Get the list of all mappings to be destroyed */
477 list_splice_init(&pool->dirty_list, &unmap_list);
478 if (free_all)
479 list_splice_init(&pool->clean_list, &kill_list);
480 spin_unlock_irqrestore(&pool->list_lock, flags);
481
482 /* Batched invalidate of dirty MRs.
483 * For FMR based MRs, the mappings on the unmap list are
484 * actually members of an ibmr (ibmr->mapping). They either
485 * migrate to the kill_list, or have been cleaned and should be
486 * moved to the clean_list.
487 * For fastregs, they will be dynamically allocated, and
488 * will be destroyed by the unmap function.
489 */
490 if (!list_empty(&unmap_list)) {
491 ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list,
492 &kill_list, &unpinned);
493 /* If we've been asked to destroy all MRs, move those
494 * that were simply cleaned to the kill list */
495 if (free_all)
496 list_splice_init(&unmap_list, &kill_list);
497 }
498
499 /* Destroy any MRs that are past their best before date */
500 list_for_each_entry_safe(ibmr, next, &kill_list, mapping.m_list) {
501 rds_iw_stats_inc(s_iw_rdma_mr_free);
502 list_del(&ibmr->mapping.m_list);
503 rds_iw_destroy_fastreg(pool, ibmr);
504 kfree(ibmr);
505 nfreed++;
506 }
507
508 /* Anything that remains is a laundered ibmr, which we can add
509 * back to the clean list. */
510 if (!list_empty(&unmap_list)) {
511 spin_lock_irqsave(&pool->list_lock, flags);
512 list_splice(&unmap_list, &pool->clean_list);
513 spin_unlock_irqrestore(&pool->list_lock, flags);
514 }
515
516 atomic_sub(unpinned, &pool->free_pinned);
517 atomic_sub(ncleaned, &pool->dirty_count);
518 atomic_sub(nfreed, &pool->item_count);
519
520 mutex_unlock(&pool->flush_lock);
521}
522
523static void rds_iw_mr_pool_flush_worker(struct work_struct *work)
524{
525 struct rds_iw_mr_pool *pool = container_of(work, struct rds_iw_mr_pool, flush_worker);
526
527 rds_iw_flush_mr_pool(pool, 0);
528}
529
530void rds_iw_free_mr(void *trans_private, int invalidate)
531{
532 struct rds_iw_mr *ibmr = trans_private;
533 struct rds_iw_mr_pool *pool = ibmr->device->mr_pool;
534
535 rdsdebug("RDS/IW: free_mr nents %u\n", ibmr->mapping.m_sg.len);
536 if (!pool)
537 return;
538
539 /* Return it to the pool's free list */
540 rds_iw_free_fastreg(pool, ibmr);
541
542 /* If we've pinned too many pages, request a flush */
543 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
544 atomic_read(&pool->dirty_count) >= pool->max_items / 10)
545 queue_work(rds_wq, &pool->flush_worker);
546
547 if (invalidate) {
548 if (likely(!in_interrupt())) {
549 rds_iw_flush_mr_pool(pool, 0);
550 } else {
551 /* We get here if the user created a MR marked
552 * as use_once and invalidate at the same time. */
553 queue_work(rds_wq, &pool->flush_worker);
554 }
555 }
556}
557
558void rds_iw_flush_mrs(void)
559{
560 struct rds_iw_device *rds_iwdev;
561
562 list_for_each_entry(rds_iwdev, &rds_iw_devices, list) {
563 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
564
565 if (pool)
566 rds_iw_flush_mr_pool(pool, 0);
567 }
568}
569
570void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
571 struct rds_sock *rs, u32 *key_ret)
572{
573 struct rds_iw_device *rds_iwdev;
574 struct rds_iw_mr *ibmr = NULL;
575 struct rdma_cm_id *cm_id;
576 struct sockaddr_in src = {
577 .sin_addr.s_addr = rs->rs_bound_addr,
578 .sin_port = rs->rs_bound_port,
579 };
580 struct sockaddr_in dst = {
581 .sin_addr.s_addr = rs->rs_conn_addr,
582 .sin_port = rs->rs_conn_port,
583 };
584 int ret;
585
586 ret = rds_iw_get_device(&src, &dst, &rds_iwdev, &cm_id);
587 if (ret || !cm_id) {
588 ret = -ENODEV;
589 goto out;
590 }
591
592 if (!rds_iwdev->mr_pool) {
593 ret = -ENODEV;
594 goto out;
595 }
596
597 ibmr = rds_iw_alloc_mr(rds_iwdev);
598 if (IS_ERR(ibmr))
599 return ibmr;
600
601 ibmr->cm_id = cm_id;
602 ibmr->device = rds_iwdev;
603
604 ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents);
605 if (ret == 0)
606 *key_ret = ibmr->mr->rkey;
607 else
608 printk(KERN_WARNING "RDS/IW: failed to map mr (errno=%d)\n", ret);
609
610out:
611 if (ret) {
612 if (ibmr)
613 rds_iw_free_mr(ibmr, 0);
614 ibmr = ERR_PTR(ret);
615 }
616 return ibmr;
617}
618
619/*
620 * iWARP reg handling
621 *
622 * The life cycle of a fastreg registration is a bit different from
623 * FMRs.
624 * The idea behind fastreg is to have one MR, to which we bind different
625 * mappings over time. To avoid stalling on the expensive map and invalidate
626 * operations, these operations are pipelined on the same send queue on
627 * which we want to send the message containing the r_key.
628 *
629 * This creates a bit of a problem for us, as we do not have the destination
630 * IP in GET_MR, so the connection must be set up prior to the GET_MR call for
631 * RDMA to be set up correctly. If a fastreg request is present, rds_iw_xmit
632 * will try to queue a LOCAL_INV (if needed) and a REG_MR work request
633 * before queuing the SEND. When completions for these arrive, they are
634 * dispatched to the MR, which has a bit set showing that RDMA can be performed.
635 *
636 * There is another interesting aspect that's related to invalidation.
637 * The application can request that a mapping is invalidated in FREE_MR.
638 * The expectation there is that this invalidation step includes ALL
639 * PREVIOUSLY FREED MRs.
640 */
641static int rds_iw_init_reg(struct rds_iw_mr_pool *pool,
642 struct rds_iw_mr *ibmr)
643{
644 struct rds_iw_device *rds_iwdev = pool->device;
645 struct ib_mr *mr;
646 int err;
647
648 mr = ib_alloc_mr(rds_iwdev->pd, IB_MR_TYPE_MEM_REG,
649 pool->max_message_size);
650 if (IS_ERR(mr)) {
651 err = PTR_ERR(mr);
652
653 printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed (err=%d)\n", err);
654 return err;
655 }
656
657 ibmr->mr = mr;
658 return 0;
659}
660
661static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping)
662{
663 struct rds_iw_mr *ibmr = mapping->m_mr;
664 struct rds_iw_scatterlist *m_sg = &mapping->m_sg;
665 struct ib_reg_wr reg_wr;
666 struct ib_send_wr *failed_wr;
667 int ret, n;
668
669 n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE);
670 if (unlikely(n != m_sg->len))
671 return n < 0 ? n : -EINVAL;
672
673 reg_wr.wr.next = NULL;
674 reg_wr.wr.opcode = IB_WR_REG_MR;
675 reg_wr.wr.wr_id = RDS_IW_REG_WR_ID;
676 reg_wr.wr.num_sge = 0;
677 reg_wr.mr = ibmr->mr;
678 reg_wr.key = mapping->m_rkey;
679 reg_wr.access = IB_ACCESS_LOCAL_WRITE |
680 IB_ACCESS_REMOTE_READ |
681 IB_ACCESS_REMOTE_WRITE;
682
683 /*
684 * Perform a WR for the reg_mr. Each individual page
685 * in the sg list is added to the fast reg page list and placed
686	 * inside the reg_mr WR. The key used is a rolling 8-bit
687 * counter, which should guarantee uniqueness.
688 */
689 ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
690 mapping->m_rkey = ibmr->mr->rkey;
691
692 failed_wr = &reg_wr.wr;
693 ret = ib_post_send(ibmr->cm_id->qp, &reg_wr.wr, &failed_wr);
694 BUG_ON(failed_wr != &reg_wr.wr);
695 if (ret)
696 printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
697 __func__, __LINE__, ret);
698 return ret;
699}
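/*
 * Editor's note: each re-registration above rolls the MR key with
 * ib_update_fast_reg_key(mr, ibmr->remap_count++), so stale R_Keys from
 * earlier mappings stop matching.  A standalone sketch of that arithmetic
 * follows, assuming the low 8 bits of the key carry the variant counter;
 * fake_update_key() and the starting rkey value are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t fake_update_key(uint32_t rkey, uint8_t newkey)
{
	/* Keep the 24-bit MR index, replace the 8-bit key portion. */
	return (rkey & 0xffffff00u) | newkey;
}

int main(void)
{
	uint32_t rkey = 0x00123400u;	/* as if returned by the initial MR allocation */
	uint8_t remap_count = 0;
	int i;

	for (i = 0; i < 3; i++) {
		rkey = fake_update_key(rkey, remap_count++);
		printf("registration %d uses rkey 0x%08x\n", i, rkey);
	}
	return 0;
}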
700
701static int rds_iw_rdma_fastreg_inv(struct rds_iw_mr *ibmr)
702{
703 struct ib_send_wr s_wr, *failed_wr;
704 int ret = 0;
705
706 if (!ibmr->cm_id->qp || !ibmr->mr)
707 goto out;
708
709 memset(&s_wr, 0, sizeof(s_wr));
710 s_wr.wr_id = RDS_IW_LOCAL_INV_WR_ID;
711 s_wr.opcode = IB_WR_LOCAL_INV;
712 s_wr.ex.invalidate_rkey = ibmr->mr->rkey;
713 s_wr.send_flags = IB_SEND_SIGNALED;
714
715 failed_wr = &s_wr;
716 ret = ib_post_send(ibmr->cm_id->qp, &s_wr, &failed_wr);
717 if (ret) {
718 printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
719 __func__, __LINE__, ret);
720 goto out;
721 }
722out:
723 return ret;
724}
725
726static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
727 struct rds_iw_mr *ibmr,
728 struct scatterlist *sg,
729 unsigned int sg_len)
730{
731 struct rds_iw_device *rds_iwdev = pool->device;
732 struct rds_iw_mapping *mapping = &ibmr->mapping;
733	u64 *dma_pages = NULL;	/* freed unconditionally at out: */
734 int ret = 0;
735
736 rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len);
737
738 ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
739 if (ret) {
740 dma_pages = NULL;
741 goto out;
742 }
743
744 if (mapping->m_sg.dma_len > pool->max_message_size) {
745 ret = -EMSGSIZE;
746 goto out;
747 }
748
749 ret = rds_iw_rdma_reg_mr(mapping);
750 if (ret)
751 goto out;
752
753 rds_iw_stats_inc(s_iw_rdma_mr_used);
754
755out:
756 kfree(dma_pages);
757
758 return ret;
759}
760
761/*
762 * "Free" a fastreg MR.
763 */
764static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool,
765 struct rds_iw_mr *ibmr)
766{
767 unsigned long flags;
768 int ret;
769
770 if (!ibmr->mapping.m_sg.dma_len)
771 return;
772
773 ret = rds_iw_rdma_fastreg_inv(ibmr);
774 if (ret)
775 return;
776
777 /* Try to post the LOCAL_INV WR to the queue. */
778 spin_lock_irqsave(&pool->list_lock, flags);
779
780 list_add_tail(&ibmr->mapping.m_list, &pool->dirty_list);
781 atomic_add(ibmr->mapping.m_sg.len, &pool->free_pinned);
782 atomic_inc(&pool->dirty_count);
783
784 spin_unlock_irqrestore(&pool->list_lock, flags);
785}
786
787static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
788 struct list_head *unmap_list,
789 struct list_head *kill_list,
790 int *unpinned)
791{
792 struct rds_iw_mapping *mapping, *next;
793 unsigned int ncleaned = 0;
794 LIST_HEAD(laundered);
795
796 /* Batched invalidation of fastreg MRs.
797 * Why do we do it this way, even though we could pipeline unmap
798 * and remap? The reason is the application semantics - when the
799 * application requests an invalidation of MRs, it expects all
800 * previously released R_Keys to become invalid.
801 *
802 * If we implement MR reuse naively, we risk memory corruption
803 * (this has actually been observed). So the default behavior
804	 * requires that an MR goes through an explicit unmap operation before
805	 * we can reuse it.
806	 *
807	 * We could probably improve on this a little, by allowing immediate
808	 * reuse of an MR on the same socket (e.g. you could add a small
809	 * cache of unused MRs to struct rds_socket - GET_MR could grab one
810 * of these without requiring an explicit invalidate).
811 */
812 while (!list_empty(unmap_list)) {
813 unsigned long flags;
814
815 spin_lock_irqsave(&pool->list_lock, flags);
816 list_for_each_entry_safe(mapping, next, unmap_list, m_list) {
817 *unpinned += mapping->m_sg.len;
818 list_move(&mapping->m_list, &laundered);
819 ncleaned++;
820 }
821 spin_unlock_irqrestore(&pool->list_lock, flags);
822 }
823
824 /* Move all laundered mappings back to the unmap list.
825 * We do not kill any WRs right now - it doesn't seem the
826 * fastreg API has a max_remap limit. */
827 list_splice_init(&laundered, unmap_list);
828
829 return ncleaned;
830}
831
832static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool,
833 struct rds_iw_mr *ibmr)
834{
835 if (ibmr->mr)
836 ib_dereg_mr(ibmr->mr);
837}
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
deleted file mode 100644
index a66d1794b2d0..000000000000
--- a/net/rds/iw_recv.c
+++ /dev/null
@@ -1,904 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/slab.h>
35#include <linux/pci.h>
36#include <linux/dma-mapping.h>
37#include <rdma/rdma_cm.h>
38
39#include "rds.h"
40#include "iw.h"
41
42static struct kmem_cache *rds_iw_incoming_slab;
43static struct kmem_cache *rds_iw_frag_slab;
44static atomic_t rds_iw_allocation = ATOMIC_INIT(0);
45
46static void rds_iw_frag_drop_page(struct rds_page_frag *frag)
47{
48 rdsdebug("frag %p page %p\n", frag, frag->f_page);
49 __free_page(frag->f_page);
50 frag->f_page = NULL;
51}
52
53static void rds_iw_frag_free(struct rds_page_frag *frag)
54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page);
57 kmem_cache_free(rds_iw_frag_slab, frag);
58}
59
60/*
61 * We map a page at a time. Its fragments are posted in order. This
62 * is called in fragment order as the fragments get receive completion events.
63 * Only the last frag in the page performs the unmapping.
64 *
65 * It's OK for ring cleanup to call this in whatever order it likes because
66 * DMA is not in flight and so we can unmap while other ring entries still
67 * hold page references in their frags.
68 */
69static void rds_iw_recv_unmap_page(struct rds_iw_connection *ic,
70 struct rds_iw_recv_work *recv)
71{
72 struct rds_page_frag *frag = recv->r_frag;
73
74 rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
75 if (frag->f_mapped)
76 ib_dma_unmap_page(ic->i_cm_id->device,
77 frag->f_mapped,
78 RDS_FRAG_SIZE, DMA_FROM_DEVICE);
79 frag->f_mapped = 0;
80}
81
82void rds_iw_recv_init_ring(struct rds_iw_connection *ic)
83{
84 struct rds_iw_recv_work *recv;
85 u32 i;
86
87 for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
88 struct ib_sge *sge;
89
90 recv->r_iwinc = NULL;
91 recv->r_frag = NULL;
92
93 recv->r_wr.next = NULL;
94 recv->r_wr.wr_id = i;
95 recv->r_wr.sg_list = recv->r_sge;
96 recv->r_wr.num_sge = RDS_IW_RECV_SGE;
97
98 sge = rds_iw_data_sge(ic, recv->r_sge);
99 sge->addr = 0;
100 sge->length = RDS_FRAG_SIZE;
101 sge->lkey = 0;
102
103 sge = rds_iw_header_sge(ic, recv->r_sge);
104 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
105 sge->length = sizeof(struct rds_header);
106 sge->lkey = 0;
107 }
108}
109
110static void rds_iw_recv_clear_one(struct rds_iw_connection *ic,
111 struct rds_iw_recv_work *recv)
112{
113 if (recv->r_iwinc) {
114 rds_inc_put(&recv->r_iwinc->ii_inc);
115 recv->r_iwinc = NULL;
116 }
117 if (recv->r_frag) {
118 rds_iw_recv_unmap_page(ic, recv);
119 if (recv->r_frag->f_page)
120 rds_iw_frag_drop_page(recv->r_frag);
121 rds_iw_frag_free(recv->r_frag);
122 recv->r_frag = NULL;
123 }
124}
125
126void rds_iw_recv_clear_ring(struct rds_iw_connection *ic)
127{
128 u32 i;
129
130 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
131 rds_iw_recv_clear_one(ic, &ic->i_recvs[i]);
132
133 if (ic->i_frag.f_page)
134 rds_iw_frag_drop_page(&ic->i_frag);
135}
136
137static int rds_iw_recv_refill_one(struct rds_connection *conn,
138 struct rds_iw_recv_work *recv,
139 gfp_t kptr_gfp, gfp_t page_gfp)
140{
141 struct rds_iw_connection *ic = conn->c_transport_data;
142 dma_addr_t dma_addr;
143 struct ib_sge *sge;
144 int ret = -ENOMEM;
145
146 if (!recv->r_iwinc) {
147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) {
148 rds_iw_stats_inc(s_iw_rx_alloc_limit);
149 goto out;
150 }
151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab,
152 kptr_gfp);
153 if (!recv->r_iwinc) {
154 atomic_dec(&rds_iw_allocation);
155 goto out;
156 }
157 INIT_LIST_HEAD(&recv->r_iwinc->ii_frags);
158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr);
159 }
160
161 if (!recv->r_frag) {
162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp);
163 if (!recv->r_frag)
164 goto out;
165 INIT_LIST_HEAD(&recv->r_frag->f_item);
166 recv->r_frag->f_page = NULL;
167 }
168
169 if (!ic->i_frag.f_page) {
170 ic->i_frag.f_page = alloc_page(page_gfp);
171 if (!ic->i_frag.f_page)
172 goto out;
173 ic->i_frag.f_offset = 0;
174 }
175
176 dma_addr = ib_dma_map_page(ic->i_cm_id->device,
177 ic->i_frag.f_page,
178 ic->i_frag.f_offset,
179 RDS_FRAG_SIZE,
180 DMA_FROM_DEVICE);
181 if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
182 goto out;
183
184 /*
185 * Once we get the RDS_PAGE_LAST_OFF frag then rds_iw_frag_unmap()
186 * must be called on this recv. This happens as completions hit
187 * in order or on connection shutdown.
188 */
189 recv->r_frag->f_page = ic->i_frag.f_page;
190 recv->r_frag->f_offset = ic->i_frag.f_offset;
191 recv->r_frag->f_mapped = dma_addr;
192
193 sge = rds_iw_data_sge(ic, recv->r_sge);
194 sge->addr = dma_addr;
195 sge->length = RDS_FRAG_SIZE;
196
197 sge = rds_iw_header_sge(ic, recv->r_sge);
198 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
199 sge->length = sizeof(struct rds_header);
200
201 get_page(recv->r_frag->f_page);
202
203 if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) {
204 ic->i_frag.f_offset += RDS_FRAG_SIZE;
205 } else {
206 put_page(ic->i_frag.f_page);
207 ic->i_frag.f_page = NULL;
208 ic->i_frag.f_offset = 0;
209 }
210
211 ret = 0;
212out:
213 return ret;
214}
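/*
 * Editor's note: the refill path above hands out one page in
 * RDS_FRAG_SIZE slices, taking a page reference per slice and dropping
 * the connection's own reference once the last slice is used.  A
 * userspace sketch of that offset/refcount bookkeeping follows; the
 * toy_page/page_cursor types and the sizes are invented for the example.
 */
#include <stdlib.h>

#define TOY_PAGE_SIZE	4096u
#define TOY_FRAG_SIZE	1024u
#define TOY_LAST_OFF	(TOY_PAGE_SIZE - TOY_FRAG_SIZE)

struct toy_page {
	char data[TOY_PAGE_SIZE];
	unsigned int refs;
};

struct page_cursor {
	struct toy_page *page;	/* current backing page, NULL when used up */
	unsigned int offset;	/* next free slice within it */
};

struct frag {
	struct toy_page *page;
	unsigned int offset;
};

/* Hand out the next fragment-sized slice of the current page. */
static int take_frag(struct page_cursor *cur, struct frag *f)
{
	if (!cur->page) {
		cur->page = calloc(1, sizeof(*cur->page));	/* cf. alloc_page() */
		if (!cur->page)
			return -1;
		cur->page->refs = 1;	/* the cursor's own reference */
		cur->offset = 0;
	}

	f->page = cur->page;
	f->offset = cur->offset;
	f->page->refs++;		/* cf. get_page() for the new slice */

	if (cur->offset < TOY_LAST_OFF) {
		cur->offset += TOY_FRAG_SIZE;
	} else {
		/* Last slice handed out: the cursor drops its own reference. */
		if (--cur->page->refs == 0)
			free(cur->page);
		cur->page = NULL;
		cur->offset = 0;
	}
	return 0;
}

/* Slice consumers drop their reference when done (cf. put_page()). */
static void put_frag(struct frag *f)
{
	if (--f->page->refs == 0)
		free(f->page);
}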
215
216/*
217 * This tries to allocate and post unused work requests after making sure that
218 * they have all the allocations they need to queue received fragments into
219 * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc
220 * pairs don't go unmatched.
221 *
222 * -1 is returned if posting fails due to temporary resource exhaustion.
223 */
224int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
225 gfp_t page_gfp, int prefill)
226{
227 struct rds_iw_connection *ic = conn->c_transport_data;
228 struct rds_iw_recv_work *recv;
229 struct ib_recv_wr *failed_wr;
230 unsigned int posted = 0;
231 int ret = 0;
232 u32 pos;
233
234 while ((prefill || rds_conn_up(conn)) &&
235 rds_iw_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
236 if (pos >= ic->i_recv_ring.w_nr) {
237 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
238 pos);
239 ret = -EINVAL;
240 break;
241 }
242
243 recv = &ic->i_recvs[pos];
244 ret = rds_iw_recv_refill_one(conn, recv, kptr_gfp, page_gfp);
245 if (ret) {
246 ret = -1;
247 break;
248 }
249
250 /* XXX when can this fail? */
251 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
252 rdsdebug("recv %p iwinc %p page %p addr %lu ret %d\n", recv,
253 recv->r_iwinc, recv->r_frag->f_page,
254 (long) recv->r_frag->f_mapped, ret);
255 if (ret) {
256 rds_iw_conn_error(conn, "recv post on "
257 "%pI4 returned %d, disconnecting and "
258 "reconnecting\n", &conn->c_faddr,
259 ret);
260 ret = -1;
261 break;
262 }
263
264 posted++;
265 }
266
267 /* We're doing flow control - update the window. */
268 if (ic->i_flowctl && posted)
269 rds_iw_advertise_credits(conn, posted);
270
271 if (ret)
272 rds_iw_ring_unalloc(&ic->i_recv_ring, 1);
273 return ret;
274}
275
276static void rds_iw_inc_purge(struct rds_incoming *inc)
277{
278 struct rds_iw_incoming *iwinc;
279 struct rds_page_frag *frag;
280 struct rds_page_frag *pos;
281
282 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
283 rdsdebug("purging iwinc %p inc %p\n", iwinc, inc);
284
285 list_for_each_entry_safe(frag, pos, &iwinc->ii_frags, f_item) {
286 list_del_init(&frag->f_item);
287 rds_iw_frag_drop_page(frag);
288 rds_iw_frag_free(frag);
289 }
290}
291
292void rds_iw_inc_free(struct rds_incoming *inc)
293{
294 struct rds_iw_incoming *iwinc;
295
296 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
297
298 rds_iw_inc_purge(inc);
299 rdsdebug("freeing iwinc %p inc %p\n", iwinc, inc);
300 BUG_ON(!list_empty(&iwinc->ii_frags));
301 kmem_cache_free(rds_iw_incoming_slab, iwinc);
302 atomic_dec(&rds_iw_allocation);
303 BUG_ON(atomic_read(&rds_iw_allocation) < 0);
304}
305
306int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
307{
308 struct rds_iw_incoming *iwinc;
309 struct rds_page_frag *frag;
310 unsigned long to_copy;
311 unsigned long frag_off = 0;
312 int copied = 0;
313 int ret;
314 u32 len;
315
316 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
317 frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item);
318 len = be32_to_cpu(inc->i_hdr.h_len);
319
320 while (iov_iter_count(to) && copied < len) {
321 if (frag_off == RDS_FRAG_SIZE) {
322 frag = list_entry(frag->f_item.next,
323 struct rds_page_frag, f_item);
324 frag_off = 0;
325 }
326 to_copy = min_t(unsigned long, iov_iter_count(to),
327 RDS_FRAG_SIZE - frag_off);
328 to_copy = min_t(unsigned long, to_copy, len - copied);
329
330 /* XXX needs + offset for multiple recvs per page */
331 rds_stats_add(s_copy_to_user, to_copy);
332 ret = copy_page_to_iter(frag->f_page,
333 frag->f_offset + frag_off,
334 to_copy,
335 to);
336 if (ret != to_copy)
337 return -EFAULT;
338
339 frag_off += to_copy;
340 copied += to_copy;
341 }
342
343 return copied;
344}
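/*
 * Editor's note: the copy-to-user loop above advances two cursors at once,
 * one across the fragment chain and one across the caller's iov, moving
 * min(space left in the iov, space left in the fragment, message bytes
 * left) each step.  A flat-array sketch of the same walk is below; the
 * fragment layout and TOY_FRAG_SIZE are simplified for the example.
 */
#include <stddef.h>
#include <string.h>

#define TOY_FRAG_SIZE	1024u

/* frags: nfrags buffers of TOY_FRAG_SIZE bytes holding msg_len message bytes */
static size_t copy_out(char *const *frags, size_t nfrags, size_t msg_len,
		       char *dst, size_t dst_len)
{
	size_t frag_idx = 0, frag_off = 0, copied = 0;

	while (copied < msg_len && copied < dst_len && frag_idx < nfrags) {
		size_t to_copy = TOY_FRAG_SIZE - frag_off;

		if (to_copy > dst_len - copied)
			to_copy = dst_len - copied;
		if (to_copy > msg_len - copied)
			to_copy = msg_len - copied;

		memcpy(dst + copied, frags[frag_idx] + frag_off, to_copy);
		copied += to_copy;
		frag_off += to_copy;

		if (frag_off == TOY_FRAG_SIZE) {	/* fragment exhausted, step to the next */
			frag_idx++;
			frag_off = 0;
		}
	}
	return copied;
}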
345
346/* ic starts out kzalloc()ed */
347void rds_iw_recv_init_ack(struct rds_iw_connection *ic)
348{
349 struct ib_send_wr *wr = &ic->i_ack_wr;
350 struct ib_sge *sge = &ic->i_ack_sge;
351
352 sge->addr = ic->i_ack_dma;
353 sge->length = sizeof(struct rds_header);
354 sge->lkey = rds_iw_local_dma_lkey(ic);
355
356 wr->sg_list = sge;
357 wr->num_sge = 1;
358 wr->opcode = IB_WR_SEND;
359 wr->wr_id = RDS_IW_ACK_WR_ID;
360 wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
361}
362
363/*
364 * You'd think that with reliable IB connections you wouldn't need to ack
365 * messages that have been received. The problem is that IB hardware generates
366 * an ack message before it has DMAed the message into memory. This creates a
367 * potential message loss if the HCA is disabled for any reason between when it
368 * sends the ack and before the message is DMAed and processed. This is only a
369 * potential issue if another HCA is available for fail-over.
370 *
371 * When the remote host receives our ack they'll free the sent message from
372 * their send queue. To decrease the latency of this we always send an ack
373 * immediately after we've received messages.
374 *
375 * For simplicity, we only have one ack in flight at a time. This puts
376 * pressure on senders to have deep enough send queues to absorb the latency of
377 * a single ack frame being in flight. This might not be good enough.
378 *
379 * This is implemented by having a long-lived send_wr and sge which point to a
380 * statically allocated ack frame. This ack wr does not fall under the ring
381 * accounting that the tx and rx wrs do. The QP attribute specifically makes
382 * room for it beyond the ring size. Send completion notices its special
383 * wr_id and avoids working with the ring in that case.
384 */
385#ifndef KERNEL_HAS_ATOMIC64
386static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
387 int ack_required)
388{
389 unsigned long flags;
390
391 spin_lock_irqsave(&ic->i_ack_lock, flags);
392 ic->i_ack_next = seq;
393 if (ack_required)
394 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
395 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
396}
397
398static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
399{
400 unsigned long flags;
401 u64 seq;
402
403 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
404
405 spin_lock_irqsave(&ic->i_ack_lock, flags);
406 seq = ic->i_ack_next;
407 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
408
409 return seq;
410}
411#else
412static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
413 int ack_required)
414{
415 atomic64_set(&ic->i_ack_next, seq);
416 if (ack_required) {
417 smp_mb__before_atomic();
418 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
419 }
420}
421
422static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
423{
424 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
425 smp_mb__after_atomic();
426
427 return atomic64_read(&ic->i_ack_next);
428}
429#endif
430
431
432static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits)
433{
434 struct rds_header *hdr = ic->i_ack;
435 struct ib_send_wr *failed_wr;
436 u64 seq;
437 int ret;
438
439 seq = rds_iw_get_ack(ic);
440
441 rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
442 rds_message_populate_header(hdr, 0, 0, 0);
443 hdr->h_ack = cpu_to_be64(seq);
444 hdr->h_credit = adv_credits;
445 rds_message_make_checksum(hdr);
446 ic->i_ack_queued = jiffies;
447
448 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr);
449 if (unlikely(ret)) {
450 /* Failed to send. Release the WR, and
451 * force another ACK.
452 */
453 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
454 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
455
456 rds_iw_stats_inc(s_iw_ack_send_failure);
457
458 rds_iw_conn_error(ic->conn, "sending ack failed\n");
459 } else
460 rds_iw_stats_inc(s_iw_ack_sent);
461}
462
463/*
464 * There are 3 ways of getting acknowledgements to the peer:
465 * 1. We call rds_iw_attempt_ack from the recv completion handler
466 * to send an ACK-only frame.
467 * However, there can be only one such frame in the send queue
468 * at any time, so we may have to postpone it.
469 * 2. When another (data) packet is transmitted while there's
470 * an ACK in the queue, we piggyback the ACK sequence number
471 * on the data packet.
472 * 3. If the ACK WR is done sending, we get called from the
473 * send queue completion handler, and check whether there's
474 * another ACK pending (postponed because the WR was on the
475 * queue). If so, we transmit it.
476 *
477 * We maintain 2 variables:
478 * - i_ack_flags, which keeps track of whether the ACK WR
479 * is currently in the send queue or not (IB_ACK_IN_FLIGHT)
480 * - i_ack_next, which is the last sequence number we received
481 *
482 * Potentially, send queue and receive queue handlers can run concurrently.
483 * It would be nice to not have to use a spinlock to synchronize things,
484 * but the one problem that rules this out is that 64-bit updates are
485 * not atomic on all platforms. Things would be a lot simpler if
486 * we had atomic64 or maybe cmpxchg64 everywhere.
487 *
488 * Reconnecting complicates this picture just slightly. When we
489 * reconnect, we may be seeing duplicate packets. The peer
490 * is retransmitting them, because it hasn't seen an ACK for
491 * them. It is important that we ACK these.
492 *
493 * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with
494 * this flag set *MUST* be acknowledged immediately.
495 */
496
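/*
 * Editor's note: a compact userspace model of the two-flag protocol
 * described above, using C11 atomics in place of the kernel's
 * {test_and_,}set_bit/clear_bit helpers.  ACK_REQUESTED says an ack is
 * owed; ACK_IN_FLIGHT says the single ack WR is already posted.  The
 * names and the post_ack() stub are illustrative stand-ins for the real
 * ib_post_send() path; credit handling is omitted.
 */
#include <stdatomic.h>

#define ACK_REQUESTED	(1u << 0)
#define ACK_IN_FLIGHT	(1u << 1)

static atomic_uint ack_flags = 0;

static void post_ack(void) { /* would build and post the ack frame */ }

/* Receive path: a new sequence number arrived, an ack is now owed. */
static void note_ack_required(void)
{
	atomic_fetch_or(&ack_flags, ACK_REQUESTED);
}

/* Called from the recv completion path (cf. rds_iw_attempt_ack). */
static void attempt_ack(void)
{
	if (!(atomic_load(&ack_flags) & ACK_REQUESTED))
		return;
	/* Only one ack may be in flight; back off if one already is. */
	if (atomic_fetch_or(&ack_flags, ACK_IN_FLIGHT) & ACK_IN_FLIGHT)
		return;
	atomic_fetch_and(&ack_flags, ~ACK_REQUESTED);
	post_ack();
}

/* Called when the ack WR completes (cf. rds_iw_ack_send_complete). */
static void ack_send_complete(void)
{
	atomic_fetch_and(&ack_flags, ~ACK_IN_FLIGHT);
	attempt_ack();	/* transmit any ack that was postponed meanwhile */
}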
497/*
498 * When we get here, we're called from the recv queue handler.
499 * Check whether we ought to transmit an ACK.
500 */
501void rds_iw_attempt_ack(struct rds_iw_connection *ic)
502{
503 unsigned int adv_credits;
504
505 if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
506 return;
507
508 if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
509 rds_iw_stats_inc(s_iw_ack_send_delayed);
510 return;
511 }
512
513 /* Can we get a send credit? */
514 if (!rds_iw_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) {
515 rds_iw_stats_inc(s_iw_tx_throttle);
516 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
517 return;
518 }
519
520 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
521 rds_iw_send_ack(ic, adv_credits);
522}
523
524/*
525 * We get here from the send completion handler, when the
526 * adapter tells us the ACK frame was sent.
527 */
528void rds_iw_ack_send_complete(struct rds_iw_connection *ic)
529{
530 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
531 rds_iw_attempt_ack(ic);
532}
533
534/*
535 * This is called by the regular xmit code when it wants to piggyback
536 * an ACK on an outgoing frame.
537 */
538u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic)
539{
540 if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
541 rds_iw_stats_inc(s_iw_ack_send_piggybacked);
542 return rds_iw_get_ack(ic);
543}
544
545/*
546 * It's kind of lame that we're copying from the posted receive pages into
547 * long-lived bitmaps. We could have posted the bitmaps and rdma written into
548 * them. But receiving new congestion bitmaps should be a *rare* event, so
549 * hopefully we won't need to invest that complexity in making it more
550 * efficient. By copying we can share a simpler core with TCP which has to
551 * copy.
552 */
553static void rds_iw_cong_recv(struct rds_connection *conn,
554 struct rds_iw_incoming *iwinc)
555{
556 struct rds_cong_map *map;
557 unsigned int map_off;
558 unsigned int map_page;
559 struct rds_page_frag *frag;
560 unsigned long frag_off;
561 unsigned long to_copy;
562 unsigned long copied;
563 uint64_t uncongested = 0;
564 void *addr;
565
566 /* catch completely corrupt packets */
567 if (be32_to_cpu(iwinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
568 return;
569
570 map = conn->c_fcong;
571 map_page = 0;
572 map_off = 0;
573
574 frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item);
575 frag_off = 0;
576
577 copied = 0;
578
579 while (copied < RDS_CONG_MAP_BYTES) {
580 uint64_t *src, *dst;
581 unsigned int k;
582
583 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
584 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
585
586 addr = kmap_atomic(frag->f_page);
587
588 src = addr + frag_off;
589 dst = (void *)map->m_page_addrs[map_page] + map_off;
590 for (k = 0; k < to_copy; k += 8) {
591			/* Record ports that became uncongested, i.e.
592			 * bits that changed from 1 to 0. */
593 uncongested |= ~(*src) & *dst;
594 *dst++ = *src++;
595 }
596 kunmap_atomic(addr);
597
598 copied += to_copy;
599
600 map_off += to_copy;
601 if (map_off == PAGE_SIZE) {
602 map_off = 0;
603 map_page++;
604 }
605
606 frag_off += to_copy;
607 if (frag_off == RDS_FRAG_SIZE) {
608 frag = list_entry(frag->f_item.next,
609 struct rds_page_frag, f_item);
610 frag_off = 0;
611 }
612 }
613
614 /* the congestion map is in little endian order */
615 uncongested = le64_to_cpu(uncongested);
616
617 rds_cong_map_updated(map, uncongested);
618}
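/*
 * Editor's note: the copy loop above computes "~new & old" per 64-bit
 * word before overwriting the stored map, which collects exactly the
 * bits that were set and are now clear.  A self-contained sketch of that
 * word-wise diff-and-update, with made-up example values, is below.
 */
#include <stdint.h>
#include <stdio.h>

/* Copy 'words' map words from src over old, returning the newly cleared bits. */
static uint64_t update_map(uint64_t *old, const uint64_t *src, int words)
{
	uint64_t cleared = 0;
	int i;

	for (i = 0; i < words; i++) {
		cleared |= ~src[i] & old[i];
		old[i] = src[i];
	}
	return cleared;
}

int main(void)
{
	uint64_t stored[1]   = { 0x000000000000000cull };	/* bits 2 and 3 set */
	uint64_t incoming[1] = { 0x0000000000000008ull };	/* only bit 3 still set */

	printf("newly cleared bits: 0x%llx\n",
	       (unsigned long long)update_map(stored, incoming, 1));
	/* prints 0x4: bit 2 went from 1 to 0 */
	return 0;
}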
619
620/*
621 * Rings are posted with all the allocations they'll need to queue the
622 * incoming message to the receiving socket so this can't fail.
623 * All fragments start with a header, so we can make sure we're not receiving
624 * garbage, and we can tell a small 8 byte fragment from an ACK frame.
625 */
626struct rds_iw_ack_state {
627 u64 ack_next;
628 u64 ack_recv;
629 unsigned int ack_required:1;
630 unsigned int ack_next_valid:1;
631 unsigned int ack_recv_valid:1;
632};
633
634static void rds_iw_process_recv(struct rds_connection *conn,
635 struct rds_iw_recv_work *recv, u32 byte_len,
636 struct rds_iw_ack_state *state)
637{
638 struct rds_iw_connection *ic = conn->c_transport_data;
639 struct rds_iw_incoming *iwinc = ic->i_iwinc;
640 struct rds_header *ihdr, *hdr;
641
642 /* XXX shut down the connection if port 0,0 are seen? */
643
644 rdsdebug("ic %p iwinc %p recv %p byte len %u\n", ic, iwinc, recv,
645 byte_len);
646
647 if (byte_len < sizeof(struct rds_header)) {
648 rds_iw_conn_error(conn, "incoming message "
649 "from %pI4 didn't include a "
650 "header, disconnecting and "
651 "reconnecting\n",
652 &conn->c_faddr);
653 return;
654 }
655 byte_len -= sizeof(struct rds_header);
656
657 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
658
659 /* Validate the checksum. */
660 if (!rds_message_verify_checksum(ihdr)) {
661 rds_iw_conn_error(conn, "incoming message "
662 "from %pI4 has corrupted header - "
663 "forcing a reconnect\n",
664 &conn->c_faddr);
665 rds_stats_inc(s_recv_drop_bad_checksum);
666 return;
667 }
668
669 /* Process the ACK sequence which comes with every packet */
670 state->ack_recv = be64_to_cpu(ihdr->h_ack);
671 state->ack_recv_valid = 1;
672
673 /* Process the credits update if there was one */
674 if (ihdr->h_credit)
675 rds_iw_send_add_credits(conn, ihdr->h_credit);
676
677 if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && byte_len == 0) {
678		/* This is an ACK-only packet. The reason it gets
679		 * special treatment here is that historically, ACKs
680 * were rather special beasts.
681 */
682 rds_iw_stats_inc(s_iw_ack_received);
683
684 /*
685 * Usually the frags make their way on to incs and are then freed as
686 * the inc is freed. We don't go that route, so we have to drop the
687 * page ref ourselves. We can't just leave the page on the recv
688 * because that confuses the dma mapping of pages and each recv's use
689 * of a partial page. We can leave the frag, though, it will be
690 * reused.
691 *
692 * FIXME: Fold this into the code path below.
693 */
694 rds_iw_frag_drop_page(recv->r_frag);
695 return;
696 }
697
698 /*
699 * If we don't already have an inc on the connection then this
700	 * fragment has a header and starts a message; copy its header
701 * into the inc and save the inc so we can hang upcoming fragments
702 * off its list.
703 */
704 if (!iwinc) {
705 iwinc = recv->r_iwinc;
706 recv->r_iwinc = NULL;
707 ic->i_iwinc = iwinc;
708
709 hdr = &iwinc->ii_inc.i_hdr;
710 memcpy(hdr, ihdr, sizeof(*hdr));
711 ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
712
713 rdsdebug("ic %p iwinc %p rem %u flag 0x%x\n", ic, iwinc,
714 ic->i_recv_data_rem, hdr->h_flags);
715 } else {
716 hdr = &iwinc->ii_inc.i_hdr;
717 /* We can't just use memcmp here; fragments of a
718 * single message may carry different ACKs */
719 if (hdr->h_sequence != ihdr->h_sequence ||
720 hdr->h_len != ihdr->h_len ||
721 hdr->h_sport != ihdr->h_sport ||
722 hdr->h_dport != ihdr->h_dport) {
723 rds_iw_conn_error(conn,
724 "fragment header mismatch; forcing reconnect\n");
725 return;
726 }
727 }
728
729 list_add_tail(&recv->r_frag->f_item, &iwinc->ii_frags);
730 recv->r_frag = NULL;
731
732 if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
733 ic->i_recv_data_rem -= RDS_FRAG_SIZE;
734 else {
735 ic->i_recv_data_rem = 0;
736 ic->i_iwinc = NULL;
737
738 if (iwinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
739 rds_iw_cong_recv(conn, iwinc);
740 else {
741 rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
742 &iwinc->ii_inc, GFP_ATOMIC);
743 state->ack_next = be64_to_cpu(hdr->h_sequence);
744 state->ack_next_valid = 1;
745 }
746
747 /* Evaluate the ACK_REQUIRED flag *after* we received
748 * the complete frame, and after bumping the next_rx
749 * sequence. */
750 if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
751 rds_stats_inc(s_recv_ack_required);
752 state->ack_required = 1;
753 }
754
755 rds_inc_put(&iwinc->ii_inc);
756 }
757}
758
759/*
760 * Plucking the oldest entry from the ring can be done concurrently with
761 * the thread refilling the ring. Each ring operation is protected by
762 * spinlocks and the transient state of refilling doesn't change the
763 * recording of which entry is oldest.
764 *
765 * This relies on IB only calling one cq comp_handler for each cq so that
766 * there will only be one caller of rds_recv_incoming() per RDS connection.
767 */
768void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context)
769{
770 struct rds_connection *conn = context;
771 struct rds_iw_connection *ic = conn->c_transport_data;
772
773 rdsdebug("conn %p cq %p\n", conn, cq);
774
775 rds_iw_stats_inc(s_iw_rx_cq_call);
776
777 tasklet_schedule(&ic->i_recv_tasklet);
778}
779
780static inline void rds_poll_cq(struct rds_iw_connection *ic,
781 struct rds_iw_ack_state *state)
782{
783 struct rds_connection *conn = ic->conn;
784 struct ib_wc wc;
785 struct rds_iw_recv_work *recv;
786
787 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
788 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
789 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
790 be32_to_cpu(wc.ex.imm_data));
791 rds_iw_stats_inc(s_iw_rx_cq_event);
792
793 recv = &ic->i_recvs[rds_iw_ring_oldest(&ic->i_recv_ring)];
794
795 rds_iw_recv_unmap_page(ic, recv);
796
797 /*
798 * Also process recvs in connecting state because it is possible
799 * to get a recv completion _before_ the rdmacm ESTABLISHED
800 * event is processed.
801 */
802 if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
803 /* We expect errors as the qp is drained during shutdown */
804 if (wc.status == IB_WC_SUCCESS) {
805 rds_iw_process_recv(conn, recv, wc.byte_len, state);
806 } else {
807 rds_iw_conn_error(conn, "recv completion on "
808 "%pI4 had status %u, disconnecting and "
809 "reconnecting\n", &conn->c_faddr,
810 wc.status);
811 }
812 }
813
814 rds_iw_ring_free(&ic->i_recv_ring, 1);
815 }
816}
817
818void rds_iw_recv_tasklet_fn(unsigned long data)
819{
820 struct rds_iw_connection *ic = (struct rds_iw_connection *) data;
821 struct rds_connection *conn = ic->conn;
822 struct rds_iw_ack_state state = { 0, };
823
824 rds_poll_cq(ic, &state);
825 ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
826 rds_poll_cq(ic, &state);
827
828 if (state.ack_next_valid)
829 rds_iw_set_ack(ic, state.ack_next, state.ack_required);
830 if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
831 rds_send_drop_acked(conn, state.ack_recv, NULL);
832 ic->i_ack_recv = state.ack_recv;
833 }
834 if (rds_conn_up(conn))
835 rds_iw_attempt_ack(ic);
836
837 /* If we ever end up with a really empty receive ring, we're
838 * in deep trouble, as the sender will definitely see RNR
839 * timeouts. */
840 if (rds_iw_ring_empty(&ic->i_recv_ring))
841 rds_iw_stats_inc(s_iw_rx_ring_empty);
842
843 /*
844 * If the ring is running low, then schedule the thread to refill.
845 */
846 if (rds_iw_ring_low(&ic->i_recv_ring))
847 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
848}
849
850int rds_iw_recv(struct rds_connection *conn)
851{
852 struct rds_iw_connection *ic = conn->c_transport_data;
853 int ret = 0;
854
855 rdsdebug("conn %p\n", conn);
856
857 /*
858 * If we get a temporary posting failure in this context then
859 * we're really low and we want the caller to back off for a bit.
860 */
861 mutex_lock(&ic->i_recv_mutex);
862 if (rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
863 ret = -ENOMEM;
864 else
865 rds_iw_stats_inc(s_iw_rx_refill_from_thread);
866 mutex_unlock(&ic->i_recv_mutex);
867
868 if (rds_conn_up(conn))
869 rds_iw_attempt_ack(ic);
870
871 return ret;
872}
873
874int rds_iw_recv_init(void)
875{
876 struct sysinfo si;
877 int ret = -ENOMEM;
878
879	/* Default to roughly a third of all available RAM for recv memory */
880 si_meminfo(&si);
881 rds_iw_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
882
883 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming",
884 sizeof(struct rds_iw_incoming),
885 0, 0, NULL);
886 if (!rds_iw_incoming_slab)
887 goto out;
888
889 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag",
890 sizeof(struct rds_page_frag),
891 0, 0, NULL);
892 if (!rds_iw_frag_slab)
893 kmem_cache_destroy(rds_iw_incoming_slab);
894 else
895 ret = 0;
896out:
897 return ret;
898}
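/*
 * Editor's note: a worked example of the sizing above, assuming 4 KiB
 * pages and 4 KiB fragments (both values are assumptions made for the
 * illustration).  The cap is expressed as a number of fragments covering
 * roughly a third of RAM.
 */
#include <stdio.h>

int main(void)
{
	unsigned long totalram = 262144;	/* pages; 1 GiB with 4 KiB pages */
	unsigned long page_size = 4096;
	unsigned long frag_size = 4096;
	unsigned long max_frags = totalram / 3 * page_size / frag_size;

	printf("recv allocation cap: %lu fragments (~%lu MiB)\n",
	       max_frags, max_frags * frag_size >> 20);
	/* prints: 87381 fragments (~341 MiB) */
	return 0;
}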
899
900void rds_iw_recv_exit(void)
901{
902 kmem_cache_destroy(rds_iw_incoming_slab);
903 kmem_cache_destroy(rds_iw_frag_slab);
904}
diff --git a/net/rds/iw_ring.c b/net/rds/iw_ring.c
deleted file mode 100644
index da8e3b63f663..000000000000
--- a/net/rds/iw_ring.c
+++ /dev/null
@@ -1,169 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "iw.h"
37
38/*
39 * Locking for IB rings.
40 * We assume that allocation is always protected by a mutex
41 * in the caller (this is a valid assumption for the current
42 * implementation).
43 *
44 * Freeing always happens in an interrupt, and hence only
45 * races with allocations, but not with other free()s.
46 *
47 * The interaction between allocation and freeing is that
48 * the alloc code has to determine the number of free entries.
49 * To this end, we maintain two counters; an allocation counter
50 * and a free counter. Both are allowed to run freely, and wrap
51 * around.
52 * The number of used entries is always (alloc_ctr - free_ctr) % NR.
53 *
54 * The current implementation makes free_ctr atomic. When the
55 * caller finds an allocation fails, it should set an "alloc fail"
56 * bit and retry the allocation. The "alloc fail" bit essentially tells
57 * the CQ completion handlers to wake it up after freeing some
58 * more entries.
59 */
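/*
 * Editor's note: a userspace sketch of the counter scheme described
 * above.  Both counters run freely and wrap; unsigned 32-bit subtraction
 * still yields the number of in-use entries as long as it never exceeds
 * the ring size.  The toy_ring type is invented for the example; the
 * atomic free counter mirrors w_free_ctr being bumped from the
 * completion path.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

struct toy_ring {
	uint32_t nr;		/* ring size */
	uint32_t alloc_ctr;	/* only touched by the (mutex-protected) allocator */
	atomic_uint free_ctr;	/* bumped from the completion handler */
};

static uint32_t toy_ring_used(struct toy_ring *ring)
{
	/* Wrap-safe: e.g. alloc_ctr == 3, free_ctr == 0xfffffffe gives 5. */
	uint32_t diff = ring->alloc_ctr - (uint32_t)atomic_load(&ring->free_ctr);

	assert(diff <= ring->nr);
	return diff;
}

static uint32_t toy_ring_avail(struct toy_ring *ring)
{
	return ring->nr - toy_ring_used(ring);
}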
60
61/*
62 * This only happens on shutdown.
63 */
64DECLARE_WAIT_QUEUE_HEAD(rds_iw_ring_empty_wait);
65
66void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr)
67{
68 memset(ring, 0, sizeof(*ring));
69 ring->w_nr = nr;
70 rdsdebug("ring %p nr %u\n", ring, ring->w_nr);
71}
72
73static inline u32 __rds_iw_ring_used(struct rds_iw_work_ring *ring)
74{
75 u32 diff;
76
77 /* This assumes that atomic_t has at least as many bits as u32 */
78 diff = ring->w_alloc_ctr - (u32) atomic_read(&ring->w_free_ctr);
79 BUG_ON(diff > ring->w_nr);
80
81 return diff;
82}
83
84void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr)
85{
86 /* We only ever get called from the connection setup code,
87 * prior to creating the QP. */
88 BUG_ON(__rds_iw_ring_used(ring));
89 ring->w_nr = nr;
90}
91
92static int __rds_iw_ring_empty(struct rds_iw_work_ring *ring)
93{
94 return __rds_iw_ring_used(ring) == 0;
95}
96
97u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos)
98{
99 u32 ret = 0, avail;
100
101 avail = ring->w_nr - __rds_iw_ring_used(ring);
102
103 rdsdebug("ring %p val %u next %u free %u\n", ring, val,
104 ring->w_alloc_ptr, avail);
105
106 if (val && avail) {
107 ret = min(val, avail);
108 *pos = ring->w_alloc_ptr;
109
110 ring->w_alloc_ptr = (ring->w_alloc_ptr + ret) % ring->w_nr;
111 ring->w_alloc_ctr += ret;
112 }
113
114 return ret;
115}
116
117void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val)
118{
119 ring->w_free_ptr = (ring->w_free_ptr + val) % ring->w_nr;
120 atomic_add(val, &ring->w_free_ctr);
121
122 if (__rds_iw_ring_empty(ring) &&
123 waitqueue_active(&rds_iw_ring_empty_wait))
124 wake_up(&rds_iw_ring_empty_wait);
125}
126
127void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val)
128{
129 ring->w_alloc_ptr = (ring->w_alloc_ptr - val) % ring->w_nr;
130 ring->w_alloc_ctr -= val;
131}
132
133int rds_iw_ring_empty(struct rds_iw_work_ring *ring)
134{
135 return __rds_iw_ring_empty(ring);
136}
137
138int rds_iw_ring_low(struct rds_iw_work_ring *ring)
139{
140 return __rds_iw_ring_used(ring) <= (ring->w_nr >> 1);
141}
142
143
144/*
145 * returns the oldest alloced ring entry. This will be the next one
146 * freed. This can't be called if there are none allocated.
147 */
148u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring)
149{
150 return ring->w_free_ptr;
151}
152
153/*
154 * returns the number of completed work requests.
155 */
156
157u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest)
158{
159 u32 ret;
160
161 if (oldest <= (unsigned long long)wr_id)
162 ret = (unsigned long long)wr_id - oldest + 1;
163 else
164 ret = ring->w_nr - oldest + (unsigned long long)wr_id + 1;
165
166 rdsdebug("ring %p ret %u wr_id %u oldest %u\n", ring, ret,
167 wr_id, oldest);
168 return ret;
169}
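/*
 * Editor's note: a standalone version of the completion-count arithmetic
 * used above.  Ring slots are numbered 0..nr-1; when a completion for
 * wr_id arrives, every slot from 'oldest' through wr_id inclusive is
 * done, and the second branch handles the window wrapping past the end.
 */
#include <stdint.h>

static uint32_t toy_ring_completed(uint32_t nr, uint32_t wr_id, uint32_t oldest)
{
	if (oldest <= wr_id)
		return wr_id - oldest + 1;
	return nr - oldest + wr_id + 1;	/* window wrapped around slot 0 */
}
/* e.g. nr = 8, oldest = 6, wr_id = 1  ->  8 - 6 + 1 + 1 = 4 slots completed */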
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
deleted file mode 100644
index e20bd503f4bd..000000000000
--- a/net/rds/iw_send.c
+++ /dev/null
@@ -1,981 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/device.h>
36#include <linux/dmapool.h>
37#include <linux/ratelimit.h>
38
39#include "rds.h"
40#include "iw.h"
41
42static void rds_iw_send_rdma_complete(struct rds_message *rm,
43 int wc_status)
44{
45 int notify_status;
46
47 switch (wc_status) {
48 case IB_WC_WR_FLUSH_ERR:
49 return;
50
51 case IB_WC_SUCCESS:
52 notify_status = RDS_RDMA_SUCCESS;
53 break;
54
55 case IB_WC_REM_ACCESS_ERR:
56 notify_status = RDS_RDMA_REMOTE_ERROR;
57 break;
58
59 default:
60 notify_status = RDS_RDMA_OTHER_ERROR;
61 break;
62 }
63 rds_rdma_send_complete(rm, notify_status);
64}
65
66static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic,
67 struct rm_rdma_op *op)
68{
69 if (op->op_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->op_sg, op->op_nents,
72 op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->op_mapped = 0;
74 }
75}
76
77static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
78 struct rds_iw_send_work *send,
79 int wc_status)
80{
81 struct rds_message *rm = send->s_rm;
82
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
84
85 ib_dma_unmap_sg(ic->i_cm_id->device,
86 rm->data.op_sg, rm->data.op_nents,
87 DMA_TO_DEVICE);
88
89 if (rm->rdma.op_active) {
90 rds_iw_send_unmap_rdma(ic, &rm->rdma);
91
92 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics:
94 * 1. Notify when we received the ACK on the RDS message
95 * that was queued with the RDMA. This provides reliable
96 * notification of RDMA status at the expense of a one-way
97 * packet delay.
98 * 2. Notify when the IB stack gives us the completion event for
99 * the RDMA operation.
100 * 3. Notify when the IB stack gives us the completion event for
101 * the accompanying RDS messages.
102 * Here, we implement approach #3. To implement approach #2,
103 * call rds_rdma_send_complete from the cq_handler. To implement #1,
104 * don't call rds_rdma_send_complete at all, and fall back to the notify
105 * handling in the ACK processing code.
106 *
107 * Note: There's no need to explicitly sync any RDMA buffers using
108 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
109 * operation itself unmapped the RDMA buffers, which takes care
110 * of synching.
111 */
112 rds_iw_send_rdma_complete(rm, wc_status);
113
114 if (rm->rdma.op_write)
115 rds_stats_add(s_send_rdma_bytes, rm->rdma.op_bytes);
116 else
117 rds_stats_add(s_recv_rdma_bytes, rm->rdma.op_bytes);
118 }
119
120 /* If anyone waited for this message to get flushed out, wake
121 * them up now */
122 rds_message_unmapped(rm);
123
124 rds_message_put(rm);
125 send->s_rm = NULL;
126}
127
128void rds_iw_send_init_ring(struct rds_iw_connection *ic)
129{
130 struct rds_iw_send_work *send;
131 u32 i;
132
133 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
134 struct ib_sge *sge;
135
136 send->s_rm = NULL;
137 send->s_op = NULL;
138 send->s_mapping = NULL;
139
140 send->s_send_wr.next = NULL;
141 send->s_send_wr.wr_id = i;
142 send->s_send_wr.sg_list = send->s_sge;
143 send->s_send_wr.num_sge = 1;
144 send->s_send_wr.opcode = IB_WR_SEND;
145 send->s_send_wr.send_flags = 0;
146 send->s_send_wr.ex.imm_data = 0;
147
148 sge = rds_iw_data_sge(ic, send->s_sge);
149 sge->lkey = 0;
150
151 sge = rds_iw_header_sge(ic, send->s_sge);
152 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
153 sge->length = sizeof(struct rds_header);
154 sge->lkey = 0;
155
156 send->s_mr = ib_alloc_mr(ic->i_pd, IB_MR_TYPE_MEM_REG,
157 fastreg_message_size);
158 if (IS_ERR(send->s_mr)) {
159 printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n");
160 break;
161 }
162 }
163}
164
165void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
166{
167 struct rds_iw_send_work *send;
168 u32 i;
169
170 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
171 BUG_ON(!send->s_mr);
172 ib_dereg_mr(send->s_mr);
173 if (send->s_send_wr.opcode == 0xdead)
174 continue;
175 if (send->s_rm)
176 rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
177 if (send->s_op)
178 rds_iw_send_unmap_rdma(ic, send->s_op);
179 }
180}
181
182/*
183 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
184 * operations performed in the send path. As the sender allocs and potentially
185 * unallocs the next free entry in the ring it doesn't alter which is
186 * the next to be freed, which is what this is concerned with.
187 */
188void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
189{
190 struct rds_connection *conn = context;
191 struct rds_iw_connection *ic = conn->c_transport_data;
192 struct ib_wc wc;
193 struct rds_iw_send_work *send;
194 u32 completed;
195 u32 oldest;
196 u32 i;
197 int ret;
198
199 rdsdebug("cq %p conn %p\n", cq, conn);
200 rds_iw_stats_inc(s_iw_tx_cq_call);
201 ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
202 if (ret)
203 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
204
205 while (ib_poll_cq(cq, 1, &wc) > 0) {
206 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
207 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
208 be32_to_cpu(wc.ex.imm_data));
209 rds_iw_stats_inc(s_iw_tx_cq_event);
210
211 if (wc.status != IB_WC_SUCCESS) {
212 printk(KERN_ERR "WC Error: status = %d opcode = %d\n", wc.status, wc.opcode);
213 break;
214 }
215
216 if (wc.opcode == IB_WC_LOCAL_INV && wc.wr_id == RDS_IW_LOCAL_INV_WR_ID) {
217 ic->i_fastreg_posted = 0;
218 continue;
219 }
220
221 if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) {
222 ic->i_fastreg_posted = 1;
223 continue;
224 }
225
226 if (wc.wr_id == RDS_IW_ACK_WR_ID) {
227 if (time_after(jiffies, ic->i_ack_queued + HZ/2))
228 rds_iw_stats_inc(s_iw_tx_stalled);
229 rds_iw_ack_send_complete(ic);
230 continue;
231 }
232
233 oldest = rds_iw_ring_oldest(&ic->i_send_ring);
234
235 completed = rds_iw_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);
236
237 for (i = 0; i < completed; i++) {
238 send = &ic->i_sends[oldest];
239
240 /* In the error case, wc.opcode sometimes contains garbage */
241 switch (send->s_send_wr.opcode) {
242 case IB_WR_SEND:
243 if (send->s_rm)
244 rds_iw_send_unmap_rm(ic, send, wc.status);
245 break;
246 case IB_WR_REG_MR:
247 case IB_WR_RDMA_WRITE:
248 case IB_WR_RDMA_READ:
249 case IB_WR_RDMA_READ_WITH_INV:
250 /* Nothing to be done - the SG list will be unmapped
251 * when the SEND completes. */
252 break;
253 default:
254 printk_ratelimited(KERN_NOTICE
255 "RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
256 __func__, send->s_send_wr.opcode);
257 break;
258 }
259
260 send->s_send_wr.opcode = 0xdead;
261 send->s_send_wr.num_sge = 1;
262 if (time_after(jiffies, send->s_queued + HZ/2))
263 rds_iw_stats_inc(s_iw_tx_stalled);
264
265			/* If an RDMA operation produced an error, signal this right
266			 * away. If we don't, the subsequent SEND that goes with this
267			 * RDMA will be canceled with ERR_WFLUSH, and the application
268			 * will never learn that the RDMA failed. */
269 if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) {
270 struct rds_message *rm;
271
272 rm = rds_send_get_message(conn, send->s_op);
273 if (rm)
274 rds_iw_send_rdma_complete(rm, wc.status);
275 }
276
277 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
278 }
279
280 rds_iw_ring_free(&ic->i_send_ring, completed);
281
282 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
283 test_bit(0, &conn->c_map_queued))
284 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
285
286 /* We expect errors as the qp is drained during shutdown */
287 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
288 rds_iw_conn_error(conn,
289 "send completion on %pI4 "
290 "had status %u, disconnecting and reconnecting\n",
291 &conn->c_faddr, wc.status);
292 }
293 }
294}
295
296/*
297 * This is the main function for allocating credits when sending
298 * messages.
299 *
300 * Conceptually, we have two counters:
301 * - send credits: this tells us how many WRs we're allowed
302 * to submit without overrunning the receiver's queue. For
303 * each SEND WR we post, we decrement this by one.
304 *
305 * - posted credits: this tells us how many WRs we recently
306 * posted to the receive queue. This value is transferred
307 * to the peer as a "credit update" in a RDS header field.
308 * Every time we transmit credits to the peer, we subtract
309 * the amount of transferred credits from this counter.
310 *
311 * It is essential that we avoid situations where both sides have
312 * exhausted their send credits, and are unable to send new credits
313 * to the peer. We achieve this by requiring that we send at least
314 * one credit update to the peer before exhausting our credits.
315 * When new credits arrive, we subtract one credit that is withheld
316 * until we've posted new buffers and are ready to transmit these
317 * credits (see rds_iw_send_add_credits below).
318 *
319 * The RDS send code is essentially single-threaded; rds_send_xmit
320 * grabs c_send_lock to ensure exclusive access to the send ring.
321 * However, the ACK sending code is independent and can race with
322 * message SENDs.
323 *
324 * In the send path, we need to update the counters for send credits
325 * and the counter of posted buffers atomically - when we use the
326 * last available credit, we cannot allow another thread to race us
327 * and grab the posted credits counter. Hence, we have to use a
328 * spinlock to protect the credit counter, or use atomics.
329 *
330 * Spinlocks shared between the send and the receive path are bad,
331 * because they create unnecessary delays. An early implementation
332 * using a spinlock showed a 5% degradation in throughput at some
333 * loads.
334 *
335 * This implementation avoids spinlocks completely, putting both
336 * counters into a single atomic, and updating that atomic using
337 * atomic_add (in the receive path, when receiving fresh credits),
338 * and using atomic_cmpxchg when updating the two counters.
339 */
340int rds_iw_send_grab_credits(struct rds_iw_connection *ic,
341 u32 wanted, u32 *adv_credits, int need_posted, int max_posted)
342{
343 unsigned int avail, posted, got = 0, advertise;
344 long oldval, newval;
345
346 *adv_credits = 0;
347 if (!ic->i_flowctl)
348 return wanted;
349
350try_again:
351 advertise = 0;
352 oldval = newval = atomic_read(&ic->i_credits);
353 posted = IB_GET_POST_CREDITS(oldval);
354 avail = IB_GET_SEND_CREDITS(oldval);
355
356 rdsdebug("wanted=%u credits=%u posted=%u\n",
357 wanted, avail, posted);
358
359 /* The last credit must be used to send a credit update. */
360 if (avail && !posted)
361 avail--;
362
363 if (avail < wanted) {
364 struct rds_connection *conn = ic->i_cm_id->context;
365
366 /* Oops, there aren't that many credits left! */
367 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
368 got = avail;
369 } else {
370 /* Sometimes you get what you want, lalala. */
371 got = wanted;
372 }
373 newval -= IB_SET_SEND_CREDITS(got);
374
375 /*
376 * If need_posted is non-zero, then the caller wants
377	 * the posted credits regardless of whether any send credits are
378 * available.
379 */
380 if (posted && (got || need_posted)) {
381 advertise = min_t(unsigned int, posted, max_posted);
382 newval -= IB_SET_POST_CREDITS(advertise);
383 }
384
385 /* Finally bill everything */
386 if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
387 goto try_again;
388
389 *adv_credits = advertise;
390 return got;
391}
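/*
 * Editor's note: a self-contained model of the lock-free credit scheme
 * described before rds_iw_send_grab_credits.  Send credits and posted
 * credits share one 32-bit atomic (here: low/high 16 bits, mirroring the
 * layout the IB_*_CREDITS macros are assumed to use), and a
 * compare-exchange retry loop debits both in one step.  The names are
 * illustrative, and the max_posted/need_posted handling of the real
 * function is omitted for brevity.
 */
#include <stdatomic.h>

static atomic_uint credits = 0;	/* low 16 bits: send credits, high 16: posted */

#define SEND_CREDITS(v)		((v) & 0xffffu)
#define POST_CREDITS(v)		((v) >> 16)

/* Receive path: the peer advertised 'n' fresh send credits. */
static void add_send_credits(unsigned int n)
{
	atomic_fetch_add(&credits, n);
}

/* Send path: try to take 'wanted' send credits and all posted credits. */
static unsigned int grab_credits(unsigned int wanted, unsigned int *advertise)
{
	unsigned int oldval, newval, avail, got;

	do {
		oldval = atomic_load(&credits);
		avail = SEND_CREDITS(oldval);
		/* Always keep one credit back so a credit update can be sent. */
		if (avail && !POST_CREDITS(oldval))
			avail--;
		got = wanted < avail ? wanted : avail;
		*advertise = POST_CREDITS(oldval);
		newval = oldval - got - (*advertise << 16);
	} while (!atomic_compare_exchange_weak(&credits, &oldval, newval));

	return got;
}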
392
393void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits)
394{
395 struct rds_iw_connection *ic = conn->c_transport_data;
396
397 if (credits == 0)
398 return;
399
400 rdsdebug("credits=%u current=%u%s\n",
401 credits,
402 IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
403 test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");
404
405 atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
406 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
407 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
408
409 WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);
410
411 rds_iw_stats_inc(s_iw_rx_credit_updates);
412}
413
414void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted)
415{
416 struct rds_iw_connection *ic = conn->c_transport_data;
417
418 if (posted == 0)
419 return;
420
421 atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);
422
423 /* Decide whether to send an update to the peer now.
424 * If we would send a credit update for every single buffer we
425 * post, we would end up with an ACK storm (ACK arrives,
426 * consumes buffer, we refill the ring, send ACK to remote
427 * advertising the newly posted buffer... ad inf)
428 *
429 * Performance pretty much depends on how often we send
430 * credit updates - too frequent updates mean lots of ACKs.
431 * Too infrequent updates, and the peer will run out of
432	 * credits and have to throttle.
433 * For the time being, 16 seems to be a good compromise.
434 */
435 if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
436 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
437}
438
439static inline void
440rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
441 struct rds_iw_send_work *send, unsigned int pos,
442 unsigned long buffer, unsigned int length,
443 int send_flags)
444{
445 struct ib_sge *sge;
446
447 WARN_ON(pos != send - ic->i_sends);
448
449 send->s_send_wr.send_flags = send_flags;
450 send->s_send_wr.opcode = IB_WR_SEND;
451 send->s_send_wr.num_sge = 2;
452 send->s_send_wr.next = NULL;
453 send->s_queued = jiffies;
454 send->s_op = NULL;
455
456 if (length != 0) {
457 sge = rds_iw_data_sge(ic, send->s_sge);
458 sge->addr = buffer;
459 sge->length = length;
460 sge->lkey = rds_iw_local_dma_lkey(ic);
461
462 sge = rds_iw_header_sge(ic, send->s_sge);
463 } else {
464 /* We're sending a packet with no payload. There is only
465 * one SGE */
466 send->s_send_wr.num_sge = 1;
467 sge = &send->s_sge[0];
468 }
469
470 sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
471 sge->length = sizeof(struct rds_header);
472 sge->lkey = rds_iw_local_dma_lkey(ic);
473}
474
475/*
476 * This can be called multiple times for a given message. The first time
477 * we see a message we map its scatterlist into the IB device so that
478 * we can provide that mapped address to the IB scatter gather entries
479 * in the IB work requests. We translate the scatterlist into a series
480 * of work requests that fragment the message. These work requests complete
481 * in order so we pass ownership of the message to the completion handler
482 * once we send the final fragment.
483 *
484 * The RDS core uses the c_send_lock to only enter this function once
485 * per connection. This makes sure that the tx ring alloc/unalloc pairs
486 * don't get out of sync and confuse the ring.
487 */
488int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
489 unsigned int hdr_off, unsigned int sg, unsigned int off)
490{
491 struct rds_iw_connection *ic = conn->c_transport_data;
492 struct ib_device *dev = ic->i_cm_id->device;
493 struct rds_iw_send_work *send = NULL;
494 struct rds_iw_send_work *first;
495 struct rds_iw_send_work *prev;
496 struct ib_send_wr *failed_wr;
497 struct scatterlist *scat;
498 u32 pos;
499 u32 i;
500 u32 work_alloc;
501 u32 credit_alloc;
502 u32 posted;
503 u32 adv_credits = 0;
504 int send_flags = 0;
505 int sent;
506 int ret;
507 int flow_controlled = 0;
508
509 BUG_ON(off % RDS_FRAG_SIZE);
510 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
511
512 /* Fastreg support */
513 if (rds_rdma_cookie_key(rm->m_rdma_cookie) && !ic->i_fastreg_posted) {
514 ret = -EAGAIN;
515 goto out;
516 }
517
518 /* FIXME we may overallocate here */
519 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
520 i = 1;
521 else
522 i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
523
524 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
525 if (work_alloc == 0) {
526 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
527 rds_iw_stats_inc(s_iw_tx_ring_full);
528 ret = -ENOMEM;
529 goto out;
530 }
531
532 credit_alloc = work_alloc;
533 if (ic->i_flowctl) {
534 credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
535 adv_credits += posted;
536 if (credit_alloc < work_alloc) {
537 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
538 work_alloc = credit_alloc;
539 flow_controlled++;
540 }
541 if (work_alloc == 0) {
542 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
543 rds_iw_stats_inc(s_iw_tx_throttle);
544 ret = -ENOMEM;
545 goto out;
546 }
547 }
548
549 /* map the message the first time we see it */
550 if (!ic->i_rm) {
551 /*
552 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n",
553 be16_to_cpu(rm->m_inc.i_hdr.h_dport),
554 rm->m_inc.i_hdr.h_flags,
555 be32_to_cpu(rm->m_inc.i_hdr.h_len));
556 */
557 if (rm->data.op_nents) {
558 rm->data.op_count = ib_dma_map_sg(dev,
559 rm->data.op_sg,
560 rm->data.op_nents,
561 DMA_TO_DEVICE);
562 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
563 if (rm->data.op_count == 0) {
564 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
565 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
566 ret = -ENOMEM; /* XXX ? */
567 goto out;
568 }
569 } else {
570 rm->data.op_count = 0;
571 }
572
573 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
574 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
575 rds_message_addref(rm);
576 rm->data.op_dmasg = 0;
577 rm->data.op_dmaoff = 0;
578 ic->i_rm = rm;
579
580 /* Finalize the header */
581 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
582 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
583 if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
584 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
585
586		/* If it has an RDMA op, tell the peer we did it. This is
587		 * used by the peer to release use-once RDMA MRs. */
588 if (rm->rdma.op_active) {
589 struct rds_ext_header_rdma ext_hdr;
590
591 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
592 rds_message_add_extension(&rm->m_inc.i_hdr,
593 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
594 }
595 if (rm->m_rdma_cookie) {
596 rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
597 rds_rdma_cookie_key(rm->m_rdma_cookie),
598 rds_rdma_cookie_offset(rm->m_rdma_cookie));
599 }
600
601		/* Note - rds_iw_piggyb_ack clears the ACK_REQUIRED bit, so
602		 * we should not do this unless we have a chance of at least
603		 * sticking the header into the send ring, which is why
604		 * rds_iw_ring_alloc must be called first. */
605 rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_iw_piggyb_ack(ic));
606 rds_message_make_checksum(&rm->m_inc.i_hdr);
607
608 /*
609 * Update adv_credits since we reset the ACK_REQUIRED bit.
610 */
611 rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
612 adv_credits += posted;
613 BUG_ON(adv_credits > 255);
614 }
615
616 send = &ic->i_sends[pos];
617 first = send;
618 prev = NULL;
619 scat = &rm->data.op_sg[rm->data.op_dmasg];
620 sent = 0;
621 i = 0;
622
623 /* Sometimes you want to put a fence between an RDMA
624 * READ and the following SEND.
625 * We could either do this all the time
626 * or when requested by the user. Right now, we let
627 * the application choose.
628 */
629 if (rm->rdma.op_active && rm->rdma.op_fence)
630 send_flags = IB_SEND_FENCE;
631
632 /*
633 * We could be copying the header into the unused tail of the page.
634 * That would need to be changed in the future when those pages might
635 * be mapped userspace pages or page cache pages. So instead we always
636 * use a second sge and our long-lived ring of mapped headers. We send
637 * the header after the data so that the data payload can be aligned on
638 * the receiver.
639 */
640
641 /* handle a 0-len message */
642 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) {
643 rds_iw_xmit_populate_wr(ic, send, pos, 0, 0, send_flags);
644 goto add_header;
645 }
646
647	/* if there's data, reference it with a chain of work reqs */
648 for (; i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]; i++) {
649 unsigned int len;
650
651 send = &ic->i_sends[pos];
652
653 len = min(RDS_FRAG_SIZE,
654 ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
655 rds_iw_xmit_populate_wr(ic, send, pos,
656 ib_sg_dma_address(dev, scat) + rm->data.op_dmaoff, len,
657 send_flags);
658
659 /*
660 * We want to delay signaling completions just enough to get
661 * the batching benefits but not so much that we create dead time
662 * on the wire.
663 */
664 if (ic->i_unsignaled_wrs-- == 0) {
665 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
666 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
667 }
668
669 ic->i_unsignaled_bytes -= len;
670 if (ic->i_unsignaled_bytes <= 0) {
671 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
672 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
673 }
674
675 /*
676 * Always signal the last one if we're stopping due to flow control.
677 */
678 if (flow_controlled && i == (work_alloc-1))
679 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
680
681 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
682 &send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next);
683
684 sent += len;
685 rm->data.op_dmaoff += len;
686 if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
687 scat++;
688 rm->data.op_dmaoff = 0;
689 rm->data.op_dmasg++;
690 }
691
692add_header:
693 /* Tack on the header after the data. The header SGE should already
694 * have been set up to point to the right header buffer. */
695 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
696
697 if (0) {
698 struct rds_header *hdr = &ic->i_send_hdrs[pos];
699
700 printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
701 be16_to_cpu(hdr->h_dport),
702 hdr->h_flags,
703 be32_to_cpu(hdr->h_len));
704 }
705 if (adv_credits) {
706 struct rds_header *hdr = &ic->i_send_hdrs[pos];
707
708 /* add credit and redo the header checksum */
709 hdr->h_credit = adv_credits;
710 rds_message_make_checksum(hdr);
711 adv_credits = 0;
712 rds_iw_stats_inc(s_iw_tx_credit_updates);
713 }
714
715 if (prev)
716 prev->s_send_wr.next = &send->s_send_wr;
717 prev = send;
718
719 pos = (pos + 1) % ic->i_send_ring.w_nr;
720 }
721
722	/* Account for the RDS header in the number of bytes we sent, but just once.
723 * The caller has no concept of fragmentation. */
724 if (hdr_off == 0)
725 sent += sizeof(struct rds_header);
726
727 /* if we finished the message then send completion owns it */
728 if (scat == &rm->data.op_sg[rm->data.op_count]) {
729 prev->s_rm = ic->i_rm;
730 prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
731 ic->i_rm = NULL;
732 }
733
734 if (i < work_alloc) {
735 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
736 work_alloc = i;
737 }
738 if (ic->i_flowctl && i < credit_alloc)
739 rds_iw_send_add_credits(conn, credit_alloc - i);
740
741 /* XXX need to worry about failed_wr and partial sends. */
742 failed_wr = &first->s_send_wr;
743 ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr);
744 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
745 first, &first->s_send_wr, ret, failed_wr);
746 BUG_ON(failed_wr != &first->s_send_wr);
747 if (ret) {
748 printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
749 "returned %d\n", &conn->c_faddr, ret);
750 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
751 if (prev->s_rm) {
752 ic->i_rm = prev->s_rm;
753 prev->s_rm = NULL;
754 }
755 goto out;
756 }
757
758 ret = sent;
759out:
760 BUG_ON(adv_credits);
761 return ret;
762}
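rds_iw_xmit() above budgets one work request per RDS_FRAG_SIZE fragment of the message (plus a single WR for a zero-length message), and with flow control each fragment also costs a send credit. The sketch below works that arithmetic with hypothetical numbers; EX_FRAG_SIZE and ex_ceil() are assumptions standing in for RDS_FRAG_SIZE and the ceil() macro from rds.h.

#include <stdio.h>

#define EX_FRAG_SIZE 4096u                      /* assumed RDS_FRAG_SIZE */
#define ex_ceil(x, y) (((x) + (y) - 1) / (y))   /* assumed to mirror ceil() in rds.h */

int main(void)
{
	unsigned int h_len = 10000;                /* hypothetical message length */
	unsigned int wrs = h_len ? ex_ceil(h_len, EX_FRAG_SIZE) : 1;

	/* Each fragment takes one send-ring slot and, with flow control
	 * enabled, one send credit; a zero-length message still needs a
	 * single WR to carry the header. */
	printf("h_len=%u -> %u work requests\n", h_len, wrs);
	return 0;
}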
763
764static int rds_iw_build_send_reg(struct rds_iw_send_work *send,
765 struct scatterlist *sg,
766 int sg_nents)
767{
768 int n;
769
770 n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE);
771 if (unlikely(n != sg_nents))
772 return n < 0 ? n : -EINVAL;
773
774 send->s_reg_wr.wr.opcode = IB_WR_REG_MR;
775 send->s_reg_wr.wr.wr_id = 0;
776 send->s_reg_wr.wr.num_sge = 0;
777 send->s_reg_wr.mr = send->s_mr;
778 send->s_reg_wr.key = send->s_mr->rkey;
779 send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE;
780
781 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
782
783 return 0;
784}
785
786int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
787{
788 struct rds_iw_connection *ic = conn->c_transport_data;
789 struct rds_iw_send_work *send = NULL;
790 struct rds_iw_send_work *first;
791 struct rds_iw_send_work *prev;
792 struct ib_send_wr *failed_wr;
793 struct rds_iw_device *rds_iwdev;
794 struct scatterlist *scat;
795 unsigned long len;
796 u64 remote_addr = op->op_remote_addr;
797 u32 pos, fr_pos;
798 u32 work_alloc;
799 u32 i;
800 u32 j;
801 int sent;
802 int ret;
803 int num_sge;
804 int sg_nents;
805
806 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
807
808 /* map the message the first time we see it */
809 if (!op->op_mapped) {
810 op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
811 op->op_sg, op->op_nents, (op->op_write) ?
812 DMA_TO_DEVICE : DMA_FROM_DEVICE);
813 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
814 if (op->op_count == 0) {
815 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
816 ret = -ENOMEM; /* XXX ? */
817 goto out;
818 }
819
820 op->op_mapped = 1;
821 }
822
823 if (!op->op_write) {
824 /* Alloc space on the send queue for the fastreg */
825 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos);
826 if (work_alloc != 1) {
827 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
828 rds_iw_stats_inc(s_iw_tx_ring_full);
829 ret = -ENOMEM;
830 goto out;
831 }
832 }
833
834 /*
835	 * Rather than supporting partial RDMA reads or writes, we insist that
836	 * there be enough work requests to send the entire message.
837 */
838 i = ceil(op->op_count, rds_iwdev->max_sge);
839
840 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
841 if (work_alloc != i) {
842 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
843 rds_iw_stats_inc(s_iw_tx_ring_full);
844 ret = -ENOMEM;
845 goto out;
846 }
847
848 send = &ic->i_sends[pos];
849 if (!op->op_write) {
850 first = prev = &ic->i_sends[fr_pos];
851 } else {
852 first = send;
853 prev = NULL;
854 }
855 scat = &op->op_sg[0];
856 sent = 0;
857 num_sge = op->op_count;
858 sg_nents = 0;
859
860 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
861 send->s_rdma_wr.wr.send_flags = 0;
862 send->s_queued = jiffies;
863
864 /*
865 * We want to delay signaling completions just enough to get
866 * the batching benefits but not so much that we create dead time on the wire.
867 */
868 if (ic->i_unsignaled_wrs-- == 0) {
869 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
870 send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
871 }
872
873		/* Using IB_WR_RDMA_READ_WITH_INV invalidates the fastreg_mr used
874		 * for local access once the read has completed, so RDS needs no
875		 * extra plumbing to invalidate it afterwards.
876		 */
877 if (op->op_write)
878 send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
879 else
880 send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
881
882 send->s_rdma_wr.remote_addr = remote_addr;
883 send->s_rdma_wr.rkey = op->op_rkey;
884 send->s_op = op;
885
886 if (num_sge > rds_iwdev->max_sge) {
887 send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge;
888 num_sge -= rds_iwdev->max_sge;
889 } else
890 send->s_rdma_wr.wr.num_sge = num_sge;
891
892 send->s_rdma_wr.wr.next = NULL;
893
894 if (prev)
895 prev->s_send_wr.next = &send->s_rdma_wr.wr;
896
897 for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
898 scat != &op->op_sg[op->op_count]; j++) {
899 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
900
901 if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV)
902 sg_nents++;
903 else {
904 send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
905 send->s_sge[j].length = len;
906 send->s_sge[j].lkey = rds_iw_local_dma_lkey(ic);
907 }
908
909 sent += len;
910 rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
911 remote_addr += len;
912
913 scat++;
914 }
915
916 if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
917 send->s_rdma_wr.wr.num_sge = 1;
918 send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
919 send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
920 send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
921 }
922
923 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
924 &send->s_rdma_wr,
925 send->s_rdma_wr.wr.num_sge,
926 send->s_rdma_wr.wr.next);
927
928 prev = send;
929 if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
930 send = ic->i_sends;
931 }
932
933 /* if we finished the message then send completion owns it */
934 if (scat == &op->op_sg[op->op_count])
935 first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
936
937 if (i < work_alloc) {
938 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
939 work_alloc = i;
940 }
941
942	/* On iWARP, local memory access by a remote system (i.e., an RDMA Read)
943	 * is not recommended. Putting the lkey on the wire is a security hole,
944	 * as it can allow access to all of the remote system's memory. Some
945	 * adapters do not allow using the lkey for this at all. To avoid this,
946	 * use a fastreg_mr (or possibly a dma_mr).
947	 */
948 if (!op->op_write) {
949 ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos],
950 &op->op_sg[0], sg_nents);
951 if (ret) {
952 printk(KERN_WARNING "RDS/IW: failed to reg send mem\n");
953 goto out;
954 }
955 work_alloc++;
956 }
957
958 failed_wr = &first->s_rdma_wr.wr;
959 ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
960 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
961 first, &first->s_rdma_wr, ret, failed_wr);
962 BUG_ON(failed_wr != &first->s_rdma_wr.wr);
963 if (ret) {
964 printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
965 "returned %d\n", &conn->c_faddr, ret);
966 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
967 goto out;
968 }
969
970out:
971 return ret;
972}
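For an RDMA read, rds_iw_xmit_rdma() above makes the fastreg work request built at fr_pos the head of the chain, with the IB_WR_RDMA_READ_WITH_INV requests linked behind it, so the MR is registered before the read uses it and invalidated when the read completes. The toy sketch below only illustrates that posting order; it is not verbs code and the ex_wr type is invented for the example.

#include <stdio.h>

struct ex_wr {
	const char *opcode;
	struct ex_wr *next;
};

int main(void)
{
	/* Posting order for an RDMA READ: the REG_MR work request built
	 * at fr_pos goes first, the READ_WITH_INV requests are chained
	 * behind it. */
	struct ex_wr read1 = { "RDMA_READ_WITH_INV", NULL };
	struct ex_wr reg   = { "REG_MR", &read1 };
	struct ex_wr *wr;

	for (wr = &reg; wr; wr = wr->next)
		printf("%s\n", wr->opcode);
	return 0;
}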
973
974void rds_iw_xmit_complete(struct rds_connection *conn)
975{
976 struct rds_iw_connection *ic = conn->c_transport_data;
977
978 /* We may have a pending ACK or window update we were unable
979 * to send previously (due to flow control). Try again. */
980 rds_iw_attempt_ack(ic);
981}
diff --git a/net/rds/iw_stats.c b/net/rds/iw_stats.c
deleted file mode 100644
index 5fe67f6a1d80..000000000000
--- a/net/rds/iw_stats.c
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/percpu.h>
34#include <linux/seq_file.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38#include "iw.h"
39
40DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_iw_statistics, rds_iw_stats);
41
42static const char *const rds_iw_stat_names[] = {
43 "iw_connect_raced",
44 "iw_listen_closed_stale",
45 "iw_tx_cq_call",
46 "iw_tx_cq_event",
47 "iw_tx_ring_full",
48 "iw_tx_throttle",
49 "iw_tx_sg_mapping_failure",
50 "iw_tx_stalled",
51 "iw_tx_credit_updates",
52 "iw_rx_cq_call",
53 "iw_rx_cq_event",
54 "iw_rx_ring_empty",
55 "iw_rx_refill_from_cq",
56 "iw_rx_refill_from_thread",
57 "iw_rx_alloc_limit",
58 "iw_rx_credit_updates",
59 "iw_ack_sent",
60 "iw_ack_send_failure",
61 "iw_ack_send_delayed",
62 "iw_ack_send_piggybacked",
63 "iw_ack_received",
64 "iw_rdma_mr_alloc",
65 "iw_rdma_mr_free",
66 "iw_rdma_mr_used",
67 "iw_rdma_mr_pool_flush",
68 "iw_rdma_mr_pool_wait",
69 "iw_rdma_mr_pool_depleted",
70};
71
72unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
73 unsigned int avail)
74{
75 struct rds_iw_statistics stats = {0, };
76 uint64_t *src;
77 uint64_t *sum;
78 size_t i;
79 int cpu;
80
81 if (avail < ARRAY_SIZE(rds_iw_stat_names))
82 goto out;
83
84 for_each_online_cpu(cpu) {
85 src = (uint64_t *)&(per_cpu(rds_iw_stats, cpu));
86 sum = (uint64_t *)&stats;
87 for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++)
88 *(sum++) += *(src++);
89 }
90
91 rds_stats_info_copy(iter, (uint64_t *)&stats, rds_iw_stat_names,
92 ARRAY_SIZE(rds_iw_stat_names));
93out:
94 return ARRAY_SIZE(rds_iw_stat_names);
95}
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
deleted file mode 100644
index 139239d2cb22..000000000000
--- a/net/rds/iw_sysctl.c
+++ /dev/null
@@ -1,123 +0,0 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/sysctl.h>
35#include <linux/proc_fs.h>
36
37#include "iw.h"
38
39static struct ctl_table_header *rds_iw_sysctl_hdr;
40
41unsigned long rds_iw_sysctl_max_send_wr = RDS_IW_DEFAULT_SEND_WR;
42unsigned long rds_iw_sysctl_max_recv_wr = RDS_IW_DEFAULT_RECV_WR;
43unsigned long rds_iw_sysctl_max_recv_allocation = (128 * 1024 * 1024) / RDS_FRAG_SIZE;
44static unsigned long rds_iw_sysctl_max_wr_min = 1;
45/* hardware will fail CQ creation long before this */
46static unsigned long rds_iw_sysctl_max_wr_max = (u32)~0;
47
48unsigned long rds_iw_sysctl_max_unsig_wrs = 16;
49static unsigned long rds_iw_sysctl_max_unsig_wr_min = 1;
50static unsigned long rds_iw_sysctl_max_unsig_wr_max = 64;
51
52unsigned long rds_iw_sysctl_max_unsig_bytes = (16 << 20);
53static unsigned long rds_iw_sysctl_max_unsig_bytes_min = 1;
54static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL;
55
56unsigned int rds_iw_sysctl_flow_control = 1;
57
58static struct ctl_table rds_iw_sysctl_table[] = {
59 {
60 .procname = "max_send_wr",
61 .data = &rds_iw_sysctl_max_send_wr,
62 .maxlen = sizeof(unsigned long),
63 .mode = 0644,
64 .proc_handler = proc_doulongvec_minmax,
65 .extra1 = &rds_iw_sysctl_max_wr_min,
66 .extra2 = &rds_iw_sysctl_max_wr_max,
67 },
68 {
69 .procname = "max_recv_wr",
70 .data = &rds_iw_sysctl_max_recv_wr,
71 .maxlen = sizeof(unsigned long),
72 .mode = 0644,
73 .proc_handler = proc_doulongvec_minmax,
74 .extra1 = &rds_iw_sysctl_max_wr_min,
75 .extra2 = &rds_iw_sysctl_max_wr_max,
76 },
77 {
78 .procname = "max_unsignaled_wr",
79 .data = &rds_iw_sysctl_max_unsig_wrs,
80 .maxlen = sizeof(unsigned long),
81 .mode = 0644,
82 .proc_handler = proc_doulongvec_minmax,
83 .extra1 = &rds_iw_sysctl_max_unsig_wr_min,
84 .extra2 = &rds_iw_sysctl_max_unsig_wr_max,
85 },
86 {
87 .procname = "max_unsignaled_bytes",
88 .data = &rds_iw_sysctl_max_unsig_bytes,
89 .maxlen = sizeof(unsigned long),
90 .mode = 0644,
91 .proc_handler = proc_doulongvec_minmax,
92 .extra1 = &rds_iw_sysctl_max_unsig_bytes_min,
93 .extra2 = &rds_iw_sysctl_max_unsig_bytes_max,
94 },
95 {
96 .procname = "max_recv_allocation",
97 .data = &rds_iw_sysctl_max_recv_allocation,
98 .maxlen = sizeof(unsigned long),
99 .mode = 0644,
100 .proc_handler = proc_doulongvec_minmax,
101 },
102 {
103 .procname = "flow_control",
104 .data = &rds_iw_sysctl_flow_control,
105 .maxlen = sizeof(rds_iw_sysctl_flow_control),
106 .mode = 0644,
107 .proc_handler = proc_dointvec,
108 },
109 { }
110};
111
112void rds_iw_sysctl_exit(void)
113{
114 unregister_net_sysctl_table(rds_iw_sysctl_hdr);
115}
116
117int rds_iw_sysctl_init(void)
118{
119 rds_iw_sysctl_hdr = register_net_sysctl(&init_net, "net/rds/iw", rds_iw_sysctl_table);
120 if (!rds_iw_sysctl_hdr)
121 return -ENOMEM;
122 return 0;
123}
diff --git a/net/rds/page.c b/net/rds/page.c
index 5a14e6d6a926..616f21f4e7d7 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -42,8 +42,8 @@ struct rds_page_remainder {
42 unsigned long r_offset; 42 unsigned long r_offset;
43}; 43};
44 44
45static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, 45static
46 rds_page_remainders); 46DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
47 47
48/* 48/*
49 * returns 0 on success or -errno on failure. 49 * returns 0 on success or -errno on failure.
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 9c1fed81bf0f..7220bebcf558 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -49,9 +49,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
49 rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id, 49 rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
50 event->event, rdma_event_msg(event->event)); 50 event->event, rdma_event_msg(event->event));
51 51
52 if (cm_id->device->node_type == RDMA_NODE_RNIC) 52 if (cm_id->device->node_type == RDMA_NODE_IB_CA)
53 trans = &rds_iw_transport;
54 else
55 trans = &rds_ib_transport; 53 trans = &rds_ib_transport;
56 54
57 /* Prevent shutdown from tearing down the connection 55 /* Prevent shutdown from tearing down the connection
@@ -119,6 +117,14 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
119 rds_conn_drop(conn); 117 rds_conn_drop(conn);
120 break; 118 break;
121 119
120 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
121 if (conn) {
122 pr_info("RDS: RDMA_CM_EVENT_TIMEWAIT_EXIT event: dropping connection %pI4->%pI4\n",
123 &conn->c_laddr, &conn->c_faddr);
124 rds_conn_drop(conn);
125 }
126 break;
127
122 default: 128 default:
123 /* things like device disconnect? */ 129 /* things like device disconnect? */
124 printk(KERN_ERR "RDS: unknown event %u (%s)!\n", 130 printk(KERN_ERR "RDS: unknown event %u (%s)!\n",
@@ -200,10 +206,6 @@ static int rds_rdma_init(void)
200 if (ret) 206 if (ret)
201 goto out; 207 goto out;
202 208
203 ret = rds_iw_init();
204 if (ret)
205 goto err_iw_init;
206
207 ret = rds_ib_init(); 209 ret = rds_ib_init();
208 if (ret) 210 if (ret)
209 goto err_ib_init; 211 goto err_ib_init;
@@ -211,8 +213,6 @@ static int rds_rdma_init(void)
211 goto out; 213 goto out;
212 214
213err_ib_init: 215err_ib_init:
214 rds_iw_exit();
215err_iw_init:
216 rds_rdma_listen_stop(); 216 rds_rdma_listen_stop();
217out: 217out:
218 return ret; 218 return ret;
@@ -224,11 +224,10 @@ static void rds_rdma_exit(void)
224 /* stop listening first to ensure no new connections are attempted */ 224 /* stop listening first to ensure no new connections are attempted */
225 rds_rdma_listen_stop(); 225 rds_rdma_listen_stop();
226 rds_ib_exit(); 226 rds_ib_exit();
227 rds_iw_exit();
228} 227}
229module_exit(rds_rdma_exit); 228module_exit(rds_rdma_exit);
230 229
231MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>"); 230MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>");
232MODULE_DESCRIPTION("RDS: IB/iWARP transport"); 231MODULE_DESCRIPTION("RDS: IB transport");
233MODULE_LICENSE("Dual BSD/GPL"); 232MODULE_LICENSE("Dual BSD/GPL");
234 233
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index faba4e382695..ff2010e9d20c 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -16,9 +16,4 @@ extern struct rds_transport rds_ib_transport;
16int rds_ib_init(void); 16int rds_ib_init(void);
17void rds_ib_exit(void); 17void rds_ib_exit(void);
18 18
19/* from iw.c */
20extern struct rds_transport rds_iw_transport;
21int rds_iw_init(void);
22void rds_iw_exit(void);
23
24#endif 19#endif
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 0e2797bdc316..80256b08eac0 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -222,6 +222,7 @@ struct rds_incoming {
222 __be32 i_saddr; 222 __be32 i_saddr;
223 223
224 rds_rdma_cookie_t i_rdma_cookie; 224 rds_rdma_cookie_t i_rdma_cookie;
225 struct timeval i_rx_tstamp;
225}; 226};
226 227
227struct rds_mr { 228struct rds_mr {
diff --git a/net/rds/recv.c b/net/rds/recv.c
index a00462b0d01d..c0be1ecd11c9 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -35,6 +35,8 @@
35#include <net/sock.h> 35#include <net/sock.h>
36#include <linux/in.h> 36#include <linux/in.h>
37#include <linux/export.h> 37#include <linux/export.h>
38#include <linux/time.h>
39#include <linux/rds.h>
38 40
39#include "rds.h" 41#include "rds.h"
40 42
@@ -46,6 +48,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
46 inc->i_conn = conn; 48 inc->i_conn = conn;
47 inc->i_saddr = saddr; 49 inc->i_saddr = saddr;
48 inc->i_rdma_cookie = 0; 50 inc->i_rdma_cookie = 0;
51 inc->i_rx_tstamp.tv_sec = 0;
52 inc->i_rx_tstamp.tv_usec = 0;
49} 53}
50EXPORT_SYMBOL_GPL(rds_inc_init); 54EXPORT_SYMBOL_GPL(rds_inc_init);
51 55
@@ -228,6 +232,8 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
228 rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, 232 rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
229 be32_to_cpu(inc->i_hdr.h_len), 233 be32_to_cpu(inc->i_hdr.h_len),
230 inc->i_hdr.h_dport); 234 inc->i_hdr.h_dport);
235 if (sock_flag(sk, SOCK_RCVTSTAMP))
236 do_gettimeofday(&inc->i_rx_tstamp);
231 rds_inc_addref(inc); 237 rds_inc_addref(inc);
232 list_add_tail(&inc->i_item, &rs->rs_recv_queue); 238 list_add_tail(&inc->i_item, &rs->rs_recv_queue);
233 __rds_wake_sk_sleep(sk); 239 __rds_wake_sk_sleep(sk);
@@ -381,7 +387,8 @@ static int rds_notify_cong(struct rds_sock *rs, struct msghdr *msghdr)
381/* 387/*
382 * Receive any control messages. 388 * Receive any control messages.
383 */ 389 */
384static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg) 390static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
391 struct rds_sock *rs)
385{ 392{
386 int ret = 0; 393 int ret = 0;
387 394
@@ -392,6 +399,15 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg)
392 return ret; 399 return ret;
393 } 400 }
394 401
402 if ((inc->i_rx_tstamp.tv_sec != 0) &&
403 sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) {
404 ret = put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
405 sizeof(struct timeval),
406 &inc->i_rx_tstamp);
407 if (ret)
408 return ret;
409 }
410
395 return 0; 411 return 0;
396} 412}
397 413
@@ -474,7 +490,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
474 msg->msg_flags |= MSG_TRUNC; 490 msg->msg_flags |= MSG_TRUNC;
475 } 491 }
476 492
477 if (rds_cmsg_recv(inc, msg)) { 493 if (rds_cmsg_recv(inc, msg, rs)) {
478 ret = -EFAULT; 494 ret = -EFAULT;
479 goto out; 495 goto out;
480 } 496 }
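The recv.c change above records a receive timestamp and hands it to userspace as an SCM_TIMESTAMP control message when SO_TIMESTAMP is enabled on the socket. A minimal userspace fragment along these lines could read it back; it assumes an already-created and bound RDS socket descriptor and trims error handling.

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/time.h>

static void ex_recv_with_tstamp(int fd)
{
	char payload[1024];
	char cbuf[CMSG_SPACE(sizeof(struct timeval))];
	struct iovec iov = { .iov_base = payload, .iov_len = sizeof(payload) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	int on = 1;

	/* Ask the kernel to stamp incoming messages. */
	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on));

	if (recvmsg(fd, &msg, 0) < 0)
		return;

	for (struct cmsghdr *c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
		if (c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_TIMESTAMP) {
			struct timeval tv;

			memcpy(&tv, CMSG_DATA(c), sizeof(tv));
			printf("rx at %ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
		}
	}
}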
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 9d6ddbacd875..61ed2a8764ba 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -37,7 +37,6 @@
37#include <net/tcp.h> 37#include <net/tcp.h>
38#include <net/net_namespace.h> 38#include <net/net_namespace.h>
39#include <net/netns/generic.h> 39#include <net/netns/generic.h>
40#include <net/tcp.h>
41 40
42#include "rds.h" 41#include "rds.h"
43#include "tcp.h" 42#include "tcp.h"
@@ -53,7 +52,34 @@ static LIST_HEAD(rds_tcp_conn_list);
53 52
54static struct kmem_cache *rds_tcp_conn_slab; 53static struct kmem_cache *rds_tcp_conn_slab;
55 54
56#define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024) 55static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
56 void __user *buffer, size_t *lenp,
57 loff_t *fpos);
58
59int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
60int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;
61
62static struct ctl_table rds_tcp_sysctl_table[] = {
63#define RDS_TCP_SNDBUF 0
64 {
65 .procname = "rds_tcp_sndbuf",
66 /* data is per-net pointer */
67 .maxlen = sizeof(int),
68 .mode = 0644,
69 .proc_handler = rds_tcp_skbuf_handler,
70 .extra1 = &rds_tcp_min_sndbuf,
71 },
72#define RDS_TCP_RCVBUF 1
73 {
74 .procname = "rds_tcp_rcvbuf",
75 /* data is per-net pointer */
76 .maxlen = sizeof(int),
77 .mode = 0644,
78 .proc_handler = rds_tcp_skbuf_handler,
79 .extra1 = &rds_tcp_min_rcvbuf,
80 },
81 { }
82};
57 83
58/* doing it this way avoids calling tcp_sk() */ 84/* doing it this way avoids calling tcp_sk() */
59void rds_tcp_nonagle(struct socket *sock) 85void rds_tcp_nonagle(struct socket *sock)
@@ -67,15 +93,6 @@ void rds_tcp_nonagle(struct socket *sock)
67 set_fs(oldfs); 93 set_fs(oldfs);
68} 94}
69 95
70/* All module specific customizations to the RDS-TCP socket should be done in
71 * rds_tcp_tune() and applied after socket creation. In general these
72 * customizations should be tunable via module_param()
73 */
74void rds_tcp_tune(struct socket *sock)
75{
76 rds_tcp_nonagle(sock);
77}
78
79u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) 96u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
80{ 97{
81 return tcp_sk(tc->t_sock->sk)->snd_nxt; 98 return tcp_sk(tc->t_sock->sk)->snd_nxt;
@@ -273,8 +290,34 @@ static int rds_tcp_netid;
273struct rds_tcp_net { 290struct rds_tcp_net {
274 struct socket *rds_tcp_listen_sock; 291 struct socket *rds_tcp_listen_sock;
275 struct work_struct rds_tcp_accept_w; 292 struct work_struct rds_tcp_accept_w;
293 struct ctl_table_header *rds_tcp_sysctl;
294 struct ctl_table *ctl_table;
295 int sndbuf_size;
296 int rcvbuf_size;
276}; 297};
277 298
299/* All module specific customizations to the RDS-TCP socket should be done in
300 * rds_tcp_tune() and applied after socket creation.
301 */
302void rds_tcp_tune(struct socket *sock)
303{
304 struct sock *sk = sock->sk;
305 struct net *net = sock_net(sk);
306 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
307
308 rds_tcp_nonagle(sock);
309 lock_sock(sk);
310 if (rtn->sndbuf_size > 0) {
311 sk->sk_sndbuf = rtn->sndbuf_size;
312 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
313 }
314 if (rtn->rcvbuf_size > 0) {
315		sk->sk_rcvbuf = rtn->rcvbuf_size;
316 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
317 }
318 release_sock(sk);
319}
320
278static void rds_tcp_accept_worker(struct work_struct *work) 321static void rds_tcp_accept_worker(struct work_struct *work)
279{ 322{
280 struct rds_tcp_net *rtn = container_of(work, 323 struct rds_tcp_net *rtn = container_of(work,
@@ -296,20 +339,60 @@ void rds_tcp_accept_work(struct sock *sk)
296static __net_init int rds_tcp_init_net(struct net *net) 339static __net_init int rds_tcp_init_net(struct net *net)
297{ 340{
298 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 341 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
342 struct ctl_table *tbl;
343 int err = 0;
299 344
345 memset(rtn, 0, sizeof(*rtn));
346
347 /* {snd, rcv}buf_size default to 0, which implies we let the
348 * stack pick the value, and permit auto-tuning of buffer size.
349 */
350 if (net == &init_net) {
351 tbl = rds_tcp_sysctl_table;
352 } else {
353 tbl = kmemdup(rds_tcp_sysctl_table,
354 sizeof(rds_tcp_sysctl_table), GFP_KERNEL);
355 if (!tbl) {
356			pr_warn("could not allocate sysctl table\n");
357 return -ENOMEM;
358 }
359 rtn->ctl_table = tbl;
360 }
361 tbl[RDS_TCP_SNDBUF].data = &rtn->sndbuf_size;
362 tbl[RDS_TCP_RCVBUF].data = &rtn->rcvbuf_size;
363 rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp", tbl);
364 if (!rtn->rds_tcp_sysctl) {
365 pr_warn("could not register sysctl\n");
366 err = -ENOMEM;
367 goto fail;
368 }
300 rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net); 369 rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
301 if (!rtn->rds_tcp_listen_sock) { 370 if (!rtn->rds_tcp_listen_sock) {
302 pr_warn("could not set up listen sock\n"); 371 pr_warn("could not set up listen sock\n");
303 return -EAFNOSUPPORT; 372 unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
373 rtn->rds_tcp_sysctl = NULL;
374 err = -EAFNOSUPPORT;
375 goto fail;
304 } 376 }
305 INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); 377 INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
306 return 0; 378 return 0;
379
380fail:
381 if (net != &init_net)
382 kfree(tbl);
383 return err;
307} 384}
308 385
309static void __net_exit rds_tcp_exit_net(struct net *net) 386static void __net_exit rds_tcp_exit_net(struct net *net)
310{ 387{
311 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 388 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
312 389
390 if (rtn->rds_tcp_sysctl)
391 unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
392
393 if (net != &init_net && rtn->ctl_table)
394 kfree(rtn->ctl_table);
395
313 /* If rds_tcp_exit_net() is called as a result of netns deletion, 396 /* If rds_tcp_exit_net() is called as a result of netns deletion,
314 * the rds_tcp_kill_sock() device notifier would already have cleaned 397 * the rds_tcp_kill_sock() device notifier would already have cleaned
315 * up the listen socket, thus there is no work to do in this function. 398 * up the listen socket, thus there is no work to do in this function.
@@ -384,6 +467,45 @@ static struct notifier_block rds_tcp_dev_notifier = {
384 .priority = -10, /* must be called after other network notifiers */ 467 .priority = -10, /* must be called after other network notifiers */
385}; 468};
386 469
470/* When sysctl is used to modify some kernel socket parameters, this
471 * function resets the RDS connections in that netns so that we can
472 * restart with new parameters. The assumption is that such reset
473 * events are few and far-between.
474 */
475static void rds_tcp_sysctl_reset(struct net *net)
476{
477 struct rds_tcp_connection *tc, *_tc;
478
479 spin_lock_irq(&rds_tcp_conn_lock);
480 list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
481 struct net *c_net = read_pnet(&tc->conn->c_net);
482
483 if (net != c_net || !tc->t_sock)
484 continue;
485
486 rds_conn_drop(tc->conn); /* reconnect with new parameters */
487 }
488 spin_unlock_irq(&rds_tcp_conn_lock);
489}
490
491static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
492 void __user *buffer, size_t *lenp,
493 loff_t *fpos)
494{
495 struct net *net = current->nsproxy->net_ns;
496 int err;
497
498 err = proc_dointvec_minmax(ctl, write, buffer, lenp, fpos);
499 if (err < 0) {
500 pr_warn("Invalid input. Must be >= %d\n",
501 *(int *)(ctl->extra1));
502 return err;
503 }
504 if (write)
505 rds_tcp_sysctl_reset(net);
506 return 0;
507}
508
387static void rds_tcp_exit(void) 509static void rds_tcp_exit(void)
388{ 510{
389 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); 511 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
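The tcp.c change above registers per-netns tunables under net/rds/tcp and drops existing RDS-TCP connections when either one is written, so they reconnect with the new buffer sizes. A small sketch of driving the knob through procfs follows; the /proc path is inferred from register_net_sysctl(net, "net/rds/tcp", ...) and the rds_tcp_sndbuf procname, and the 1 MB value is arbitrary.

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/rds/tcp/rds_tcp_sndbuf", "w");

	if (!f) {
		perror("rds_tcp_sndbuf");
		return 1;
	}
	/* Existing RDS-TCP connections in this netns are dropped and
	 * reconnect with the new size (see rds_tcp_sysctl_reset()). */
	fprintf(f, "%d\n", 1048576);
	fclose(f);
	return 0;
}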
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index 598d374f6a35..868f1ad0415a 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -41,5 +41,4 @@ config RFKILL_GPIO
41 default n 41 default n
42 help 42 help
43 If you say yes here you get support of a generic gpio RFKILL 43 If you say yes here you get support of a generic gpio RFKILL
44 driver. The platform should fill in the appropriate fields in the 44 driver.
45 rfkill_gpio_platform_data structure and pass that to the driver.
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index f53bf3b6558b..03f26e3a6f48 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -57,6 +57,8 @@ struct rfkill {
57 57
58 bool registered; 58 bool registered;
59 bool persistent; 59 bool persistent;
60 bool polling_paused;
61 bool suspended;
60 62
61 const struct rfkill_ops *ops; 63 const struct rfkill_ops *ops;
62 void *data; 64 void *data;
@@ -233,29 +235,6 @@ static void rfkill_event(struct rfkill *rfkill)
233 rfkill_send_events(rfkill, RFKILL_OP_CHANGE); 235 rfkill_send_events(rfkill, RFKILL_OP_CHANGE);
234} 236}
235 237
236static bool __rfkill_set_hw_state(struct rfkill *rfkill,
237 bool blocked, bool *change)
238{
239 unsigned long flags;
240 bool prev, any;
241
242 BUG_ON(!rfkill);
243
244 spin_lock_irqsave(&rfkill->lock, flags);
245 prev = !!(rfkill->state & RFKILL_BLOCK_HW);
246 if (blocked)
247 rfkill->state |= RFKILL_BLOCK_HW;
248 else
249 rfkill->state &= ~RFKILL_BLOCK_HW;
250 *change = prev != blocked;
251 any = !!(rfkill->state & RFKILL_BLOCK_ANY);
252 spin_unlock_irqrestore(&rfkill->lock, flags);
253
254 rfkill_led_trigger_event(rfkill);
255
256 return any;
257}
258
259/** 238/**
260 * rfkill_set_block - wrapper for set_block method 239 * rfkill_set_block - wrapper for set_block method
261 * 240 *
@@ -285,7 +264,7 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
285 spin_lock_irqsave(&rfkill->lock, flags); 264 spin_lock_irqsave(&rfkill->lock, flags);
286 prev = rfkill->state & RFKILL_BLOCK_SW; 265 prev = rfkill->state & RFKILL_BLOCK_SW;
287 266
288 if (rfkill->state & RFKILL_BLOCK_SW) 267 if (prev)
289 rfkill->state |= RFKILL_BLOCK_SW_PREV; 268 rfkill->state |= RFKILL_BLOCK_SW_PREV;
290 else 269 else
291 rfkill->state &= ~RFKILL_BLOCK_SW_PREV; 270 rfkill->state &= ~RFKILL_BLOCK_SW_PREV;
@@ -303,8 +282,8 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
303 spin_lock_irqsave(&rfkill->lock, flags); 282 spin_lock_irqsave(&rfkill->lock, flags);
304 if (err) { 283 if (err) {
305 /* 284 /*
306 * Failed -- reset status to _prev, this may be different 285 * Failed -- reset status to _PREV, which may be different
307 * from what set set _PREV to earlier in this function 286 * from what we have set _PREV to earlier in this function
308 * if rfkill_set_sw_state was invoked. 287 * if rfkill_set_sw_state was invoked.
309 */ 288 */
310 if (rfkill->state & RFKILL_BLOCK_SW_PREV) 289 if (rfkill->state & RFKILL_BLOCK_SW_PREV)
@@ -323,6 +302,19 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
323 rfkill_event(rfkill); 302 rfkill_event(rfkill);
324} 303}
325 304
305static void rfkill_update_global_state(enum rfkill_type type, bool blocked)
306{
307 int i;
308
309 if (type != RFKILL_TYPE_ALL) {
310 rfkill_global_states[type].cur = blocked;
311 return;
312 }
313
314 for (i = 0; i < NUM_RFKILL_TYPES; i++)
315 rfkill_global_states[i].cur = blocked;
316}
317
326#ifdef CONFIG_RFKILL_INPUT 318#ifdef CONFIG_RFKILL_INPUT
327static atomic_t rfkill_input_disabled = ATOMIC_INIT(0); 319static atomic_t rfkill_input_disabled = ATOMIC_INIT(0);
328 320
@@ -332,8 +324,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0);
332 * @blocked: the new state 324 * @blocked: the new state
333 * 325 *
334 * This function sets the state of all switches of given type, 326 * This function sets the state of all switches of given type,
335 * unless a specific switch is claimed by userspace (in which case, 327 * unless a specific switch is suspended.
336 * that switch is left alone) or suspended.
337 * 328 *
338 * Caller must have acquired rfkill_global_mutex. 329 * Caller must have acquired rfkill_global_mutex.
339 */ 330 */
@@ -341,15 +332,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked)
341{ 332{
342 struct rfkill *rfkill; 333 struct rfkill *rfkill;
343 334
344 if (type == RFKILL_TYPE_ALL) { 335 rfkill_update_global_state(type, blocked);
345 int i;
346
347 for (i = 0; i < NUM_RFKILL_TYPES; i++)
348 rfkill_global_states[i].cur = blocked;
349 } else {
350 rfkill_global_states[type].cur = blocked;
351 }
352
353 list_for_each_entry(rfkill, &rfkill_list, node) { 336 list_for_each_entry(rfkill, &rfkill_list, node) {
354 if (rfkill->type != type && type != RFKILL_TYPE_ALL) 337 if (rfkill->type != type && type != RFKILL_TYPE_ALL)
355 continue; 338 continue;
@@ -477,17 +460,28 @@ bool rfkill_get_global_sw_state(const enum rfkill_type type)
477} 460}
478#endif 461#endif
479 462
480
481bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked) 463bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked)
482{ 464{
483 bool ret, change; 465 unsigned long flags;
466 bool ret, prev;
467
468 BUG_ON(!rfkill);
469
470 spin_lock_irqsave(&rfkill->lock, flags);
471 prev = !!(rfkill->state & RFKILL_BLOCK_HW);
472 if (blocked)
473 rfkill->state |= RFKILL_BLOCK_HW;
474 else
475 rfkill->state &= ~RFKILL_BLOCK_HW;
476 ret = !!(rfkill->state & RFKILL_BLOCK_ANY);
477 spin_unlock_irqrestore(&rfkill->lock, flags);
484 478
485 ret = __rfkill_set_hw_state(rfkill, blocked, &change); 479 rfkill_led_trigger_event(rfkill);
486 480
487 if (!rfkill->registered) 481 if (!rfkill->registered)
488 return ret; 482 return ret;
489 483
490 if (change) 484 if (prev != blocked)
491 schedule_work(&rfkill->uevent_work); 485 schedule_work(&rfkill->uevent_work);
492 486
493 return ret; 487 return ret;
@@ -582,6 +576,34 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw)
582} 576}
583EXPORT_SYMBOL(rfkill_set_states); 577EXPORT_SYMBOL(rfkill_set_states);
584 578
579static const char * const rfkill_types[] = {
580 NULL, /* RFKILL_TYPE_ALL */
581 "wlan",
582 "bluetooth",
583 "ultrawideband",
584 "wimax",
585 "wwan",
586 "gps",
587 "fm",
588 "nfc",
589};
590
591enum rfkill_type rfkill_find_type(const char *name)
592{
593 int i;
594
595 BUILD_BUG_ON(ARRAY_SIZE(rfkill_types) != NUM_RFKILL_TYPES);
596
597 if (!name)
598 return RFKILL_TYPE_ALL;
599
600 for (i = 1; i < NUM_RFKILL_TYPES; i++)
601 if (!strcmp(name, rfkill_types[i]))
602 return i;
603 return RFKILL_TYPE_ALL;
604}
605EXPORT_SYMBOL(rfkill_find_type);
606
585static ssize_t name_show(struct device *dev, struct device_attribute *attr, 607static ssize_t name_show(struct device *dev, struct device_attribute *attr,
586 char *buf) 608 char *buf)
587{ 609{
@@ -591,38 +613,12 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr,
591} 613}
592static DEVICE_ATTR_RO(name); 614static DEVICE_ATTR_RO(name);
593 615
594static const char *rfkill_get_type_str(enum rfkill_type type)
595{
596 BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_NFC + 1);
597
598 switch (type) {
599 case RFKILL_TYPE_WLAN:
600 return "wlan";
601 case RFKILL_TYPE_BLUETOOTH:
602 return "bluetooth";
603 case RFKILL_TYPE_UWB:
604 return "ultrawideband";
605 case RFKILL_TYPE_WIMAX:
606 return "wimax";
607 case RFKILL_TYPE_WWAN:
608 return "wwan";
609 case RFKILL_TYPE_GPS:
610 return "gps";
611 case RFKILL_TYPE_FM:
612 return "fm";
613 case RFKILL_TYPE_NFC:
614 return "nfc";
615 default:
616 BUG();
617 }
618}
619
620static ssize_t type_show(struct device *dev, struct device_attribute *attr, 616static ssize_t type_show(struct device *dev, struct device_attribute *attr,
621 char *buf) 617 char *buf)
622{ 618{
623 struct rfkill *rfkill = to_rfkill(dev); 619 struct rfkill *rfkill = to_rfkill(dev);
624 620
625 return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type)); 621 return sprintf(buf, "%s\n", rfkill_types[rfkill->type]);
626} 622}
627static DEVICE_ATTR_RO(type); 623static DEVICE_ATTR_RO(type);
628 624
@@ -730,20 +726,12 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
730} 726}
731static DEVICE_ATTR_RW(state); 727static DEVICE_ATTR_RW(state);
732 728
733static ssize_t claim_show(struct device *dev, struct device_attribute *attr,
734 char *buf)
735{
736 return sprintf(buf, "%d\n", 0);
737}
738static DEVICE_ATTR_RO(claim);
739
740static struct attribute *rfkill_dev_attrs[] = { 729static struct attribute *rfkill_dev_attrs[] = {
741 &dev_attr_name.attr, 730 &dev_attr_name.attr,
742 &dev_attr_type.attr, 731 &dev_attr_type.attr,
743 &dev_attr_index.attr, 732 &dev_attr_index.attr,
744 &dev_attr_persistent.attr, 733 &dev_attr_persistent.attr,
745 &dev_attr_state.attr, 734 &dev_attr_state.attr,
746 &dev_attr_claim.attr,
747 &dev_attr_soft.attr, 735 &dev_attr_soft.attr,
748 &dev_attr_hard.attr, 736 &dev_attr_hard.attr,
749 NULL, 737 NULL,
@@ -768,7 +756,7 @@ static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
768 if (error) 756 if (error)
769 return error; 757 return error;
770 error = add_uevent_var(env, "RFKILL_TYPE=%s", 758 error = add_uevent_var(env, "RFKILL_TYPE=%s",
771 rfkill_get_type_str(rfkill->type)); 759 rfkill_types[rfkill->type]);
772 if (error) 760 if (error)
773 return error; 761 return error;
774 spin_lock_irqsave(&rfkill->lock, flags); 762 spin_lock_irqsave(&rfkill->lock, flags);
@@ -786,6 +774,7 @@ void rfkill_pause_polling(struct rfkill *rfkill)
786 if (!rfkill->ops->poll) 774 if (!rfkill->ops->poll)
787 return; 775 return;
788 776
777 rfkill->polling_paused = true;
789 cancel_delayed_work_sync(&rfkill->poll_work); 778 cancel_delayed_work_sync(&rfkill->poll_work);
790} 779}
791EXPORT_SYMBOL(rfkill_pause_polling); 780EXPORT_SYMBOL(rfkill_pause_polling);
@@ -797,6 +786,11 @@ void rfkill_resume_polling(struct rfkill *rfkill)
797 if (!rfkill->ops->poll) 786 if (!rfkill->ops->poll)
798 return; 787 return;
799 788
789 rfkill->polling_paused = false;
790
791 if (rfkill->suspended)
792 return;
793
800 queue_delayed_work(system_power_efficient_wq, 794 queue_delayed_work(system_power_efficient_wq,
801 &rfkill->poll_work, 0); 795 &rfkill->poll_work, 0);
802} 796}
@@ -807,7 +801,8 @@ static int rfkill_suspend(struct device *dev)
807{ 801{
808 struct rfkill *rfkill = to_rfkill(dev); 802 struct rfkill *rfkill = to_rfkill(dev);
809 803
810 rfkill_pause_polling(rfkill); 804 rfkill->suspended = true;
805 cancel_delayed_work_sync(&rfkill->poll_work);
811 806
812 return 0; 807 return 0;
813} 808}
@@ -817,12 +812,16 @@ static int rfkill_resume(struct device *dev)
817 struct rfkill *rfkill = to_rfkill(dev); 812 struct rfkill *rfkill = to_rfkill(dev);
818 bool cur; 813 bool cur;
819 814
815 rfkill->suspended = false;
816
820 if (!rfkill->persistent) { 817 if (!rfkill->persistent) {
821 cur = !!(rfkill->state & RFKILL_BLOCK_SW); 818 cur = !!(rfkill->state & RFKILL_BLOCK_SW);
822 rfkill_set_block(rfkill, cur); 819 rfkill_set_block(rfkill, cur);
823 } 820 }
824 821
825 rfkill_resume_polling(rfkill); 822 if (rfkill->ops->poll && !rfkill->polling_paused)
823 queue_delayed_work(system_power_efficient_wq,
824 &rfkill->poll_work, 0);
826 825
827 return 0; 826 return 0;
828} 827}
@@ -1095,17 +1094,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
1095 return res; 1094 return res;
1096} 1095}
1097 1096
1098static bool rfkill_readable(struct rfkill_data *data)
1099{
1100 bool r;
1101
1102 mutex_lock(&data->mtx);
1103 r = !list_empty(&data->events);
1104 mutex_unlock(&data->mtx);
1105
1106 return r;
1107}
1108
1109static ssize_t rfkill_fop_read(struct file *file, char __user *buf, 1097static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
1110 size_t count, loff_t *pos) 1098 size_t count, loff_t *pos)
1111{ 1099{
@@ -1122,8 +1110,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
1122 goto out; 1110 goto out;
1123 } 1111 }
1124 mutex_unlock(&data->mtx); 1112 mutex_unlock(&data->mtx);
1113 /* since we re-check and it just compares pointers,
1114 * using !list_empty() without locking isn't a problem
1115 */
1125 ret = wait_event_interruptible(data->read_wait, 1116 ret = wait_event_interruptible(data->read_wait,
1126 rfkill_readable(data)); 1117 !list_empty(&data->events));
1127 mutex_lock(&data->mtx); 1118 mutex_lock(&data->mtx);
1128 1119
1129 if (ret) 1120 if (ret)
@@ -1172,15 +1163,8 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf,
1172 1163
1173 mutex_lock(&rfkill_global_mutex); 1164 mutex_lock(&rfkill_global_mutex);
1174 1165
1175 if (ev.op == RFKILL_OP_CHANGE_ALL) { 1166 if (ev.op == RFKILL_OP_CHANGE_ALL)
1176 if (ev.type == RFKILL_TYPE_ALL) { 1167 rfkill_update_global_state(ev.type, ev.soft);
1177 enum rfkill_type i;
1178 for (i = 0; i < NUM_RFKILL_TYPES; i++)
1179 rfkill_global_states[i].cur = ev.soft;
1180 } else {
1181 rfkill_global_states[ev.type].cur = ev.soft;
1182 }
1183 }
1184 1168
1185 list_for_each_entry(rfkill, &rfkill_list, node) { 1169 list_for_each_entry(rfkill, &rfkill_list, node) {
1186 if (rfkill->idx != ev.idx && ev.op != RFKILL_OP_CHANGE_ALL) 1170 if (rfkill->idx != ev.idx && ev.op != RFKILL_OP_CHANGE_ALL)
@@ -1269,10 +1253,8 @@ static struct miscdevice rfkill_miscdev = {
1269static int __init rfkill_init(void) 1253static int __init rfkill_init(void)
1270{ 1254{
1271 int error; 1255 int error;
1272 int i;
1273 1256
1274 for (i = 0; i < NUM_RFKILL_TYPES; i++) 1257 rfkill_update_global_state(RFKILL_TYPE_ALL, !rfkill_default_state);
1275 rfkill_global_states[i].cur = !rfkill_default_state;
1276 1258
1277 error = class_register(&rfkill_class); 1259 error = class_register(&rfkill_class);
1278 if (error) 1260 if (error)
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 4b1e3f35f06c..76c01cbd56e3 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -27,8 +27,6 @@
27#include <linux/acpi.h> 27#include <linux/acpi.h>
28#include <linux/gpio/consumer.h> 28#include <linux/gpio/consumer.h>
29 29
30#include <linux/rfkill-gpio.h>
31
32struct rfkill_gpio_data { 30struct rfkill_gpio_data {
33 const char *name; 31 const char *name;
34 enum rfkill_type type; 32 enum rfkill_type type;
@@ -81,7 +79,6 @@ static int rfkill_gpio_acpi_probe(struct device *dev,
81 if (!id) 79 if (!id)
82 return -ENODEV; 80 return -ENODEV;
83 81
84 rfkill->name = dev_name(dev);
85 rfkill->type = (unsigned)id->driver_data; 82 rfkill->type = (unsigned)id->driver_data;
86 83
87 return acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), 84 return acpi_dev_add_driver_gpios(ACPI_COMPANION(dev),
@@ -90,24 +87,27 @@ static int rfkill_gpio_acpi_probe(struct device *dev,
90 87
91static int rfkill_gpio_probe(struct platform_device *pdev) 88static int rfkill_gpio_probe(struct platform_device *pdev)
92{ 89{
93 struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
94 struct rfkill_gpio_data *rfkill; 90 struct rfkill_gpio_data *rfkill;
95 struct gpio_desc *gpio; 91 struct gpio_desc *gpio;
92 const char *type_name;
96 int ret; 93 int ret;
97 94
98 rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL); 95 rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL);
99 if (!rfkill) 96 if (!rfkill)
100 return -ENOMEM; 97 return -ENOMEM;
101 98
99 device_property_read_string(&pdev->dev, "name", &rfkill->name);
100 device_property_read_string(&pdev->dev, "type", &type_name);
101
102 if (!rfkill->name)
103 rfkill->name = dev_name(&pdev->dev);
104
105 rfkill->type = rfkill_find_type(type_name);
106
102 if (ACPI_HANDLE(&pdev->dev)) { 107 if (ACPI_HANDLE(&pdev->dev)) {
103 ret = rfkill_gpio_acpi_probe(&pdev->dev, rfkill); 108 ret = rfkill_gpio_acpi_probe(&pdev->dev, rfkill);
104 if (ret) 109 if (ret)
105 return ret; 110 return ret;
106 } else if (pdata) {
107 rfkill->name = pdata->name;
108 rfkill->type = pdata->type;
109 } else {
110 return -ENODEV;
111 } 111 }
112 112
113 rfkill->clk = devm_clk_get(&pdev->dev, NULL); 113 rfkill->clk = devm_clk_get(&pdev->dev, NULL);
@@ -124,10 +124,8 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
124 124
125 rfkill->shutdown_gpio = gpio; 125 rfkill->shutdown_gpio = gpio;
126 126
127 /* Make sure at-least one of the GPIO is defined and that 127 /* Make sure at-least one GPIO is defined for this instance */
128 * a name is specified for this instance 128 if (!rfkill->reset_gpio && !rfkill->shutdown_gpio) {
129 */
130 if ((!rfkill->reset_gpio && !rfkill->shutdown_gpio) || !rfkill->name) {
131 dev_err(&pdev->dev, "invalid platform data\n"); 129 dev_err(&pdev->dev, "invalid platform data\n");
132 return -EINVAL; 130 return -EINVAL;
133 } 131 }
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 7e2d1057d8bc..9d935fa5a2a9 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -37,7 +37,7 @@ static struct proto rxrpc_proto;
37static const struct proto_ops rxrpc_rpc_ops; 37static const struct proto_ops rxrpc_rpc_ops;
38 38
39/* local epoch for detecting local-end reset */ 39/* local epoch for detecting local-end reset */
40__be32 rxrpc_epoch; 40u32 rxrpc_epoch;
41 41
42/* current debugging ID */ 42/* current debugging ID */
43atomic_t rxrpc_debug_id; 43atomic_t rxrpc_debug_id;
@@ -81,6 +81,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
81 struct sockaddr_rxrpc *srx, 81 struct sockaddr_rxrpc *srx,
82 int len) 82 int len)
83{ 83{
84 unsigned int tail;
85
84 if (len < sizeof(struct sockaddr_rxrpc)) 86 if (len < sizeof(struct sockaddr_rxrpc))
85 return -EINVAL; 87 return -EINVAL;
86 88
@@ -103,9 +105,7 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
103 _debug("INET: %x @ %pI4", 105 _debug("INET: %x @ %pI4",
104 ntohs(srx->transport.sin.sin_port), 106 ntohs(srx->transport.sin.sin_port),
105 &srx->transport.sin.sin_addr); 107 &srx->transport.sin.sin_addr);
106 if (srx->transport_len > 8) 108 tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad);
107 memset((void *)&srx->transport + 8, 0,
108 srx->transport_len - 8);
109 break; 109 break;
110 110
111 case AF_INET6: 111 case AF_INET6:
@@ -113,6 +113,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
113 return -EAFNOSUPPORT; 113 return -EAFNOSUPPORT;
114 } 114 }
115 115
116 if (tail < len)
117 memset((void *)srx + tail, 0, len - tail);
116 return 0; 118 return 0;
117} 119}
118 120
@@ -121,11 +123,10 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
121 */ 123 */
122static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) 124static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
123{ 125{
124 struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) saddr; 126 struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
125 struct sock *sk = sock->sk; 127 struct sock *sk = sock->sk;
126 struct rxrpc_local *local; 128 struct rxrpc_local *local;
127 struct rxrpc_sock *rx = rxrpc_sk(sk), *prx; 129 struct rxrpc_sock *rx = rxrpc_sk(sk), *prx;
128 __be16 service_id;
129 int ret; 130 int ret;
130 131
131 _enter("%p,%p,%d", rx, saddr, len); 132 _enter("%p,%p,%d", rx, saddr, len);
@@ -143,7 +144,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
143 144
144 memcpy(&rx->srx, srx, sizeof(rx->srx)); 145 memcpy(&rx->srx, srx, sizeof(rx->srx));
145 146
146 /* find a local transport endpoint if we don't have one already */ 147 /* Find or create a local transport endpoint to use */
147 local = rxrpc_lookup_local(&rx->srx); 148 local = rxrpc_lookup_local(&rx->srx);
148 if (IS_ERR(local)) { 149 if (IS_ERR(local)) {
149 ret = PTR_ERR(local); 150 ret = PTR_ERR(local);
@@ -152,14 +153,12 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
152 153
153 rx->local = local; 154 rx->local = local;
154 if (srx->srx_service) { 155 if (srx->srx_service) {
155 service_id = htons(srx->srx_service);
156 write_lock_bh(&local->services_lock); 156 write_lock_bh(&local->services_lock);
157 list_for_each_entry(prx, &local->services, listen_link) { 157 list_for_each_entry(prx, &local->services, listen_link) {
158 if (prx->service_id == service_id) 158 if (prx->srx.srx_service == srx->srx_service)
159 goto service_in_use; 159 goto service_in_use;
160 } 160 }
161 161
162 rx->service_id = service_id;
163 list_add_tail(&rx->listen_link, &local->services); 162 list_add_tail(&rx->listen_link, &local->services);
164 write_unlock_bh(&local->services_lock); 163 write_unlock_bh(&local->services_lock);
165 164
@@ -276,7 +275,6 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
276 struct rxrpc_transport *trans; 275 struct rxrpc_transport *trans;
277 struct rxrpc_call *call; 276 struct rxrpc_call *call;
278 struct rxrpc_sock *rx = rxrpc_sk(sock->sk); 277 struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
279 __be16 service_id;
280 278
281 _enter(",,%x,%lx", key_serial(key), user_call_ID); 279 _enter(",,%x,%lx", key_serial(key), user_call_ID);
282 280
@@ -299,16 +297,14 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
299 atomic_inc(&trans->usage); 297 atomic_inc(&trans->usage);
300 } 298 }
301 299
302 service_id = rx->service_id; 300 if (!srx)
303 if (srx) 301 srx = &rx->srx;
304 service_id = htons(srx->srx_service);
305
306 if (!key) 302 if (!key)
307 key = rx->key; 303 key = rx->key;
308 if (key && !key->payload.data[0]) 304 if (key && !key->payload.data[0])
309 key = NULL; /* a no-security key */ 305 key = NULL; /* a no-security key */
310 306
311 bundle = rxrpc_get_bundle(rx, trans, key, service_id, gfp); 307 bundle = rxrpc_get_bundle(rx, trans, key, srx->srx_service, gfp);
312 if (IS_ERR(bundle)) { 308 if (IS_ERR(bundle)) {
313 call = ERR_CAST(bundle); 309 call = ERR_CAST(bundle);
314 goto out; 310 goto out;
@@ -324,7 +320,6 @@ out_notrans:
324 _leave(" = %p", call); 320 _leave(" = %p", call);
325 return call; 321 return call;
326} 322}
327
328EXPORT_SYMBOL(rxrpc_kernel_begin_call); 323EXPORT_SYMBOL(rxrpc_kernel_begin_call);
329 324
330/** 325/**
@@ -340,7 +335,6 @@ void rxrpc_kernel_end_call(struct rxrpc_call *call)
340 rxrpc_remove_user_ID(call->socket, call); 335 rxrpc_remove_user_ID(call->socket, call);
341 rxrpc_put_call(call); 336 rxrpc_put_call(call);
342} 337}
343
344EXPORT_SYMBOL(rxrpc_kernel_end_call); 338EXPORT_SYMBOL(rxrpc_kernel_end_call);
345 339
346/** 340/**
@@ -425,7 +419,6 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
425 } 419 }
426 420
427 rx->trans = trans; 421 rx->trans = trans;
428 rx->service_id = htons(srx->srx_service);
429 rx->sk.sk_state = RXRPC_CLIENT_CONNECTED; 422 rx->sk.sk_state = RXRPC_CLIENT_CONNECTED;
430 423
431 release_sock(&rx->sk); 424 release_sock(&rx->sk);
@@ -622,7 +615,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
622 if (!net_eq(net, &init_net)) 615 if (!net_eq(net, &init_net))
623 return -EAFNOSUPPORT; 616 return -EAFNOSUPPORT;
624 617
625 /* we support transport protocol UDP only */ 618 /* we support transport protocol UDP/UDP6 only */
626 if (protocol != PF_INET) 619 if (protocol != PF_INET)
627 return -EPROTONOSUPPORT; 620 return -EPROTONOSUPPORT;
628 621
@@ -754,7 +747,7 @@ static int rxrpc_release(struct socket *sock)
754 * RxRPC network protocol 747 * RxRPC network protocol
755 */ 748 */
756static const struct proto_ops rxrpc_rpc_ops = { 749static const struct proto_ops rxrpc_rpc_ops = {
757 .family = PF_UNIX, 750 .family = PF_RXRPC,
758 .owner = THIS_MODULE, 751 .owner = THIS_MODULE,
759 .release = rxrpc_release, 752 .release = rxrpc_release,
760 .bind = rxrpc_bind, 753 .bind = rxrpc_bind,
@@ -778,7 +771,7 @@ static struct proto rxrpc_proto = {
778 .name = "RXRPC", 771 .name = "RXRPC",
779 .owner = THIS_MODULE, 772 .owner = THIS_MODULE,
780 .obj_size = sizeof(struct rxrpc_sock), 773 .obj_size = sizeof(struct rxrpc_sock),
781 .max_header = sizeof(struct rxrpc_header), 774 .max_header = sizeof(struct rxrpc_wire_header),
782}; 775};
783 776
784static const struct net_proto_family rxrpc_family_ops = { 777static const struct net_proto_family rxrpc_family_ops = {
@@ -796,7 +789,7 @@ static int __init af_rxrpc_init(void)
796 789
797 BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb)); 790 BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
798 791
799 rxrpc_epoch = htonl(get_seconds()); 792 rxrpc_epoch = get_seconds();
800 793
801 ret = -ENOMEM; 794 ret = -ENOMEM;
802 rxrpc_call_jar = kmem_cache_create( 795 rxrpc_call_jar = kmem_cache_create(
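
[Editor's note: a minimal sketch of the address-sanitising pattern introduced in rxrpc_validate_address() above. Instead of hard-coding "8 bytes of transport address", the new code computes where the meaningful part of the sockaddr ends with offsetof() and zeroes whatever the caller left beyond it. The struct below is a simplified stand-in for struct sockaddr_rxrpc, not the kernel definition.]

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct fake_srx {
	uint16_t srx_family;
	uint16_t srx_service;
	uint16_t transport_type;
	uint16_t transport_len;
	union {
		struct {
			uint16_t sin_family;
			uint16_t sin_port;
			uint32_t sin_addr;
			uint8_t  __pad[8];	/* mirrors transport.sin.__pad */
		} sin;
	} transport;
};

static void sanitise_address(struct fake_srx *srx, size_t len)
{
	/* Everything from the IPv4 pad bytes onwards carries no information;
	 * zero it so later lookups can compare the address bytewise.
	 */
	size_t tail = offsetof(struct fake_srx, transport.sin.__pad);

	if (tail < len)
		memset((char *)srx + tail, 0, len - tail);
}

The same hunk also drops the htons() round-trips on srx_service: once the socket keeps the service ID in host order, listener matching is a plain integer comparison against srx->srx_service.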
diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c
index 6d79310fcaae..277731a5e67a 100644
--- a/net/rxrpc/ar-accept.c
+++ b/net/rxrpc/ar-accept.c
@@ -27,7 +27,7 @@
27 * generate a connection-level abort 27 * generate a connection-level abort
28 */ 28 */
29static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, 29static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
30 struct rxrpc_header *hdr) 30 struct rxrpc_wire_header *whdr)
31{ 31{
32 struct msghdr msg; 32 struct msghdr msg;
33 struct kvec iov[1]; 33 struct kvec iov[1];
@@ -36,25 +36,21 @@ static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
36 36
37 _enter("%d,,", local->debug_id); 37 _enter("%d,,", local->debug_id);
38 38
39 whdr->type = RXRPC_PACKET_TYPE_BUSY;
40 whdr->serial = htonl(1);
41
39 msg.msg_name = &srx->transport.sin; 42 msg.msg_name = &srx->transport.sin;
40 msg.msg_namelen = sizeof(srx->transport.sin); 43 msg.msg_namelen = sizeof(srx->transport.sin);
41 msg.msg_control = NULL; 44 msg.msg_control = NULL;
42 msg.msg_controllen = 0; 45 msg.msg_controllen = 0;
43 msg.msg_flags = 0; 46 msg.msg_flags = 0;
44 47
45 hdr->seq = 0; 48 iov[0].iov_base = whdr;
46 hdr->type = RXRPC_PACKET_TYPE_BUSY; 49 iov[0].iov_len = sizeof(*whdr);
47 hdr->flags = 0;
48 hdr->userStatus = 0;
49 hdr->_rsvd = 0;
50
51 iov[0].iov_base = hdr;
52 iov[0].iov_len = sizeof(*hdr);
53 50
54 len = iov[0].iov_len; 51 len = iov[0].iov_len;
55 52
56 hdr->serial = htonl(1); 53 _proto("Tx BUSY %%1");
57 _proto("Tx BUSY %%%u", ntohl(hdr->serial));
58 54
59 ret = kernel_sendmsg(local->socket, &msg, iov, 1, len); 55 ret = kernel_sendmsg(local->socket, &msg, iov, 1, len);
60 if (ret < 0) { 56 if (ret < 0) {
@@ -185,8 +181,8 @@ invalid_service:
185 read_unlock_bh(&local->services_lock); 181 read_unlock_bh(&local->services_lock);
186 182
187 read_lock_bh(&call->state_lock); 183 read_lock_bh(&call->state_lock);
188 if (!test_bit(RXRPC_CALL_RELEASE, &call->flags) && 184 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
189 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) { 185 !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
190 rxrpc_get_call(call); 186 rxrpc_get_call(call);
191 rxrpc_queue_call(call); 187 rxrpc_queue_call(call);
192 } 188 }
@@ -211,8 +207,8 @@ void rxrpc_accept_incoming_calls(struct work_struct *work)
211 struct rxrpc_skb_priv *sp; 207 struct rxrpc_skb_priv *sp;
212 struct sockaddr_rxrpc srx; 208 struct sockaddr_rxrpc srx;
213 struct rxrpc_sock *rx; 209 struct rxrpc_sock *rx;
210 struct rxrpc_wire_header whdr;
214 struct sk_buff *skb; 211 struct sk_buff *skb;
215 __be16 service_id;
216 int ret; 212 int ret;
217 213
218 _enter("%d", local->debug_id); 214 _enter("%d", local->debug_id);
@@ -240,6 +236,19 @@ process_next_packet:
240 236
241 sp = rxrpc_skb(skb); 237 sp = rxrpc_skb(skb);
242 238
239 /* Set up a response packet header in case we need it */
240 whdr.epoch = htonl(sp->hdr.epoch);
241 whdr.cid = htonl(sp->hdr.cid);
242 whdr.callNumber = htonl(sp->hdr.callNumber);
243 whdr.seq = htonl(sp->hdr.seq);
244 whdr.serial = 0;
245 whdr.flags = 0;
246 whdr.type = 0;
247 whdr.userStatus = 0;
248 whdr.securityIndex = sp->hdr.securityIndex;
249 whdr._rsvd = 0;
250 whdr.serviceId = htons(sp->hdr.serviceId);
251
243 /* determine the remote address */ 252 /* determine the remote address */
244 memset(&srx, 0, sizeof(srx)); 253 memset(&srx, 0, sizeof(srx));
245 srx.srx_family = AF_RXRPC; 254 srx.srx_family = AF_RXRPC;
@@ -256,10 +265,9 @@ process_next_packet:
256 } 265 }
257 266
258 /* get the socket providing the service */ 267 /* get the socket providing the service */
259 service_id = sp->hdr.serviceId;
260 read_lock_bh(&local->services_lock); 268 read_lock_bh(&local->services_lock);
261 list_for_each_entry(rx, &local->services, listen_link) { 269 list_for_each_entry(rx, &local->services, listen_link) {
262 if (rx->service_id == service_id && 270 if (rx->srx.srx_service == sp->hdr.serviceId &&
263 rx->sk.sk_state != RXRPC_CLOSE) 271 rx->sk.sk_state != RXRPC_CLOSE)
264 goto found_service; 272 goto found_service;
265 } 273 }
@@ -267,7 +275,7 @@ process_next_packet:
267 goto invalid_service; 275 goto invalid_service;
268 276
269found_service: 277found_service:
270 _debug("found service %hd", ntohs(rx->service_id)); 278 _debug("found service %hd", rx->srx.srx_service);
271 if (sk_acceptq_is_full(&rx->sk)) 279 if (sk_acceptq_is_full(&rx->sk))
272 goto backlog_full; 280 goto backlog_full;
273 sk_acceptq_added(&rx->sk); 281 sk_acceptq_added(&rx->sk);
@@ -296,7 +304,7 @@ found_service:
296backlog_full: 304backlog_full:
297 read_unlock_bh(&local->services_lock); 305 read_unlock_bh(&local->services_lock);
298busy: 306busy:
299 rxrpc_busy(local, &srx, &sp->hdr); 307 rxrpc_busy(local, &srx, &whdr);
300 rxrpc_free_skb(skb); 308 rxrpc_free_skb(skb);
301 goto process_next_packet; 309 goto process_next_packet;
302 310
@@ -379,7 +387,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
379 rb_insert_color(&call->sock_node, &rx->calls); 387 rb_insert_color(&call->sock_node, &rx->calls);
380 if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags)) 388 if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags))
381 BUG(); 389 BUG();
382 if (test_and_set_bit(RXRPC_CALL_ACCEPTED, &call->events)) 390 if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events))
383 BUG(); 391 BUG();
384 rxrpc_queue_call(call); 392 rxrpc_queue_call(call);
385 393
@@ -395,7 +403,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
395out_release: 403out_release:
396 _debug("release %p", call); 404 _debug("release %p", call);
397 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && 405 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
398 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) 406 !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
399 rxrpc_queue_call(call); 407 rxrpc_queue_call(call);
400out_discard: 408out_discard:
401 write_unlock_bh(&call->state_lock); 409 write_unlock_bh(&call->state_lock);
@@ -407,7 +415,7 @@ out:
407} 415}
408 416
409/* 417/*
410 * handle rejectance of a call by userspace 418 * Handle rejection of a call by userspace
411 * - reject the call at the front of the queue 419 * - reject the call at the front of the queue
412 */ 420 */
413int rxrpc_reject_call(struct rxrpc_sock *rx) 421int rxrpc_reject_call(struct rxrpc_sock *rx)
@@ -434,7 +442,7 @@ int rxrpc_reject_call(struct rxrpc_sock *rx)
434 switch (call->state) { 442 switch (call->state) {
435 case RXRPC_CALL_SERVER_ACCEPTING: 443 case RXRPC_CALL_SERVER_ACCEPTING:
436 call->state = RXRPC_CALL_SERVER_BUSY; 444 call->state = RXRPC_CALL_SERVER_BUSY;
437 if (test_and_set_bit(RXRPC_CALL_REJECT_BUSY, &call->events)) 445 if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events))
438 rxrpc_queue_call(call); 446 rxrpc_queue_call(call);
439 ret = 0; 447 ret = 0;
440 goto out_release; 448 goto out_release;
@@ -458,7 +466,7 @@ int rxrpc_reject_call(struct rxrpc_sock *rx)
458out_release: 466out_release:
459 _debug("release %p", call); 467 _debug("release %p", call);
460 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && 468 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
461 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) 469 !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
462 rxrpc_queue_call(call); 470 rxrpc_queue_call(call);
463out_discard: 471out_discard:
464 write_unlock_bh(&call->state_lock); 472 write_unlock_bh(&call->state_lock);
@@ -487,7 +495,6 @@ struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock,
487 _leave(" = %p", call); 495 _leave(" = %p", call);
488 return call; 496 return call;
489} 497}
490
491EXPORT_SYMBOL(rxrpc_kernel_accept_call); 498EXPORT_SYMBOL(rxrpc_kernel_accept_call);
492 499
493/** 500/**
@@ -506,5 +513,4 @@ int rxrpc_kernel_reject_call(struct socket *sock)
506 _leave(" = %d", ret); 513 _leave(" = %d", ret);
507 return ret; 514 return ret;
508} 515}
509
510EXPORT_SYMBOL(rxrpc_kernel_reject_call); 516EXPORT_SYMBOL(rxrpc_kernel_reject_call);
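
[Editor's note: a sketch of the split the ar-accept.c hunks above move towards. Packet metadata is kept in host byte order (a "host header") and only converted into a big-endian on-the-wire header at the point where a reply such as a BUSY packet is built. The field names follow the hunks; the struct layouts themselves are illustrative, not the kernel's.]

#include <arpa/inet.h>
#include <stdint.h>

struct host_header {		/* CPU byte order, used for all logic */
	uint32_t epoch, cid, callNumber, seq, serial;
	uint8_t  type, flags, userStatus, securityIndex;
	uint16_t _rsvd, serviceId;
};

struct wire_header {		/* big-endian values, as transmitted */
	uint32_t epoch, cid, callNumber, seq, serial;
	uint8_t  type, flags, userStatus, securityIndex;
	uint16_t _rsvd, serviceId;
};

static void host_to_wire(const struct host_header *h, struct wire_header *w)
{
	w->epoch	 = htonl(h->epoch);
	w->cid		 = htonl(h->cid);
	w->callNumber	 = htonl(h->callNumber);
	w->seq		 = htonl(h->seq);
	w->serial	 = htonl(h->serial);
	w->type		 = h->type;		/* single bytes need no swap */
	w->flags	 = h->flags;
	w->userStatus	 = h->userStatus;
	w->securityIndex = h->securityIndex;
	w->_rsvd	 = htons(h->_rsvd);
	w->serviceId	 = htons(h->serviceId);
}

This is why rxrpc_busy() now receives a prebuilt wire header: the accept loop fills it in once per packet, and the BUSY path only has to set the type and serial before sending.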
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index adc555e0323d..16d967075eaf 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -23,7 +23,7 @@
23 * How long to wait before scheduling ACK generation after seeing a 23 * How long to wait before scheduling ACK generation after seeing a
24 * packet with RXRPC_REQUEST_ACK set (in jiffies). 24 * packet with RXRPC_REQUEST_ACK set (in jiffies).
25 */ 25 */
26unsigned rxrpc_requested_ack_delay = 1; 26unsigned int rxrpc_requested_ack_delay = 1;
27 27
28/* 28/*
29 * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). 29 * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
@@ -32,7 +32,7 @@ unsigned rxrpc_requested_ack_delay = 1;
32 * all consumed within this time we will send a DELAY ACK if an ACK was not 32 * all consumed within this time we will send a DELAY ACK if an ACK was not
33 * requested to let the sender know it doesn't need to resend. 33 * requested to let the sender know it doesn't need to resend.
34 */ 34 */
35unsigned rxrpc_soft_ack_delay = 1 * HZ; 35unsigned int rxrpc_soft_ack_delay = 1 * HZ;
36 36
37/* 37/*
38 * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). 38 * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
@@ -41,7 +41,7 @@ unsigned rxrpc_soft_ack_delay = 1 * HZ;
41 * further packets aren't immediately received to decide when to send an IDLE 41 * further packets aren't immediately received to decide when to send an IDLE
42 * ACK let the other end know that it can free up its Tx buffer space. 42 * ACK let the other end know that it can free up its Tx buffer space.
43 */ 43 */
44unsigned rxrpc_idle_ack_delay = 0.5 * HZ; 44unsigned int rxrpc_idle_ack_delay = 0.5 * HZ;
45 45
46/* 46/*
47 * Receive window size in packets. This indicates the maximum number of 47 * Receive window size in packets. This indicates the maximum number of
@@ -49,19 +49,19 @@ unsigned rxrpc_idle_ack_delay = 0.5 * HZ;
49 * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further 49 * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
50 * packets. 50 * packets.
51 */ 51 */
52unsigned rxrpc_rx_window_size = 32; 52unsigned int rxrpc_rx_window_size = 32;
53 53
54/* 54/*
55 * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet 55 * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet
56 * made by gluing normal packets together that we're willing to handle. 56 * made by gluing normal packets together that we're willing to handle.
57 */ 57 */
58unsigned rxrpc_rx_mtu = 5692; 58unsigned int rxrpc_rx_mtu = 5692;
59 59
60/* 60/*
61 * The maximum number of fragments in a received jumbo packet that we tell the 61 * The maximum number of fragments in a received jumbo packet that we tell the
62 * sender that we're willing to handle. 62 * sender that we're willing to handle.
63 */ 63 */
64unsigned rxrpc_rx_jumbo_max = 4; 64unsigned int rxrpc_rx_jumbo_max = 4;
65 65
66static const char *rxrpc_acks(u8 reason) 66static const char *rxrpc_acks(u8 reason)
67{ 67{
@@ -91,7 +91,7 @@ static const s8 rxrpc_ack_priority[] = {
91 * propose an ACK be sent 91 * propose an ACK be sent
92 */ 92 */
93void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, 93void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
94 __be32 serial, bool immediate) 94 u32 serial, bool immediate)
95{ 95{
96 unsigned long expiry; 96 unsigned long expiry;
97 s8 prior = rxrpc_ack_priority[ack_reason]; 97 s8 prior = rxrpc_ack_priority[ack_reason];
@@ -99,8 +99,7 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
99 ASSERTCMP(prior, >, 0); 99 ASSERTCMP(prior, >, 0);
100 100
101 _enter("{%d},%s,%%%x,%u", 101 _enter("{%d},%s,%%%x,%u",
102 call->debug_id, rxrpc_acks(ack_reason), ntohl(serial), 102 call->debug_id, rxrpc_acks(ack_reason), serial, immediate);
103 immediate);
104 103
105 if (prior < rxrpc_ack_priority[call->ackr_reason]) { 104 if (prior < rxrpc_ack_priority[call->ackr_reason]) {
106 if (immediate) 105 if (immediate)
@@ -139,7 +138,7 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
139 expiry = rxrpc_requested_ack_delay; 138 expiry = rxrpc_requested_ack_delay;
140 if (!expiry) 139 if (!expiry)
141 goto cancel_timer; 140 goto cancel_timer;
142 if (!immediate || serial == cpu_to_be32(1)) { 141 if (!immediate || serial == 1) {
143 _debug("run defer timer"); 142 _debug("run defer timer");
144 goto run_timer; 143 goto run_timer;
145 } 144 }
@@ -157,11 +156,11 @@ run_timer:
157 return; 156 return;
158 157
159cancel_timer: 158cancel_timer:
160 _debug("cancel timer %%%u", ntohl(serial)); 159 _debug("cancel timer %%%u", serial);
161 try_to_del_timer_sync(&call->ack_timer); 160 try_to_del_timer_sync(&call->ack_timer);
162 read_lock_bh(&call->state_lock); 161 read_lock_bh(&call->state_lock);
163 if (call->state <= RXRPC_CALL_COMPLETE && 162 if (call->state <= RXRPC_CALL_COMPLETE &&
164 !test_and_set_bit(RXRPC_CALL_ACK, &call->events)) 163 !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
165 rxrpc_queue_call(call); 164 rxrpc_queue_call(call);
166 read_unlock_bh(&call->state_lock); 165 read_unlock_bh(&call->state_lock);
167} 166}
@@ -170,7 +169,7 @@ cancel_timer:
170 * propose an ACK be sent, locking the call structure 169 * propose an ACK be sent, locking the call structure
171 */ 170 */
172void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, 171void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
173 __be32 serial, bool immediate) 172 u32 serial, bool immediate)
174{ 173{
175 s8 prior = rxrpc_ack_priority[ack_reason]; 174 s8 prior = rxrpc_ack_priority[ack_reason];
176 175
@@ -193,7 +192,7 @@ static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
193 192
194 if (resend & 1) { 193 if (resend & 1) {
195 _debug("SET RESEND"); 194 _debug("SET RESEND");
196 set_bit(RXRPC_CALL_RESEND, &call->events); 195 set_bit(RXRPC_CALL_EV_RESEND, &call->events);
197 } 196 }
198 197
199 if (resend & 2) { 198 if (resend & 2) {
@@ -203,7 +202,7 @@ static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
203 } else { 202 } else {
204 _debug("KILL RESEND TIMER"); 203 _debug("KILL RESEND TIMER");
205 del_timer_sync(&call->resend_timer); 204 del_timer_sync(&call->resend_timer);
206 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events); 205 clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
207 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 206 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
208 } 207 }
209 read_unlock_bh(&call->state_lock); 208 read_unlock_bh(&call->state_lock);
@@ -214,8 +213,8 @@ static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
214 */ 213 */
215static void rxrpc_resend(struct rxrpc_call *call) 214static void rxrpc_resend(struct rxrpc_call *call)
216{ 215{
216 struct rxrpc_wire_header *whdr;
217 struct rxrpc_skb_priv *sp; 217 struct rxrpc_skb_priv *sp;
218 struct rxrpc_header *hdr;
219 struct sk_buff *txb; 218 struct sk_buff *txb;
220 unsigned long *p_txb, resend_at; 219 unsigned long *p_txb, resend_at;
221 bool stop; 220 bool stop;
@@ -247,14 +246,13 @@ static void rxrpc_resend(struct rxrpc_call *call)
247 sp->need_resend = false; 246 sp->need_resend = false;
248 247
249 /* each Tx packet has a new serial number */ 248 /* each Tx packet has a new serial number */
250 sp->hdr.serial = 249 sp->hdr.serial = atomic_inc_return(&call->conn->serial);
251 htonl(atomic_inc_return(&call->conn->serial));
252 250
253 hdr = (struct rxrpc_header *) txb->head; 251 whdr = (struct rxrpc_wire_header *)txb->head;
254 hdr->serial = sp->hdr.serial; 252 whdr->serial = htonl(sp->hdr.serial);
255 253
256 _proto("Tx DATA %%%u { #%d }", 254 _proto("Tx DATA %%%u { #%d }",
257 ntohl(sp->hdr.serial), ntohl(sp->hdr.seq)); 255 sp->hdr.serial, sp->hdr.seq);
258 if (rxrpc_send_packet(call->conn->trans, txb) < 0) { 256 if (rxrpc_send_packet(call->conn->trans, txb) < 0) {
259 stop = true; 257 stop = true;
260 sp->resend_at = jiffies + 3; 258 sp->resend_at = jiffies + 3;
@@ -428,7 +426,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
428 int tail = call->acks_tail, old_tail; 426 int tail = call->acks_tail, old_tail;
429 int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz); 427 int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);
430 428
431 _enter("{%u,%u},%u", call->acks_hard, win, hard); 429 kenter("{%u,%u},%u", call->acks_hard, win, hard);
432 430
433 ASSERTCMP(hard - call->acks_hard, <=, win); 431 ASSERTCMP(hard - call->acks_hard, <=, win);
434 432
@@ -478,11 +476,11 @@ static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
478 sp = rxrpc_skb(skb); 476 sp = rxrpc_skb(skb);
479 477
480 _debug("drain OOS packet %d [%d]", 478 _debug("drain OOS packet %d [%d]",
481 ntohl(sp->hdr.seq), call->rx_first_oos); 479 sp->hdr.seq, call->rx_first_oos);
482 480
483 if (ntohl(sp->hdr.seq) != call->rx_first_oos) { 481 if (sp->hdr.seq != call->rx_first_oos) {
484 skb_queue_head(&call->rx_oos_queue, skb); 482 skb_queue_head(&call->rx_oos_queue, skb);
485 call->rx_first_oos = ntohl(rxrpc_skb(skb)->hdr.seq); 483 call->rx_first_oos = rxrpc_skb(skb)->hdr.seq;
486 _debug("requeue %p {%u}", skb, call->rx_first_oos); 484 _debug("requeue %p {%u}", skb, call->rx_first_oos);
487 } else { 485 } else {
488 skb->mark = RXRPC_SKB_MARK_DATA; 486 skb->mark = RXRPC_SKB_MARK_DATA;
@@ -496,8 +494,7 @@ static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
496 /* find out what the next packet is */ 494 /* find out what the next packet is */
497 skb = skb_peek(&call->rx_oos_queue); 495 skb = skb_peek(&call->rx_oos_queue);
498 if (skb) 496 if (skb)
499 call->rx_first_oos = 497 call->rx_first_oos = rxrpc_skb(skb)->hdr.seq;
500 ntohl(rxrpc_skb(skb)->hdr.seq);
501 else 498 else
502 call->rx_first_oos = 0; 499 call->rx_first_oos = 0;
503 _debug("peek %p {%u}", skb, call->rx_first_oos); 500 _debug("peek %p {%u}", skb, call->rx_first_oos);
@@ -522,7 +519,7 @@ static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
522 u32 seq; 519 u32 seq;
523 520
524 sp = rxrpc_skb(skb); 521 sp = rxrpc_skb(skb);
525 seq = ntohl(sp->hdr.seq); 522 seq = sp->hdr.seq;
526 _enter(",,{%u}", seq); 523 _enter(",,{%u}", seq);
527 524
528 skb->destructor = rxrpc_packet_destructor; 525 skb->destructor = rxrpc_packet_destructor;
@@ -535,9 +532,8 @@ static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
535 532
536 skb_queue_walk(&call->rx_oos_queue, p) { 533 skb_queue_walk(&call->rx_oos_queue, p) {
537 psp = rxrpc_skb(p); 534 psp = rxrpc_skb(p);
538 if (ntohl(psp->hdr.seq) > seq) { 535 if (psp->hdr.seq > seq) {
539 _debug("insert oos #%u before #%u", 536 _debug("insert oos #%u before #%u", seq, psp->hdr.seq);
540 seq, ntohl(psp->hdr.seq));
541 skb_insert(p, skb, &call->rx_oos_queue); 537 skb_insert(p, skb, &call->rx_oos_queue);
542 goto inserted; 538 goto inserted;
543 } 539 }
@@ -555,7 +551,7 @@ inserted:
555 if (call->state < RXRPC_CALL_COMPLETE && 551 if (call->state < RXRPC_CALL_COMPLETE &&
556 call->rx_data_post == call->rx_first_oos) { 552 call->rx_data_post == call->rx_first_oos) {
557 _debug("drain rx oos now"); 553 _debug("drain rx oos now");
558 set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events); 554 set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events);
559 } 555 }
560 read_unlock(&call->state_lock); 556 read_unlock(&call->state_lock);
561 557
@@ -586,7 +582,7 @@ static void rxrpc_zap_tx_window(struct rxrpc_call *call)
586 582
587 skb = (struct sk_buff *) _skb; 583 skb = (struct sk_buff *) _skb;
588 sp = rxrpc_skb(skb); 584 sp = rxrpc_skb(skb);
589 _debug("+++ clear Tx %u", ntohl(sp->hdr.seq)); 585 _debug("+++ clear Tx %u", sp->hdr.seq);
590 rxrpc_free_skb(skb); 586 rxrpc_free_skb(skb);
591 } 587 }
592 588
@@ -657,8 +653,7 @@ process_further:
657 /* data packets that wind up here have been received out of 653 /* data packets that wind up here have been received out of
658 * order, need security processing or are jumbo packets */ 654 * order, need security processing or are jumbo packets */
659 case RXRPC_PACKET_TYPE_DATA: 655 case RXRPC_PACKET_TYPE_DATA:
660 _proto("OOSQ DATA %%%u { #%u }", 656 _proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
661 ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
662 657
663 /* secured packets must be verified and possibly decrypted */ 658 /* secured packets must be verified and possibly decrypted */
664 if (rxrpc_verify_packet(call, skb, _abort_code) < 0) 659 if (rxrpc_verify_packet(call, skb, _abort_code) < 0)
@@ -676,7 +671,7 @@ process_further:
676 if (!skb_pull(skb, sizeof(ack))) 671 if (!skb_pull(skb, sizeof(ack)))
677 BUG(); 672 BUG();
678 673
679 latest = ntohl(sp->hdr.serial); 674 latest = sp->hdr.serial;
680 hard = ntohl(ack.firstPacket); 675 hard = ntohl(ack.firstPacket);
681 tx = atomic_read(&call->sequence); 676 tx = atomic_read(&call->sequence);
682 677
@@ -793,7 +788,7 @@ all_acked:
793 788
794 del_timer_sync(&call->resend_timer); 789 del_timer_sync(&call->resend_timer);
795 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 790 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
796 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events); 791 clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
797 792
798 if (call->acks_window) 793 if (call->acks_window)
799 rxrpc_zap_tx_window(call); 794 rxrpc_zap_tx_window(call);
@@ -881,16 +876,17 @@ void rxrpc_process_call(struct work_struct *work)
881{ 876{
882 struct rxrpc_call *call = 877 struct rxrpc_call *call =
883 container_of(work, struct rxrpc_call, processor); 878 container_of(work, struct rxrpc_call, processor);
879 struct rxrpc_wire_header whdr;
884 struct rxrpc_ackpacket ack; 880 struct rxrpc_ackpacket ack;
885 struct rxrpc_ackinfo ackinfo; 881 struct rxrpc_ackinfo ackinfo;
886 struct rxrpc_header hdr;
887 struct msghdr msg; 882 struct msghdr msg;
888 struct kvec iov[5]; 883 struct kvec iov[5];
884 enum rxrpc_call_event genbit;
889 unsigned long bits; 885 unsigned long bits;
890 __be32 data, pad; 886 __be32 data, pad;
891 size_t len; 887 size_t len;
892 int genbit, loop, nbit, ioc, ret, mtu; 888 int loop, nbit, ioc, ret, mtu;
893 u32 abort_code = RX_PROTOCOL_ERROR; 889 u32 serial, abort_code = RX_PROTOCOL_ERROR;
894 u8 *acks = NULL; 890 u8 *acks = NULL;
895 891
896 //printk("\n--------------------\n"); 892 //printk("\n--------------------\n");
@@ -911,33 +907,33 @@ void rxrpc_process_call(struct work_struct *work)
911 msg.msg_controllen = 0; 907 msg.msg_controllen = 0;
912 msg.msg_flags = 0; 908 msg.msg_flags = 0;
913 909
914 hdr.epoch = call->conn->epoch; 910 whdr.epoch = htonl(call->conn->epoch);
915 hdr.cid = call->cid; 911 whdr.cid = htonl(call->cid);
916 hdr.callNumber = call->call_id; 912 whdr.callNumber = htonl(call->call_id);
917 hdr.seq = 0; 913 whdr.seq = 0;
918 hdr.type = RXRPC_PACKET_TYPE_ACK; 914 whdr.type = RXRPC_PACKET_TYPE_ACK;
919 hdr.flags = call->conn->out_clientflag; 915 whdr.flags = call->conn->out_clientflag;
920 hdr.userStatus = 0; 916 whdr.userStatus = 0;
921 hdr.securityIndex = call->conn->security_ix; 917 whdr.securityIndex = call->conn->security_ix;
922 hdr._rsvd = 0; 918 whdr._rsvd = 0;
923 hdr.serviceId = call->conn->service_id; 919 whdr.serviceId = htons(call->service_id);
924 920
925 memset(iov, 0, sizeof(iov)); 921 memset(iov, 0, sizeof(iov));
926 iov[0].iov_base = &hdr; 922 iov[0].iov_base = &whdr;
927 iov[0].iov_len = sizeof(hdr); 923 iov[0].iov_len = sizeof(whdr);
928 924
929 /* deal with events of a final nature */ 925 /* deal with events of a final nature */
930 if (test_bit(RXRPC_CALL_RELEASE, &call->events)) { 926 if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
931 rxrpc_release_call(call); 927 rxrpc_release_call(call);
932 clear_bit(RXRPC_CALL_RELEASE, &call->events); 928 clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
933 } 929 }
934 930
935 if (test_bit(RXRPC_CALL_RCVD_ERROR, &call->events)) { 931 if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
936 int error; 932 int error;
937 933
938 clear_bit(RXRPC_CALL_CONN_ABORT, &call->events); 934 clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
939 clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events); 935 clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events);
940 clear_bit(RXRPC_CALL_ABORT, &call->events); 936 clear_bit(RXRPC_CALL_EV_ABORT, &call->events);
941 937
942 error = call->conn->trans->peer->net_error; 938 error = call->conn->trans->peer->net_error;
943 _debug("post net error %d", error); 939 _debug("post net error %d", error);
@@ -945,47 +941,47 @@ void rxrpc_process_call(struct work_struct *work)
945 if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR, 941 if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR,
946 error, true) < 0) 942 error, true) < 0)
947 goto no_mem; 943 goto no_mem;
948 clear_bit(RXRPC_CALL_RCVD_ERROR, &call->events); 944 clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
949 goto kill_ACKs; 945 goto kill_ACKs;
950 } 946 }
951 947
952 if (test_bit(RXRPC_CALL_CONN_ABORT, &call->events)) { 948 if (test_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events)) {
953 ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); 949 ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
954 950
955 clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events); 951 clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events);
956 clear_bit(RXRPC_CALL_ABORT, &call->events); 952 clear_bit(RXRPC_CALL_EV_ABORT, &call->events);
957 953
958 _debug("post conn abort"); 954 _debug("post conn abort");
959 955
960 if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, 956 if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
961 call->conn->error, true) < 0) 957 call->conn->error, true) < 0)
962 goto no_mem; 958 goto no_mem;
963 clear_bit(RXRPC_CALL_CONN_ABORT, &call->events); 959 clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
964 goto kill_ACKs; 960 goto kill_ACKs;
965 } 961 }
966 962
967 if (test_bit(RXRPC_CALL_REJECT_BUSY, &call->events)) { 963 if (test_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) {
968 hdr.type = RXRPC_PACKET_TYPE_BUSY; 964 whdr.type = RXRPC_PACKET_TYPE_BUSY;
969 genbit = RXRPC_CALL_REJECT_BUSY; 965 genbit = RXRPC_CALL_EV_REJECT_BUSY;
970 goto send_message; 966 goto send_message;
971 } 967 }
972 968
973 if (test_bit(RXRPC_CALL_ABORT, &call->events)) { 969 if (test_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
974 ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); 970 ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
975 971
976 if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, 972 if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
977 ECONNABORTED, true) < 0) 973 ECONNABORTED, true) < 0)
978 goto no_mem; 974 goto no_mem;
979 hdr.type = RXRPC_PACKET_TYPE_ABORT; 975 whdr.type = RXRPC_PACKET_TYPE_ABORT;
980 data = htonl(call->abort_code); 976 data = htonl(call->abort_code);
981 iov[1].iov_base = &data; 977 iov[1].iov_base = &data;
982 iov[1].iov_len = sizeof(data); 978 iov[1].iov_len = sizeof(data);
983 genbit = RXRPC_CALL_ABORT; 979 genbit = RXRPC_CALL_EV_ABORT;
984 goto send_message; 980 goto send_message;
985 } 981 }
986 982
987 if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) { 983 if (test_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) {
988 genbit = RXRPC_CALL_ACK_FINAL; 984 genbit = RXRPC_CALL_EV_ACK_FINAL;
989 985
990 ack.bufferSpace = htons(8); 986 ack.bufferSpace = htons(8);
991 ack.maxSkew = 0; 987 ack.maxSkew = 0;
@@ -995,9 +991,9 @@ void rxrpc_process_call(struct work_struct *work)
995 call->ackr_reason = 0; 991 call->ackr_reason = 0;
996 992
997 spin_lock_bh(&call->lock); 993 spin_lock_bh(&call->lock);
998 ack.serial = call->ackr_serial; 994 ack.serial = htonl(call->ackr_serial);
999 ack.previousPacket = call->ackr_prev_seq; 995 ack.previousPacket = htonl(call->ackr_prev_seq);
1000 ack.firstPacket = htonl(call->rx_data_eaten + 1); 996 ack.firstPacket = htonl(call->rx_data_eaten + 1);
1001 spin_unlock_bh(&call->lock); 997 spin_unlock_bh(&call->lock);
1002 998
1003 pad = 0; 999 pad = 0;
@@ -1011,12 +1007,12 @@ void rxrpc_process_call(struct work_struct *work)
1011 goto send_ACK; 1007 goto send_ACK;
1012 } 1008 }
1013 1009
1014 if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) | 1010 if (call->events & ((1 << RXRPC_CALL_EV_RCVD_BUSY) |
1015 (1 << RXRPC_CALL_RCVD_ABORT)) 1011 (1 << RXRPC_CALL_EV_RCVD_ABORT))
1016 ) { 1012 ) {
1017 u32 mark; 1013 u32 mark;
1018 1014
1019 if (test_bit(RXRPC_CALL_RCVD_ABORT, &call->events)) 1015 if (test_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events))
1020 mark = RXRPC_SKB_MARK_REMOTE_ABORT; 1016 mark = RXRPC_SKB_MARK_REMOTE_ABORT;
1021 else 1017 else
1022 mark = RXRPC_SKB_MARK_BUSY; 1018 mark = RXRPC_SKB_MARK_BUSY;
@@ -1026,22 +1022,22 @@ void rxrpc_process_call(struct work_struct *work)
1026 if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0) 1022 if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0)
1027 goto no_mem; 1023 goto no_mem;
1028 1024
1029 clear_bit(RXRPC_CALL_RCVD_BUSY, &call->events); 1025 clear_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events);
1030 clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events); 1026 clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
1031 goto kill_ACKs; 1027 goto kill_ACKs;
1032 } 1028 }
1033 1029
1034 if (test_and_clear_bit(RXRPC_CALL_RCVD_ACKALL, &call->events)) { 1030 if (test_and_clear_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events)) {
1035 _debug("do implicit ackall"); 1031 _debug("do implicit ackall");
1036 rxrpc_clear_tx_window(call); 1032 rxrpc_clear_tx_window(call);
1037 } 1033 }
1038 1034
1039 if (test_bit(RXRPC_CALL_LIFE_TIMER, &call->events)) { 1035 if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) {
1040 write_lock_bh(&call->state_lock); 1036 write_lock_bh(&call->state_lock);
1041 if (call->state <= RXRPC_CALL_COMPLETE) { 1037 if (call->state <= RXRPC_CALL_COMPLETE) {
1042 call->state = RXRPC_CALL_LOCALLY_ABORTED; 1038 call->state = RXRPC_CALL_LOCALLY_ABORTED;
1043 call->abort_code = RX_CALL_TIMEOUT; 1039 call->abort_code = RX_CALL_TIMEOUT;
1044 set_bit(RXRPC_CALL_ABORT, &call->events); 1040 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
1045 } 1041 }
1046 write_unlock_bh(&call->state_lock); 1042 write_unlock_bh(&call->state_lock);
1047 1043
@@ -1050,7 +1046,7 @@ void rxrpc_process_call(struct work_struct *work)
1050 ETIME, true) < 0) 1046 ETIME, true) < 0)
1051 goto no_mem; 1047 goto no_mem;
1052 1048
1053 clear_bit(RXRPC_CALL_LIFE_TIMER, &call->events); 1049 clear_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events);
1054 goto kill_ACKs; 1050 goto kill_ACKs;
1055 } 1051 }
1056 1052
@@ -1071,13 +1067,13 @@ void rxrpc_process_call(struct work_struct *work)
1071 } 1067 }
1072 1068
1073 /* handle resending */ 1069 /* handle resending */
1074 if (test_and_clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events)) 1070 if (test_and_clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
1075 rxrpc_resend_timer(call); 1071 rxrpc_resend_timer(call);
1076 if (test_and_clear_bit(RXRPC_CALL_RESEND, &call->events)) 1072 if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events))
1077 rxrpc_resend(call); 1073 rxrpc_resend(call);
1078 1074
1079 /* consider sending an ordinary ACK */ 1075 /* consider sending an ordinary ACK */
1080 if (test_bit(RXRPC_CALL_ACK, &call->events)) { 1076 if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) {
1081 _debug("send ACK: window: %d - %d { %lx }", 1077 _debug("send ACK: window: %d - %d { %lx }",
1082 call->rx_data_eaten, call->ackr_win_top, 1078 call->rx_data_eaten, call->ackr_win_top,
1083 call->ackr_window[0]); 1079 call->ackr_window[0]);
@@ -1085,11 +1081,11 @@ void rxrpc_process_call(struct work_struct *work)
1085 if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST && 1081 if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST &&
1086 call->ackr_reason != RXRPC_ACK_PING_RESPONSE) { 1082 call->ackr_reason != RXRPC_ACK_PING_RESPONSE) {
1087 /* ACK by sending reply DATA packet in this state */ 1083 /* ACK by sending reply DATA packet in this state */
1088 clear_bit(RXRPC_CALL_ACK, &call->events); 1084 clear_bit(RXRPC_CALL_EV_ACK, &call->events);
1089 goto maybe_reschedule; 1085 goto maybe_reschedule;
1090 } 1086 }
1091 1087
1092 genbit = RXRPC_CALL_ACK; 1088 genbit = RXRPC_CALL_EV_ACK;
1093 1089
1094 acks = kzalloc(call->ackr_win_top - call->rx_data_eaten, 1090 acks = kzalloc(call->ackr_win_top - call->rx_data_eaten,
1095 GFP_NOFS); 1091 GFP_NOFS);
@@ -1099,13 +1095,11 @@ void rxrpc_process_call(struct work_struct *work)
1099 //hdr.flags = RXRPC_SLOW_START_OK; 1095 //hdr.flags = RXRPC_SLOW_START_OK;
1100 ack.bufferSpace = htons(8); 1096 ack.bufferSpace = htons(8);
1101 ack.maxSkew = 0; 1097 ack.maxSkew = 0;
1102 ack.serial = 0;
1103 ack.reason = 0;
1104 1098
1105 spin_lock_bh(&call->lock); 1099 spin_lock_bh(&call->lock);
1106 ack.reason = call->ackr_reason; 1100 ack.reason = call->ackr_reason;
1107 ack.serial = call->ackr_serial; 1101 ack.serial = htonl(call->ackr_serial);
1108 ack.previousPacket = call->ackr_prev_seq; 1102 ack.previousPacket = htonl(call->ackr_prev_seq);
1109 ack.firstPacket = htonl(call->rx_data_eaten + 1); 1103 ack.firstPacket = htonl(call->rx_data_eaten + 1);
1110 1104
1111 ack.nAcks = 0; 1105 ack.nAcks = 0;
@@ -1152,7 +1146,7 @@ void rxrpc_process_call(struct work_struct *work)
1152 1146
1153 /* handle completion of security negotiations on an incoming 1147 /* handle completion of security negotiations on an incoming
1154 * connection */ 1148 * connection */
1155 if (test_and_clear_bit(RXRPC_CALL_SECURED, &call->events)) { 1149 if (test_and_clear_bit(RXRPC_CALL_EV_SECURED, &call->events)) {
1156 _debug("secured"); 1150 _debug("secured");
1157 spin_lock_bh(&call->lock); 1151 spin_lock_bh(&call->lock);
1158 1152
@@ -1160,7 +1154,7 @@ void rxrpc_process_call(struct work_struct *work)
1160 _debug("securing"); 1154 _debug("securing");
1161 write_lock(&call->conn->lock); 1155 write_lock(&call->conn->lock);
1162 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && 1156 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
1163 !test_bit(RXRPC_CALL_RELEASE, &call->events)) { 1157 !test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
1164 _debug("not released"); 1158 _debug("not released");
1165 call->state = RXRPC_CALL_SERVER_ACCEPTING; 1159 call->state = RXRPC_CALL_SERVER_ACCEPTING;
1166 list_move_tail(&call->accept_link, 1160 list_move_tail(&call->accept_link,
@@ -1169,39 +1163,39 @@ void rxrpc_process_call(struct work_struct *work)
1169 write_unlock(&call->conn->lock); 1163 write_unlock(&call->conn->lock);
1170 read_lock(&call->state_lock); 1164 read_lock(&call->state_lock);
1171 if (call->state < RXRPC_CALL_COMPLETE) 1165 if (call->state < RXRPC_CALL_COMPLETE)
1172 set_bit(RXRPC_CALL_POST_ACCEPT, &call->events); 1166 set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
1173 read_unlock(&call->state_lock); 1167 read_unlock(&call->state_lock);
1174 } 1168 }
1175 1169
1176 spin_unlock_bh(&call->lock); 1170 spin_unlock_bh(&call->lock);
1177 if (!test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) 1171 if (!test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events))
1178 goto maybe_reschedule; 1172 goto maybe_reschedule;
1179 } 1173 }
1180 1174
1181 /* post a notification of an acceptable connection to the app */ 1175 /* post a notification of an acceptable connection to the app */
1182 if (test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) { 1176 if (test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) {
1183 _debug("post accept"); 1177 _debug("post accept");
1184 if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL, 1178 if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL,
1185 0, false) < 0) 1179 0, false) < 0)
1186 goto no_mem; 1180 goto no_mem;
1187 clear_bit(RXRPC_CALL_POST_ACCEPT, &call->events); 1181 clear_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
1188 goto maybe_reschedule; 1182 goto maybe_reschedule;
1189 } 1183 }
1190 1184
1191 /* handle incoming call acceptance */ 1185 /* handle incoming call acceptance */
1192 if (test_and_clear_bit(RXRPC_CALL_ACCEPTED, &call->events)) { 1186 if (test_and_clear_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) {
1193 _debug("accepted"); 1187 _debug("accepted");
1194 ASSERTCMP(call->rx_data_post, ==, 0); 1188 ASSERTCMP(call->rx_data_post, ==, 0);
1195 call->rx_data_post = 1; 1189 call->rx_data_post = 1;
1196 read_lock_bh(&call->state_lock); 1190 read_lock_bh(&call->state_lock);
1197 if (call->state < RXRPC_CALL_COMPLETE) 1191 if (call->state < RXRPC_CALL_COMPLETE)
1198 set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events); 1192 set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events);
1199 read_unlock_bh(&call->state_lock); 1193 read_unlock_bh(&call->state_lock);
1200 } 1194 }
1201 1195
1202 /* drain the out of sequence received packet queue into the packet Rx 1196 /* drain the out of sequence received packet queue into the packet Rx
1203 * queue */ 1197 * queue */
1204 if (test_and_clear_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events)) { 1198 if (test_and_clear_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) {
1205 while (call->rx_data_post == call->rx_first_oos) 1199 while (call->rx_data_post == call->rx_first_oos)
1206 if (rxrpc_drain_rx_oos_queue(call) < 0) 1200 if (rxrpc_drain_rx_oos_queue(call) < 0)
1207 break; 1201 break;
@@ -1224,9 +1218,10 @@ send_ACK:
1224 ackinfo.rxMTU = htonl(rxrpc_rx_mtu); 1218 ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
1225 ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max); 1219 ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max);
1226 1220
1227 hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); 1221 serial = atomic_inc_return(&call->conn->serial);
1222 whdr.serial = htonl(serial);
1228 _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", 1223 _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
1229 ntohl(hdr.serial), 1224 serial,
1230 ntohs(ack.maxSkew), 1225 ntohs(ack.maxSkew),
1231 ntohl(ack.firstPacket), 1226 ntohl(ack.firstPacket),
1232 ntohl(ack.previousPacket), 1227 ntohl(ack.previousPacket),
@@ -1242,8 +1237,9 @@ send_ACK:
1242send_message: 1237send_message:
1243 _debug("send message"); 1238 _debug("send message");
1244 1239
1245 hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); 1240 serial = atomic_inc_return(&call->conn->serial);
1246 _proto("Tx %s %%%u", rxrpc_pkts[hdr.type], ntohl(hdr.serial)); 1241 whdr.serial = htonl(serial);
1242 _proto("Tx %s %%%u", rxrpc_pkts[whdr.type], serial);
1247send_message_2: 1243send_message_2:
1248 1244
1249 len = iov[0].iov_len; 1245 len = iov[0].iov_len;
@@ -1280,12 +1276,12 @@ send_message_2:
1280 } 1276 }
1281 1277
1282 switch (genbit) { 1278 switch (genbit) {
1283 case RXRPC_CALL_ABORT: 1279 case RXRPC_CALL_EV_ABORT:
1284 clear_bit(genbit, &call->events); 1280 clear_bit(genbit, &call->events);
1285 clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events); 1281 clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
1286 goto kill_ACKs; 1282 goto kill_ACKs;
1287 1283
1288 case RXRPC_CALL_ACK_FINAL: 1284 case RXRPC_CALL_EV_ACK_FINAL:
1289 write_lock_bh(&call->state_lock); 1285 write_lock_bh(&call->state_lock);
1290 if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK) 1286 if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK)
1291 call->state = RXRPC_CALL_COMPLETE; 1287 call->state = RXRPC_CALL_COMPLETE;
@@ -1310,9 +1306,9 @@ send_message_2:
1310 1306
1311kill_ACKs: 1307kill_ACKs:
1312 del_timer_sync(&call->ack_timer); 1308 del_timer_sync(&call->ack_timer);
1313 if (test_and_clear_bit(RXRPC_CALL_ACK_FINAL, &call->events)) 1309 if (test_and_clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events))
1314 rxrpc_put_call(call); 1310 rxrpc_put_call(call);
1315 clear_bit(RXRPC_CALL_ACK, &call->events); 1311 clear_bit(RXRPC_CALL_EV_ACK, &call->events);
1316 1312
1317maybe_reschedule: 1313maybe_reschedule:
1318 if (call->events || !skb_queue_empty(&call->rx_queue)) { 1314 if (call->events || !skb_queue_empty(&call->rx_queue)) {
@@ -1326,12 +1322,11 @@ maybe_reschedule:
1326 if (call->state >= RXRPC_CALL_COMPLETE && 1322 if (call->state >= RXRPC_CALL_COMPLETE &&
1327 !list_empty(&call->accept_link)) { 1323 !list_empty(&call->accept_link)) {
1328 _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }", 1324 _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
1329 call, call->events, call->flags, 1325 call, call->events, call->flags, call->conn->cid);
1330 ntohl(call->conn->cid));
1331 1326
1332 read_lock_bh(&call->state_lock); 1327 read_lock_bh(&call->state_lock);
1333 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && 1328 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
1334 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) 1329 !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
1335 rxrpc_queue_call(call); 1330 rxrpc_queue_call(call);
1336 read_unlock_bh(&call->state_lock); 1331 read_unlock_bh(&call->state_lock);
1337 } 1332 }
@@ -1345,7 +1340,7 @@ error:
1345 * this means there's a race between clearing the flag and setting the 1340 * this means there's a race between clearing the flag and setting the
1346 * work pending bit and the work item being processed again */ 1341 * work pending bit and the work item being processed again */
1347 if (call->events && !work_pending(&call->processor)) { 1342 if (call->events && !work_pending(&call->processor)) {
1348 _debug("jumpstart %x", ntohl(call->conn->cid)); 1343 _debug("jumpstart %x", call->conn->cid);
1349 rxrpc_queue_call(call); 1344 rxrpc_queue_call(call);
1350 } 1345 }
1351 1346
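
[Editor's note: a small standalone demonstration of why the ar-ack.c hunks above pull ntohl() out of the fast paths and keep sp->hdr.seq and sp->hdr.serial in host order. On a little-endian machine, comparing raw big-endian wire values gives the wrong ordering, so every comparison previously had to byte-swap first; host-order copies make plain integer compares correct.]

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t seq_a = 2, seq_b = 256;
	uint32_t wire_a = htonl(seq_a), wire_b = htonl(seq_b);

	/* Comparing wire-order values is wrong on little-endian hosts, where
	 * htonl(256) == 0x00010000 but htonl(2) == 0x02000000.
	 */
	printf("wire-order compare says 2 > 256: %d\n", wire_a > wire_b);

	/* Comparing host-order values is always correct. */
	printf("host-order compare says 2 > 256: %d\n",
	       ntohl(wire_a) > ntohl(wire_b));
	return 0;
}

The out-of-sequence queue insertion (psp->hdr.seq > seq) and the hard-ACK window arithmetic both rely on this ordering, which is what makes the conversion-at-the-boundary approach in these hunks safer than scattering ntohl() calls through the logic.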
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index a9e05db0f5d5..7c8d300ade9b 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -21,14 +21,14 @@
21/* 21/*
22 * Maximum lifetime of a call (in jiffies). 22 * Maximum lifetime of a call (in jiffies).
23 */ 23 */
24unsigned rxrpc_max_call_lifetime = 60 * HZ; 24unsigned int rxrpc_max_call_lifetime = 60 * HZ;
25 25
26/* 26/*
27 * Time till dead call expires after last use (in jiffies). 27 * Time till dead call expires after last use (in jiffies).
28 */ 28 */
29unsigned rxrpc_dead_call_expiry = 2 * HZ; 29unsigned int rxrpc_dead_call_expiry = 2 * HZ;
30 30
31const char *const rxrpc_call_states[] = { 31const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
32 [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", 32 [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
33 [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", 33 [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
34 [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", 34 [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl",
@@ -64,11 +64,11 @@ static DEFINE_HASHTABLE(rxrpc_call_hash, 10);
64 * Hash function for rxrpc_call_hash 64 * Hash function for rxrpc_call_hash
65 */ 65 */
66static unsigned long rxrpc_call_hashfunc( 66static unsigned long rxrpc_call_hashfunc(
67 u8 clientflag, 67 u8 in_clientflag,
68 __be32 cid, 68 u32 cid,
69 __be32 call_id, 69 u32 call_id,
70 __be32 epoch, 70 u32 epoch,
71 __be16 service_id, 71 u16 service_id,
72 sa_family_t proto, 72 sa_family_t proto,
73 void *localptr, 73 void *localptr,
74 unsigned int addr_size, 74 unsigned int addr_size,
@@ -77,7 +77,6 @@ static unsigned long rxrpc_call_hashfunc(
77 const u16 *p; 77 const u16 *p;
78 unsigned int i; 78 unsigned int i;
79 unsigned long key; 79 unsigned long key;
80 u32 hcid = ntohl(cid);
81 80
82 _enter(""); 81 _enter("");
83 82
@@ -85,12 +84,12 @@ static unsigned long rxrpc_call_hashfunc(
85 /* We just want to add up the __be32 values, so forcing the 84 /* We just want to add up the __be32 values, so forcing the
86 * cast should be okay. 85 * cast should be okay.
87 */ 86 */
88 key += (__force u32)epoch; 87 key += epoch;
89 key += (__force u16)service_id; 88 key += service_id;
90 key += (__force u32)call_id; 89 key += call_id;
91 key += (hcid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT; 90 key += (cid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT;
92 key += hcid & RXRPC_CHANNELMASK; 91 key += cid & RXRPC_CHANNELMASK;
93 key += clientflag; 92 key += in_clientflag;
94 key += proto; 93 key += proto;
95 /* Step through the peer address in 16-bit portions for speed */ 94 /* Step through the peer address in 16-bit portions for speed */
96 for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++) 95 for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++)
@@ -148,19 +147,16 @@ static void rxrpc_call_hash_del(struct rxrpc_call *call)
148 * isn't there. 147 * isn't there.
149 */ 148 */
150struct rxrpc_call *rxrpc_find_call_hash( 149struct rxrpc_call *rxrpc_find_call_hash(
151 u8 clientflag, 150 struct rxrpc_host_header *hdr,
152 __be32 cid,
153 __be32 call_id,
154 __be32 epoch,
155 __be16 service_id,
156 void *localptr, 151 void *localptr,
157 sa_family_t proto, 152 sa_family_t proto,
158 const u8 *peer_addr) 153 const void *peer_addr)
159{ 154{
160 unsigned long key; 155 unsigned long key;
161 unsigned int addr_size = 0; 156 unsigned int addr_size = 0;
162 struct rxrpc_call *call = NULL; 157 struct rxrpc_call *call = NULL;
163 struct rxrpc_call *ret = NULL; 158 struct rxrpc_call *ret = NULL;
159 u8 in_clientflag = hdr->flags & RXRPC_CLIENT_INITIATED;
164 160
165 _enter(""); 161 _enter("");
166 switch (proto) { 162 switch (proto) {
@@ -174,20 +170,21 @@ struct rxrpc_call *rxrpc_find_call_hash(
174 break; 170 break;
175 } 171 }
176 172
177 key = rxrpc_call_hashfunc(clientflag, cid, call_id, epoch, 173 key = rxrpc_call_hashfunc(in_clientflag, hdr->cid, hdr->callNumber,
178 service_id, proto, localptr, addr_size, 174 hdr->epoch, hdr->serviceId,
175 proto, localptr, addr_size,
179 peer_addr); 176 peer_addr);
180 hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) { 177 hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) {
181 if (call->hash_key == key && 178 if (call->hash_key == key &&
182 call->call_id == call_id && 179 call->call_id == hdr->callNumber &&
183 call->cid == cid && 180 call->cid == hdr->cid &&
184 call->in_clientflag == clientflag && 181 call->in_clientflag == in_clientflag &&
185 call->service_id == service_id && 182 call->service_id == hdr->serviceId &&
186 call->proto == proto && 183 call->proto == proto &&
187 call->local == localptr && 184 call->local == localptr &&
188 memcmp(call->peer_ip.ipv6_addr, peer_addr, 185 memcmp(call->peer_ip.ipv6_addr, peer_addr,
189 addr_size) == 0 && 186 addr_size) == 0 &&
190 call->epoch == epoch) { 187 call->epoch == hdr->epoch) {
191 ret = call; 188 ret = call;
192 break; 189 break;
193 } 190 }
@@ -414,12 +411,12 @@ found_extant_second:
414 */ 411 */
415struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, 412struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
416 struct rxrpc_connection *conn, 413 struct rxrpc_connection *conn,
417 struct rxrpc_header *hdr, 414 struct rxrpc_host_header *hdr,
418 gfp_t gfp) 415 gfp_t gfp)
419{ 416{
420 struct rxrpc_call *call, *candidate; 417 struct rxrpc_call *call, *candidate;
421 struct rb_node **p, *parent; 418 struct rb_node **p, *parent;
422 __be32 call_id; 419 u32 call_id;
423 420
424 _enter(",%d,,%x", conn->debug_id, gfp); 421 _enter(",%d,,%x", conn->debug_id, gfp);
425 422
@@ -433,7 +430,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
433 candidate->conn = conn; 430 candidate->conn = conn;
434 candidate->cid = hdr->cid; 431 candidate->cid = hdr->cid;
435 candidate->call_id = hdr->callNumber; 432 candidate->call_id = hdr->callNumber;
436 candidate->channel = ntohl(hdr->cid) & RXRPC_CHANNELMASK; 433 candidate->channel = hdr->cid & RXRPC_CHANNELMASK;
437 candidate->rx_data_post = 0; 434 candidate->rx_data_post = 0;
438 candidate->state = RXRPC_CALL_SERVER_ACCEPTING; 435 candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
439 if (conn->security_ix > 0) 436 if (conn->security_ix > 0)
@@ -452,7 +449,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
452 read_lock(&call->state_lock); 449 read_lock(&call->state_lock);
453 switch (call->state) { 450 switch (call->state) {
454 case RXRPC_CALL_LOCALLY_ABORTED: 451 case RXRPC_CALL_LOCALLY_ABORTED:
455 if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) 452 if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events))
456 rxrpc_queue_call(call); 453 rxrpc_queue_call(call);
457 case RXRPC_CALL_REMOTELY_ABORTED: 454 case RXRPC_CALL_REMOTELY_ABORTED:
458 read_unlock(&call->state_lock); 455 read_unlock(&call->state_lock);
@@ -492,9 +489,9 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
492 /* The tree is sorted in order of the __be32 value without 489 /* The tree is sorted in order of the __be32 value without
493 * turning it into host order. 490 * turning it into host order.
494 */ 491 */
495 if ((__force u32)call_id < (__force u32)call->call_id) 492 if (call_id < call->call_id)
496 p = &(*p)->rb_left; 493 p = &(*p)->rb_left;
497 else if ((__force u32)call_id > (__force u32)call->call_id) 494 else if (call_id > call->call_id)
498 p = &(*p)->rb_right; 495 p = &(*p)->rb_right;
499 else 496 else
500 goto old_call; 497 goto old_call;
@@ -686,7 +683,7 @@ void rxrpc_release_call(struct rxrpc_call *call)
686 _debug("+++ ABORTING STATE %d +++\n", call->state); 683 _debug("+++ ABORTING STATE %d +++\n", call->state);
687 call->state = RXRPC_CALL_LOCALLY_ABORTED; 684 call->state = RXRPC_CALL_LOCALLY_ABORTED;
688 call->abort_code = RX_CALL_DEAD; 685 call->abort_code = RX_CALL_DEAD;
689 set_bit(RXRPC_CALL_ABORT, &call->events); 686 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
690 rxrpc_queue_call(call); 687 rxrpc_queue_call(call);
691 } 688 }
692 write_unlock(&call->state_lock); 689 write_unlock(&call->state_lock);
@@ -714,8 +711,7 @@ void rxrpc_release_call(struct rxrpc_call *call)
714 711
715 _debug("- zap %s %%%u #%u", 712 _debug("- zap %s %%%u #%u",
716 rxrpc_pkts[sp->hdr.type], 713 rxrpc_pkts[sp->hdr.type],
717 ntohl(sp->hdr.serial), 714 sp->hdr.serial, sp->hdr.seq);
718 ntohl(sp->hdr.seq));
719 rxrpc_free_skb(skb); 715 rxrpc_free_skb(skb);
720 spin_lock_bh(&call->lock); 716 spin_lock_bh(&call->lock);
721 } 717 }
@@ -763,10 +759,10 @@ static void rxrpc_mark_call_released(struct rxrpc_call *call)
763 _debug("abort call %p", call); 759 _debug("abort call %p", call);
764 call->state = RXRPC_CALL_LOCALLY_ABORTED; 760 call->state = RXRPC_CALL_LOCALLY_ABORTED;
765 call->abort_code = RX_CALL_DEAD; 761 call->abort_code = RX_CALL_DEAD;
766 if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) 762 if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events))
767 sched = true; 763 sched = true;
768 } 764 }
769 if (!test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) 765 if (!test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
770 sched = true; 766 sched = true;
771 if (sched) 767 if (sched)
772 rxrpc_queue_call(call); 768 rxrpc_queue_call(call);
@@ -873,9 +869,9 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
873 unsigned long _skb; 869 unsigned long _skb;
874 870
875 _skb = call->acks_window[call->acks_tail] & ~1; 871 _skb = call->acks_window[call->acks_tail] & ~1;
876 sp = rxrpc_skb((struct sk_buff *) _skb); 872 sp = rxrpc_skb((struct sk_buff *)_skb);
877 _debug("+++ clear Tx %u", ntohl(sp->hdr.seq)); 873 _debug("+++ clear Tx %u", sp->hdr.seq);
878 rxrpc_free_skb((struct sk_buff *) _skb); 874 rxrpc_free_skb((struct sk_buff *)_skb);
879 call->acks_tail = 875 call->acks_tail =
880 (call->acks_tail + 1) & (call->acks_winsz - 1); 876 (call->acks_tail + 1) & (call->acks_winsz - 1);
881 } 877 }
@@ -975,7 +971,7 @@ static void rxrpc_call_life_expired(unsigned long _call)
975 _enter("{%d}", call->debug_id); 971 _enter("{%d}", call->debug_id);
976 read_lock_bh(&call->state_lock); 972 read_lock_bh(&call->state_lock);
977 if (call->state < RXRPC_CALL_COMPLETE) { 973 if (call->state < RXRPC_CALL_COMPLETE) {
978 set_bit(RXRPC_CALL_LIFE_TIMER, &call->events); 974 set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events);
979 rxrpc_queue_call(call); 975 rxrpc_queue_call(call);
980 } 976 }
981 read_unlock_bh(&call->state_lock); 977 read_unlock_bh(&call->state_lock);
@@ -995,7 +991,7 @@ static void rxrpc_resend_time_expired(unsigned long _call)
995 return; 991 return;
996 992
997 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 993 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
998 if (!test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events)) 994 if (!test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
999 rxrpc_queue_call(call); 995 rxrpc_queue_call(call);
1000} 996}
1001 997
@@ -1013,7 +1009,7 @@ static void rxrpc_ack_time_expired(unsigned long _call)
1013 1009
1014 read_lock_bh(&call->state_lock); 1010 read_lock_bh(&call->state_lock);
1015 if (call->state < RXRPC_CALL_COMPLETE && 1011 if (call->state < RXRPC_CALL_COMPLETE &&
1016 !test_and_set_bit(RXRPC_CALL_ACK, &call->events)) 1012 !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
1017 rxrpc_queue_call(call); 1013 rxrpc_queue_call(call);
1018 read_unlock_bh(&call->state_lock); 1014 read_unlock_bh(&call->state_lock);
1019} 1015}
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 6c71ed1caf16..9942da1edbf6 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -21,7 +21,7 @@
21/* 21/*
22 * Time till a connection expires after last use (in seconds). 22 * Time till a connection expires after last use (in seconds).
23 */ 23 */
24unsigned rxrpc_connection_expiry = 10 * 60; 24unsigned int rxrpc_connection_expiry = 10 * 60;
25 25
26static void rxrpc_connection_reaper(struct work_struct *work); 26static void rxrpc_connection_reaper(struct work_struct *work);
27 27
@@ -57,10 +57,10 @@ static struct rxrpc_conn_bundle *rxrpc_alloc_bundle(gfp_t gfp)
57 */ 57 */
58static inline 58static inline
59int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle, 59int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle,
60 struct key *key, __be16 service_id) 60 struct key *key, u16 service_id)
61{ 61{
62 return (bundle->service_id - service_id) ?: 62 return (bundle->service_id - service_id) ?:
63 ((unsigned long) bundle->key - (unsigned long) key); 63 ((unsigned long)bundle->key - (unsigned long)key);
64} 64}
65 65
66/* 66/*
@@ -69,14 +69,14 @@ int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle,
69struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx, 69struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx,
70 struct rxrpc_transport *trans, 70 struct rxrpc_transport *trans,
71 struct key *key, 71 struct key *key,
72 __be16 service_id, 72 u16 service_id,
73 gfp_t gfp) 73 gfp_t gfp)
74{ 74{
75 struct rxrpc_conn_bundle *bundle, *candidate; 75 struct rxrpc_conn_bundle *bundle, *candidate;
76 struct rb_node *p, *parent, **pp; 76 struct rb_node *p, *parent, **pp;
77 77
78 _enter("%p{%x},%x,%hx,", 78 _enter("%p{%x},%x,%hx,",
79 rx, key_serial(key), trans->debug_id, ntohs(service_id)); 79 rx, key_serial(key), trans->debug_id, service_id);
80 80
81 if (rx->trans == trans && rx->bundle) { 81 if (rx->trans == trans && rx->bundle) {
82 atomic_inc(&rx->bundle->usage); 82 atomic_inc(&rx->bundle->usage);
@@ -213,7 +213,7 @@ static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
213 conn->debug_id = atomic_inc_return(&rxrpc_debug_id); 213 conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
214 conn->avail_calls = RXRPC_MAXCALLS; 214 conn->avail_calls = RXRPC_MAXCALLS;
215 conn->size_align = 4; 215 conn->size_align = 4;
216 conn->header_size = sizeof(struct rxrpc_header); 216 conn->header_size = sizeof(struct rxrpc_wire_header);
217 } 217 }
218 218
219 _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0); 219 _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
@@ -230,7 +230,7 @@ static void rxrpc_assign_connection_id(struct rxrpc_connection *conn)
230 struct rxrpc_connection *xconn; 230 struct rxrpc_connection *xconn;
231 struct rb_node *parent, **p; 231 struct rb_node *parent, **p;
232 __be32 epoch; 232 __be32 epoch;
233 u32 real_conn_id; 233 u32 cid;
234 234
235 _enter(""); 235 _enter("");
236 236
@@ -241,7 +241,7 @@ static void rxrpc_assign_connection_id(struct rxrpc_connection *conn)
241 conn->trans->conn_idcounter += RXRPC_CID_INC; 241 conn->trans->conn_idcounter += RXRPC_CID_INC;
242 if (conn->trans->conn_idcounter < RXRPC_CID_INC) 242 if (conn->trans->conn_idcounter < RXRPC_CID_INC)
243 conn->trans->conn_idcounter = RXRPC_CID_INC; 243 conn->trans->conn_idcounter = RXRPC_CID_INC;
244 real_conn_id = conn->trans->conn_idcounter; 244 cid = conn->trans->conn_idcounter;
245 245
246attempt_insertion: 246attempt_insertion:
247 parent = NULL; 247 parent = NULL;
@@ -255,9 +255,9 @@ attempt_insertion:
255 p = &(*p)->rb_left; 255 p = &(*p)->rb_left;
256 else if (epoch > xconn->epoch) 256 else if (epoch > xconn->epoch)
257 p = &(*p)->rb_right; 257 p = &(*p)->rb_right;
258 else if (real_conn_id < xconn->real_conn_id) 258 else if (cid < xconn->cid)
259 p = &(*p)->rb_left; 259 p = &(*p)->rb_left;
260 else if (real_conn_id > xconn->real_conn_id) 260 else if (cid > xconn->cid)
261 p = &(*p)->rb_right; 261 p = &(*p)->rb_right;
262 else 262 else
263 goto id_exists; 263 goto id_exists;
@@ -268,20 +268,19 @@ attempt_insertion:
268 rb_link_node(&conn->node, parent, p); 268 rb_link_node(&conn->node, parent, p);
269 rb_insert_color(&conn->node, &conn->trans->client_conns); 269 rb_insert_color(&conn->node, &conn->trans->client_conns);
270 270
271 conn->real_conn_id = real_conn_id; 271 conn->cid = cid;
272 conn->cid = htonl(real_conn_id);
273 write_unlock_bh(&conn->trans->conn_lock); 272 write_unlock_bh(&conn->trans->conn_lock);
274 _leave(" [CONNID %x CID %x]", real_conn_id, ntohl(conn->cid)); 273 _leave(" [CID %x]", cid);
275 return; 274 return;
276 275
277 /* we found a connection with the proposed ID - walk the tree from that 276 /* we found a connection with the proposed ID - walk the tree from that
278 * point looking for the next unused ID */ 277 * point looking for the next unused ID */
279id_exists: 278id_exists:
280 for (;;) { 279 for (;;) {
281 real_conn_id += RXRPC_CID_INC; 280 cid += RXRPC_CID_INC;
282 if (real_conn_id < RXRPC_CID_INC) { 281 if (cid < RXRPC_CID_INC) {
283 real_conn_id = RXRPC_CID_INC; 282 cid = RXRPC_CID_INC;
284 conn->trans->conn_idcounter = real_conn_id; 283 conn->trans->conn_idcounter = cid;
285 goto attempt_insertion; 284 goto attempt_insertion;
286 } 285 }
287 286
@@ -291,7 +290,7 @@ id_exists:
291 290
292 xconn = rb_entry(parent, struct rxrpc_connection, node); 291 xconn = rb_entry(parent, struct rxrpc_connection, node);
293 if (epoch < xconn->epoch || 292 if (epoch < xconn->epoch ||
294 real_conn_id < xconn->real_conn_id) 293 cid < xconn->cid)
295 goto attempt_insertion; 294 goto attempt_insertion;
296 } 295 }
297} 296}
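
Aside (not part of the patch): the renamed allocator above advances conn_idcounter by RXRPC_CID_INC on each attempt and, when the unsigned counter wraps, resets it to RXRPC_CID_INC rather than zero before retrying the tree insertion. A minimal userspace C sketch of that wrap-and-skip step; the constant below is illustrative, not the real RXRPC_CID_INC value.

#include <stdint.h>

#define CID_INC 0x100   /* illustrative increment; low bits carry the channel */

/* Advance the ID counter, skipping the reserved [0, CID_INC) range on wrap. */
static uint32_t next_cid(uint32_t cid)
{
        cid += CID_INC;
        if (cid < CID_INC)      /* unsigned overflow wrapped the counter */
                cid = CID_INC;
        return cid;
}
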
@@ -334,7 +333,7 @@ static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
334 */ 333 */
335static int rxrpc_connect_exclusive(struct rxrpc_sock *rx, 334static int rxrpc_connect_exclusive(struct rxrpc_sock *rx,
336 struct rxrpc_transport *trans, 335 struct rxrpc_transport *trans,
337 __be16 service_id, 336 u16 service_id,
338 struct rxrpc_call *call, 337 struct rxrpc_call *call,
339 gfp_t gfp) 338 gfp_t gfp)
340{ 339{
@@ -404,11 +403,11 @@ found_channel:
404 conn->channels[chan] = call; 403 conn->channels[chan] = call;
405 call->conn = conn; 404 call->conn = conn;
406 call->channel = chan; 405 call->channel = chan;
407 call->cid = conn->cid | htonl(chan); 406 call->cid = conn->cid | chan;
408 call->call_id = htonl(++conn->call_counter); 407 call->call_id = ++conn->call_counter;
409 408
410 _net("CONNECT client on conn %d chan %d as call %x", 409 _net("CONNECT client on conn %d chan %d as call %x",
411 conn->debug_id, chan, ntohl(call->call_id)); 410 conn->debug_id, chan, call->call_id);
412 411
413 spin_unlock(&trans->client_lock); 412 spin_unlock(&trans->client_lock);
414 413
@@ -593,11 +592,11 @@ found_channel:
593 conn->channels[chan] = call; 592 conn->channels[chan] = call;
594 call->conn = conn; 593 call->conn = conn;
595 call->channel = chan; 594 call->channel = chan;
596 call->cid = conn->cid | htonl(chan); 595 call->cid = conn->cid | chan;
597 call->call_id = htonl(++conn->call_counter); 596 call->call_id = ++conn->call_counter;
598 597
599 _net("CONNECT client on conn %d chan %d as call %x", 598 _net("CONNECT client on conn %d chan %d as call %x",
600 conn->debug_id, chan, ntohl(call->call_id)); 599 conn->debug_id, chan, call->call_id);
601 600
602 ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS); 601 ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
603 spin_unlock(&trans->client_lock); 602 spin_unlock(&trans->client_lock);
@@ -620,21 +619,21 @@ interrupted:
620 */ 619 */
621struct rxrpc_connection * 620struct rxrpc_connection *
622rxrpc_incoming_connection(struct rxrpc_transport *trans, 621rxrpc_incoming_connection(struct rxrpc_transport *trans,
623 struct rxrpc_header *hdr, 622 struct rxrpc_host_header *hdr,
624 gfp_t gfp) 623 gfp_t gfp)
625{ 624{
626 struct rxrpc_connection *conn, *candidate = NULL; 625 struct rxrpc_connection *conn, *candidate = NULL;
627 struct rb_node *p, **pp; 626 struct rb_node *p, **pp;
628 const char *new = "old"; 627 const char *new = "old";
629 __be32 epoch; 628 __be32 epoch;
630 u32 conn_id; 629 u32 cid;
631 630
632 _enter(""); 631 _enter("");
633 632
634 ASSERT(hdr->flags & RXRPC_CLIENT_INITIATED); 633 ASSERT(hdr->flags & RXRPC_CLIENT_INITIATED);
635 634
636 epoch = hdr->epoch; 635 epoch = hdr->epoch;
637 conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK; 636 cid = hdr->cid & RXRPC_CIDMASK;
638 637
639 /* search the connection list first */ 638 /* search the connection list first */
640 read_lock_bh(&trans->conn_lock); 639 read_lock_bh(&trans->conn_lock);
@@ -643,15 +642,15 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans,
643 while (p) { 642 while (p) {
644 conn = rb_entry(p, struct rxrpc_connection, node); 643 conn = rb_entry(p, struct rxrpc_connection, node);
645 644
646 _debug("maybe %x", conn->real_conn_id); 645 _debug("maybe %x", conn->cid);
647 646
648 if (epoch < conn->epoch) 647 if (epoch < conn->epoch)
649 p = p->rb_left; 648 p = p->rb_left;
650 else if (epoch > conn->epoch) 649 else if (epoch > conn->epoch)
651 p = p->rb_right; 650 p = p->rb_right;
652 else if (conn_id < conn->real_conn_id) 651 else if (cid < conn->cid)
653 p = p->rb_left; 652 p = p->rb_left;
654 else if (conn_id > conn->real_conn_id) 653 else if (cid > conn->cid)
655 p = p->rb_right; 654 p = p->rb_right;
656 else 655 else
657 goto found_extant_connection; 656 goto found_extant_connection;
@@ -668,12 +667,11 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans,
668 667
669 candidate->trans = trans; 668 candidate->trans = trans;
670 candidate->epoch = hdr->epoch; 669 candidate->epoch = hdr->epoch;
671 candidate->cid = hdr->cid & cpu_to_be32(RXRPC_CIDMASK); 670 candidate->cid = hdr->cid & RXRPC_CIDMASK;
672 candidate->service_id = hdr->serviceId; 671 candidate->service_id = hdr->serviceId;
673 candidate->security_ix = hdr->securityIndex; 672 candidate->security_ix = hdr->securityIndex;
674 candidate->in_clientflag = RXRPC_CLIENT_INITIATED; 673 candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
675 candidate->out_clientflag = 0; 674 candidate->out_clientflag = 0;
676 candidate->real_conn_id = conn_id;
677 candidate->state = RXRPC_CONN_SERVER; 675 candidate->state = RXRPC_CONN_SERVER;
678 if (candidate->service_id) 676 if (candidate->service_id)
679 candidate->state = RXRPC_CONN_SERVER_UNSECURED; 677 candidate->state = RXRPC_CONN_SERVER_UNSECURED;
@@ -690,9 +688,9 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans,
690 pp = &(*pp)->rb_left; 688 pp = &(*pp)->rb_left;
691 else if (epoch > conn->epoch) 689 else if (epoch > conn->epoch)
692 pp = &(*pp)->rb_right; 690 pp = &(*pp)->rb_right;
693 else if (conn_id < conn->real_conn_id) 691 else if (cid < conn->cid)
694 pp = &(*pp)->rb_left; 692 pp = &(*pp)->rb_left;
695 else if (conn_id > conn->real_conn_id) 693 else if (cid > conn->cid)
696 pp = &(*pp)->rb_right; 694 pp = &(*pp)->rb_right;
697 else 695 else
698 goto found_extant_second; 696 goto found_extant_second;
@@ -714,7 +712,7 @@ rxrpc_incoming_connection(struct rxrpc_transport *trans,
714 new = "new"; 712 new = "new";
715 713
716success: 714success:
717 _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->real_conn_id); 715 _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->cid);
718 716
719 _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage)); 717 _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
720 return conn; 718 return conn;
@@ -751,18 +749,17 @@ security_mismatch:
751 * packet 749 * packet
752 */ 750 */
753struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans, 751struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans,
754 struct rxrpc_header *hdr) 752 struct rxrpc_host_header *hdr)
755{ 753{
756 struct rxrpc_connection *conn; 754 struct rxrpc_connection *conn;
757 struct rb_node *p; 755 struct rb_node *p;
758 __be32 epoch; 756 u32 epoch, cid;
759 u32 conn_id;
760 757
761 _enter(",{%x,%x}", ntohl(hdr->cid), hdr->flags); 758 _enter(",{%x,%x}", hdr->cid, hdr->flags);
762 759
763 read_lock_bh(&trans->conn_lock); 760 read_lock_bh(&trans->conn_lock);
764 761
765 conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK; 762 cid = hdr->cid & RXRPC_CIDMASK;
766 epoch = hdr->epoch; 763 epoch = hdr->epoch;
767 764
768 if (hdr->flags & RXRPC_CLIENT_INITIATED) 765 if (hdr->flags & RXRPC_CLIENT_INITIATED)
@@ -773,15 +770,15 @@ struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans,
773 while (p) { 770 while (p) {
774 conn = rb_entry(p, struct rxrpc_connection, node); 771 conn = rb_entry(p, struct rxrpc_connection, node);
775 772
776 _debug("maybe %x", conn->real_conn_id); 773 _debug("maybe %x", conn->cid);
777 774
778 if (epoch < conn->epoch) 775 if (epoch < conn->epoch)
779 p = p->rb_left; 776 p = p->rb_left;
780 else if (epoch > conn->epoch) 777 else if (epoch > conn->epoch)
781 p = p->rb_right; 778 p = p->rb_right;
782 else if (conn_id < conn->real_conn_id) 779 else if (cid < conn->cid)
783 p = p->rb_left; 780 p = p->rb_left;
784 else if (conn_id > conn->real_conn_id) 781 else if (cid > conn->cid)
785 p = p->rb_right; 782 p = p->rb_right;
786 else 783 else
787 goto found; 784 goto found;
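
Aside (not part of the patch): both connection searches above descend an rbtree ordered first by epoch and then by the now host-order connection ID, so the patch only drops the ntohl() on the key and renames real_conn_id to cid. A generic sketch of composite-key ordering for a binary search tree, using a hypothetical node type rather than the kernel's struct rb_node.

#include <stddef.h>
#include <stdint.h>

struct conn_node {
        uint32_t epoch;                 /* primary key */
        uint32_t cid;                   /* secondary key */
        struct conn_node *left, *right;
};

/* Order by epoch first, then by connection ID. */
static int conn_cmp(uint32_t epoch, uint32_t cid, const struct conn_node *n)
{
        if (epoch != n->epoch)
                return epoch < n->epoch ? -1 : 1;
        if (cid != n->cid)
                return cid < n->cid ? -1 : 1;
        return 0;
}

static struct conn_node *conn_lookup(struct conn_node *root,
                                     uint32_t epoch, uint32_t cid)
{
        while (root) {
                int d = conn_cmp(epoch, cid, root);

                if (d == 0)
                        return root;
                root = d < 0 ? root->left : root->right;
        }
        return NULL;
}
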
diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c
index e7ed43a54c41..1bdaaed8cdc4 100644
--- a/net/rxrpc/ar-connevent.c
+++ b/net/rxrpc/ar-connevent.c
@@ -42,9 +42,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
42 call->state = state; 42 call->state = state;
43 call->abort_code = abort_code; 43 call->abort_code = abort_code;
44 if (state == RXRPC_CALL_LOCALLY_ABORTED) 44 if (state == RXRPC_CALL_LOCALLY_ABORTED)
45 set_bit(RXRPC_CALL_CONN_ABORT, &call->events); 45 set_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
46 else 46 else
47 set_bit(RXRPC_CALL_RCVD_ABORT, &call->events); 47 set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
48 rxrpc_queue_call(call); 48 rxrpc_queue_call(call);
49 } 49 }
50 write_unlock(&call->state_lock); 50 write_unlock(&call->state_lock);
@@ -60,11 +60,12 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
60static int rxrpc_abort_connection(struct rxrpc_connection *conn, 60static int rxrpc_abort_connection(struct rxrpc_connection *conn,
61 u32 error, u32 abort_code) 61 u32 error, u32 abort_code)
62{ 62{
63 struct rxrpc_header hdr; 63 struct rxrpc_wire_header whdr;
64 struct msghdr msg; 64 struct msghdr msg;
65 struct kvec iov[2]; 65 struct kvec iov[2];
66 __be32 word; 66 __be32 word;
67 size_t len; 67 size_t len;
68 u32 serial;
68 int ret; 69 int ret;
69 70
70 _enter("%d,,%u,%u", conn->debug_id, error, abort_code); 71 _enter("%d,,%u,%u", conn->debug_id, error, abort_code);
@@ -89,28 +90,29 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
89 msg.msg_controllen = 0; 90 msg.msg_controllen = 0;
90 msg.msg_flags = 0; 91 msg.msg_flags = 0;
91 92
92 hdr.epoch = conn->epoch; 93 whdr.epoch = htonl(conn->epoch);
93 hdr.cid = conn->cid; 94 whdr.cid = htonl(conn->cid);
94 hdr.callNumber = 0; 95 whdr.callNumber = 0;
95 hdr.seq = 0; 96 whdr.seq = 0;
96 hdr.type = RXRPC_PACKET_TYPE_ABORT; 97 whdr.type = RXRPC_PACKET_TYPE_ABORT;
97 hdr.flags = conn->out_clientflag; 98 whdr.flags = conn->out_clientflag;
98 hdr.userStatus = 0; 99 whdr.userStatus = 0;
99 hdr.securityIndex = conn->security_ix; 100 whdr.securityIndex = conn->security_ix;
100 hdr._rsvd = 0; 101 whdr._rsvd = 0;
101 hdr.serviceId = conn->service_id; 102 whdr.serviceId = htons(conn->service_id);
102 103
103 word = htonl(abort_code); 104 word = htonl(abort_code);
104 105
105 iov[0].iov_base = &hdr; 106 iov[0].iov_base = &whdr;
106 iov[0].iov_len = sizeof(hdr); 107 iov[0].iov_len = sizeof(whdr);
107 iov[1].iov_base = &word; 108 iov[1].iov_base = &word;
108 iov[1].iov_len = sizeof(word); 109 iov[1].iov_len = sizeof(word);
109 110
110 len = iov[0].iov_len + iov[1].iov_len; 111 len = iov[0].iov_len + iov[1].iov_len;
111 112
112 hdr.serial = htonl(atomic_inc_return(&conn->serial)); 113 serial = atomic_inc_return(&conn->serial);
113 _proto("Tx CONN ABORT %%%u { %d }", ntohl(hdr.serial), abort_code); 114 whdr.serial = htonl(serial);
115 _proto("Tx CONN ABORT %%%u { %d }", serial, abort_code);
114 116
115 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len); 117 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
116 if (ret < 0) { 118 if (ret < 0) {
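
Aside (not part of the patch): with connection state now kept in host order, the transmit path above is where the byte swapping happens — htonl()/htons() are applied while the wire header is assembled, and the header plus abort code go out as a two-element kvec. A minimal userspace sketch of the same gather-write idea over UDP with sendmsg(); the 8-byte toy header and the function name are illustrative only.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/uio.h>

/* Send a toy big-endian header followed by a 32-bit abort code in one datagram. */
static ssize_t send_abort(int fd, const struct sockaddr_in *peer,
                          uint32_t epoch, uint32_t cid, uint32_t abort_code)
{
        uint32_t whdr[2] = { htonl(epoch), htonl(cid) }; /* wire byte order */
        uint32_t code = htonl(abort_code);
        struct iovec iov[2] = {
                { .iov_base = whdr,  .iov_len = sizeof(whdr) },
                { .iov_base = &code, .iov_len = sizeof(code) },
        };
        struct msghdr msg = {
                .msg_name    = (void *)peer,
                .msg_namelen = sizeof(*peer),
                .msg_iov     = iov,
                .msg_iovlen  = 2,
        };

        return sendmsg(fd, &msg, 0);
}
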
@@ -132,7 +134,7 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call)
132 if (call) { 134 if (call) {
133 read_lock(&call->state_lock); 135 read_lock(&call->state_lock);
134 if (call->state < RXRPC_CALL_COMPLETE && 136 if (call->state < RXRPC_CALL_COMPLETE &&
135 !test_and_set_bit(RXRPC_CALL_SECURED, &call->events)) 137 !test_and_set_bit(RXRPC_CALL_EV_SECURED, &call->events))
136 rxrpc_queue_call(call); 138 rxrpc_queue_call(call);
137 read_unlock(&call->state_lock); 139 read_unlock(&call->state_lock);
138 } 140 }
@@ -146,8 +148,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
146 u32 *_abort_code) 148 u32 *_abort_code)
147{ 149{
148 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 150 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
149 __be32 tmp; 151 __be32 wtmp;
150 u32 serial; 152 u32 abort_code;
151 int loop, ret; 153 int loop, ret;
152 154
153 if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { 155 if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
@@ -155,19 +157,18 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
155 return -ECONNABORTED; 157 return -ECONNABORTED;
156 } 158 }
157 159
158 serial = ntohl(sp->hdr.serial); 160 _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
159
160 _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, serial);
161 161
162 switch (sp->hdr.type) { 162 switch (sp->hdr.type) {
163 case RXRPC_PACKET_TYPE_ABORT: 163 case RXRPC_PACKET_TYPE_ABORT:
164 if (skb_copy_bits(skb, 0, &tmp, sizeof(tmp)) < 0) 164 if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
165 return -EPROTO; 165 return -EPROTO;
166 _proto("Rx ABORT %%%u { ac=%d }", serial, ntohl(tmp)); 166 abort_code = ntohl(wtmp);
167 _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
167 168
168 conn->state = RXRPC_CONN_REMOTELY_ABORTED; 169 conn->state = RXRPC_CONN_REMOTELY_ABORTED;
169 rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, 170 rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
170 ntohl(tmp)); 171 abort_code);
171 return -ECONNABORTED; 172 return -ECONNABORTED;
172 173
173 case RXRPC_PACKET_TYPE_CHALLENGE: 174 case RXRPC_PACKET_TYPE_CHALLENGE:
@@ -335,7 +336,7 @@ void rxrpc_reject_packets(struct work_struct *work)
335 struct sockaddr_in sin; 336 struct sockaddr_in sin;
336 } sa; 337 } sa;
337 struct rxrpc_skb_priv *sp; 338 struct rxrpc_skb_priv *sp;
338 struct rxrpc_header hdr; 339 struct rxrpc_wire_header whdr;
339 struct rxrpc_local *local; 340 struct rxrpc_local *local;
340 struct sk_buff *skb; 341 struct sk_buff *skb;
341 struct msghdr msg; 342 struct msghdr msg;
@@ -348,11 +349,11 @@ void rxrpc_reject_packets(struct work_struct *work)
348 349
349 _enter("%d", local->debug_id); 350 _enter("%d", local->debug_id);
350 351
351 iov[0].iov_base = &hdr; 352 iov[0].iov_base = &whdr;
352 iov[0].iov_len = sizeof(hdr); 353 iov[0].iov_len = sizeof(whdr);
353 iov[1].iov_base = &code; 354 iov[1].iov_base = &code;
354 iov[1].iov_len = sizeof(code); 355 iov[1].iov_len = sizeof(code);
355 size = sizeof(hdr) + sizeof(code); 356 size = sizeof(whdr) + sizeof(code);
356 357
357 msg.msg_name = &sa; 358 msg.msg_name = &sa;
358 msg.msg_control = NULL; 359 msg.msg_control = NULL;
@@ -370,8 +371,8 @@ void rxrpc_reject_packets(struct work_struct *work)
370 break; 371 break;
371 } 372 }
372 373
373 memset(&hdr, 0, sizeof(hdr)); 374 memset(&whdr, 0, sizeof(whdr));
374 hdr.type = RXRPC_PACKET_TYPE_ABORT; 375 whdr.type = RXRPC_PACKET_TYPE_ABORT;
375 376
376 while ((skb = skb_dequeue(&local->reject_queue))) { 377 while ((skb = skb_dequeue(&local->reject_queue))) {
377 sp = rxrpc_skb(skb); 378 sp = rxrpc_skb(skb);
@@ -381,13 +382,13 @@ void rxrpc_reject_packets(struct work_struct *work)
381 sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; 382 sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
382 code = htonl(skb->priority); 383 code = htonl(skb->priority);
383 384
384 hdr.epoch = sp->hdr.epoch; 385 whdr.epoch = htonl(sp->hdr.epoch);
385 hdr.cid = sp->hdr.cid; 386 whdr.cid = htonl(sp->hdr.cid);
386 hdr.callNumber = sp->hdr.callNumber; 387 whdr.callNumber = htonl(sp->hdr.callNumber);
387 hdr.serviceId = sp->hdr.serviceId; 388 whdr.serviceId = htons(sp->hdr.serviceId);
388 hdr.flags = sp->hdr.flags; 389 whdr.flags = sp->hdr.flags;
389 hdr.flags ^= RXRPC_CLIENT_INITIATED; 390 whdr.flags ^= RXRPC_CLIENT_INITIATED;
390 hdr.flags &= RXRPC_CLIENT_INITIATED; 391 whdr.flags &= RXRPC_CLIENT_INITIATED;
391 392
392 kernel_sendmsg(local->socket, &msg, iov, 2, size); 393 kernel_sendmsg(local->socket, &msg, iov, 2, size);
393 break; 394 break;
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index 0610efa83d72..3e82d6f0313c 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -115,7 +115,6 @@ void rxrpc_UDP_error_report(struct sock *sk)
115 /* pass the transport ref to error_handler to release */ 115 /* pass the transport ref to error_handler to release */
116 skb_queue_tail(&trans->error_queue, skb); 116 skb_queue_tail(&trans->error_queue, skb);
117 rxrpc_queue_work(&trans->error_handler); 117 rxrpc_queue_work(&trans->error_handler);
118
119 _leave(""); 118 _leave("");
120} 119}
121 120
@@ -152,28 +151,18 @@ void rxrpc_UDP_error_handler(struct work_struct *work)
152 switch (ee->ee_code) { 151 switch (ee->ee_code) {
153 case ICMP_NET_UNREACH: 152 case ICMP_NET_UNREACH:
154 _net("Rx Received ICMP Network Unreachable"); 153 _net("Rx Received ICMP Network Unreachable");
155 err = ENETUNREACH;
156 break; 154 break;
157 case ICMP_HOST_UNREACH: 155 case ICMP_HOST_UNREACH:
158 _net("Rx Received ICMP Host Unreachable"); 156 _net("Rx Received ICMP Host Unreachable");
159 err = EHOSTUNREACH;
160 break; 157 break;
161 case ICMP_PORT_UNREACH: 158 case ICMP_PORT_UNREACH:
162 _net("Rx Received ICMP Port Unreachable"); 159 _net("Rx Received ICMP Port Unreachable");
163 err = ECONNREFUSED;
164 break;
165 case ICMP_FRAG_NEEDED:
166 _net("Rx Received ICMP Fragmentation Needed (%d)",
167 ee->ee_info);
168 err = 0; /* dealt with elsewhere */
169 break; 160 break;
170 case ICMP_NET_UNKNOWN: 161 case ICMP_NET_UNKNOWN:
171 _net("Rx Received ICMP Unknown Network"); 162 _net("Rx Received ICMP Unknown Network");
172 err = ENETUNREACH;
173 break; 163 break;
174 case ICMP_HOST_UNKNOWN: 164 case ICMP_HOST_UNKNOWN:
175 _net("Rx Received ICMP Unknown Host"); 165 _net("Rx Received ICMP Unknown Host");
176 err = EHOSTUNREACH;
177 break; 166 break;
178 default: 167 default:
179 _net("Rx Received ICMP DestUnreach code=%u", 168 _net("Rx Received ICMP DestUnreach code=%u",
@@ -222,7 +211,7 @@ void rxrpc_UDP_error_handler(struct work_struct *work)
222 if (call->state != RXRPC_CALL_COMPLETE && 211 if (call->state != RXRPC_CALL_COMPLETE &&
223 call->state < RXRPC_CALL_NETWORK_ERROR) { 212 call->state < RXRPC_CALL_NETWORK_ERROR) {
224 call->state = RXRPC_CALL_NETWORK_ERROR; 213 call->state = RXRPC_CALL_NETWORK_ERROR;
225 set_bit(RXRPC_CALL_RCVD_ERROR, &call->events); 214 set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
226 rxrpc_queue_call(call); 215 rxrpc_queue_call(call);
227 } 216 }
228 write_unlock(&call->state_lock); 217 write_unlock(&call->state_lock);
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 4505a691d88c..63ed75c40e29 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -231,7 +231,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
231 _debug("drain rx oos now"); 231 _debug("drain rx oos now");
232 read_lock(&call->state_lock); 232 read_lock(&call->state_lock);
233 if (call->state < RXRPC_CALL_COMPLETE && 233 if (call->state < RXRPC_CALL_COMPLETE &&
234 !test_and_set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events)) 234 !test_and_set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events))
235 rxrpc_queue_call(call); 235 rxrpc_queue_call(call);
236 read_unlock(&call->state_lock); 236 read_unlock(&call->state_lock);
237 } 237 }
@@ -287,12 +287,12 @@ static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
287 call->acks_latest = serial; 287 call->acks_latest = serial;
288 288
289 _debug("implicit ACKALL %%%u", call->acks_latest); 289 _debug("implicit ACKALL %%%u", call->acks_latest);
290 set_bit(RXRPC_CALL_RCVD_ACKALL, &call->events); 290 set_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events);
291 write_unlock_bh(&call->state_lock); 291 write_unlock_bh(&call->state_lock);
292 292
293 if (try_to_del_timer_sync(&call->resend_timer) >= 0) { 293 if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
294 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events); 294 clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
295 clear_bit(RXRPC_CALL_RESEND, &call->events); 295 clear_bit(RXRPC_CALL_EV_RESEND, &call->events);
296 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 296 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
297 } 297 }
298 break; 298 break;
@@ -310,8 +310,8 @@ static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
310void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) 310void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
311{ 311{
312 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 312 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
313 __be32 _abort_code; 313 __be32 wtmp;
314 u32 serial, hi_serial, seq, abort_code; 314 u32 hi_serial, abort_code;
315 315
316 _enter("%p,%p", call, skb); 316 _enter("%p,%p", call, skb);
317 317
@@ -330,16 +330,15 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
330 330
331 /* track the latest serial number on this connection for ACK packet 331 /* track the latest serial number on this connection for ACK packet
332 * information */ 332 * information */
333 serial = ntohl(sp->hdr.serial);
334 hi_serial = atomic_read(&call->conn->hi_serial); 333 hi_serial = atomic_read(&call->conn->hi_serial);
335 while (serial > hi_serial) 334 while (sp->hdr.serial > hi_serial)
336 hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial, 335 hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial,
337 serial); 336 sp->hdr.serial);
338 337
339 /* request ACK generation for any ACK or DATA packet that requests 338 /* request ACK generation for any ACK or DATA packet that requests
340 * it */ 339 * it */
341 if (sp->hdr.flags & RXRPC_REQUEST_ACK) { 340 if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
342 _proto("ACK Requested on %%%u", serial); 341 _proto("ACK Requested on %%%u", sp->hdr.serial);
343 rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false); 342 rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false);
344 } 343 }
345 344
@@ -347,24 +346,23 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
347 case RXRPC_PACKET_TYPE_ABORT: 346 case RXRPC_PACKET_TYPE_ABORT:
348 _debug("abort"); 347 _debug("abort");
349 348
350 if (skb_copy_bits(skb, 0, &_abort_code, 349 if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
351 sizeof(_abort_code)) < 0)
352 goto protocol_error; 350 goto protocol_error;
353 351
354 abort_code = ntohl(_abort_code); 352 abort_code = ntohl(wtmp);
355 _proto("Rx ABORT %%%u { %x }", serial, abort_code); 353 _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
356 354
357 write_lock_bh(&call->state_lock); 355 write_lock_bh(&call->state_lock);
358 if (call->state < RXRPC_CALL_COMPLETE) { 356 if (call->state < RXRPC_CALL_COMPLETE) {
359 call->state = RXRPC_CALL_REMOTELY_ABORTED; 357 call->state = RXRPC_CALL_REMOTELY_ABORTED;
360 call->abort_code = abort_code; 358 call->abort_code = abort_code;
361 set_bit(RXRPC_CALL_RCVD_ABORT, &call->events); 359 set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
362 rxrpc_queue_call(call); 360 rxrpc_queue_call(call);
363 } 361 }
364 goto free_packet_unlock; 362 goto free_packet_unlock;
365 363
366 case RXRPC_PACKET_TYPE_BUSY: 364 case RXRPC_PACKET_TYPE_BUSY:
367 _proto("Rx BUSY %%%u", serial); 365 _proto("Rx BUSY %%%u", sp->hdr.serial);
368 366
369 if (call->conn->out_clientflag) 367 if (call->conn->out_clientflag)
370 goto protocol_error; 368 goto protocol_error;
@@ -373,7 +371,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
373 switch (call->state) { 371 switch (call->state) {
374 case RXRPC_CALL_CLIENT_SEND_REQUEST: 372 case RXRPC_CALL_CLIENT_SEND_REQUEST:
375 call->state = RXRPC_CALL_SERVER_BUSY; 373 call->state = RXRPC_CALL_SERVER_BUSY;
376 set_bit(RXRPC_CALL_RCVD_BUSY, &call->events); 374 set_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events);
377 rxrpc_queue_call(call); 375 rxrpc_queue_call(call);
378 case RXRPC_CALL_SERVER_BUSY: 376 case RXRPC_CALL_SERVER_BUSY:
379 goto free_packet_unlock; 377 goto free_packet_unlock;
@@ -382,15 +380,13 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
382 } 380 }
383 381
384 default: 382 default:
385 _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], serial); 383 _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial);
386 goto protocol_error; 384 goto protocol_error;
387 385
388 case RXRPC_PACKET_TYPE_DATA: 386 case RXRPC_PACKET_TYPE_DATA:
389 seq = ntohl(sp->hdr.seq); 387 _proto("Rx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
390 388
391 _proto("Rx DATA %%%u { #%u }", serial, seq); 389 if (sp->hdr.seq == 0)
392
393 if (seq == 0)
394 goto protocol_error; 390 goto protocol_error;
395 391
396 call->ackr_prev_seq = sp->hdr.seq; 392 call->ackr_prev_seq = sp->hdr.seq;
@@ -398,9 +394,9 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
398 /* received data implicitly ACKs all of the request packets we 394 /* received data implicitly ACKs all of the request packets we
399 * sent when we're acting as a client */ 395 * sent when we're acting as a client */
400 if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) 396 if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
401 rxrpc_assume_implicit_ackall(call, serial); 397 rxrpc_assume_implicit_ackall(call, sp->hdr.serial);
402 398
403 switch (rxrpc_fast_process_data(call, skb, seq)) { 399 switch (rxrpc_fast_process_data(call, skb, sp->hdr.seq)) {
404 case 0: 400 case 0:
405 skb = NULL; 401 skb = NULL;
406 goto done; 402 goto done;
@@ -433,7 +429,7 @@ protocol_error_locked:
433 if (call->state <= RXRPC_CALL_COMPLETE) { 429 if (call->state <= RXRPC_CALL_COMPLETE) {
434 call->state = RXRPC_CALL_LOCALLY_ABORTED; 430 call->state = RXRPC_CALL_LOCALLY_ABORTED;
435 call->abort_code = RX_PROTOCOL_ERROR; 431 call->abort_code = RX_PROTOCOL_ERROR;
436 set_bit(RXRPC_CALL_ABORT, &call->events); 432 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
437 rxrpc_queue_call(call); 433 rxrpc_queue_call(call);
438 } 434 }
439free_packet_unlock: 435free_packet_unlock:
@@ -481,12 +477,12 @@ static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
481 if (!pskb_pull(jumbo, sizeof(jhdr))) 477 if (!pskb_pull(jumbo, sizeof(jhdr)))
482 BUG(); 478 BUG();
483 479
484 sp->hdr.seq = htonl(ntohl(sp->hdr.seq) + 1); 480 sp->hdr.seq += 1;
485 sp->hdr.serial = htonl(ntohl(sp->hdr.serial) + 1); 481 sp->hdr.serial += 1;
486 sp->hdr.flags = jhdr.flags; 482 sp->hdr.flags = jhdr.flags;
487 sp->hdr._rsvd = jhdr._rsvd; 483 sp->hdr._rsvd = jhdr._rsvd;
488 484
489 _proto("Rx DATA Jumbo %%%u", ntohl(sp->hdr.serial) - 1); 485 _proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1);
490 486
491 rxrpc_fast_process_packet(call, part); 487 rxrpc_fast_process_packet(call, part);
492 part = NULL; 488 part = NULL;
@@ -505,7 +501,7 @@ protocol_error:
505 if (call->state <= RXRPC_CALL_COMPLETE) { 501 if (call->state <= RXRPC_CALL_COMPLETE) {
506 call->state = RXRPC_CALL_LOCALLY_ABORTED; 502 call->state = RXRPC_CALL_LOCALLY_ABORTED;
507 call->abort_code = RX_PROTOCOL_ERROR; 503 call->abort_code = RX_PROTOCOL_ERROR;
508 set_bit(RXRPC_CALL_ABORT, &call->events); 504 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
509 rxrpc_queue_call(call); 505 rxrpc_queue_call(call);
510 } 506 }
511 write_unlock_bh(&call->state_lock); 507 write_unlock_bh(&call->state_lock);
@@ -530,7 +526,7 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
530 read_lock(&call->state_lock); 526 read_lock(&call->state_lock);
531 switch (call->state) { 527 switch (call->state) {
532 case RXRPC_CALL_LOCALLY_ABORTED: 528 case RXRPC_CALL_LOCALLY_ABORTED:
533 if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) { 529 if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
534 rxrpc_queue_call(call); 530 rxrpc_queue_call(call);
535 goto free_unlock; 531 goto free_unlock;
536 } 532 }
@@ -546,7 +542,7 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
546 /* resend last packet of a completed call */ 542 /* resend last packet of a completed call */
547 _debug("final ack again"); 543 _debug("final ack again");
548 rxrpc_get_call(call); 544 rxrpc_get_call(call);
549 set_bit(RXRPC_CALL_ACK_FINAL, &call->events); 545 set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events);
550 rxrpc_queue_call(call); 546 rxrpc_queue_call(call);
551 goto free_unlock; 547 goto free_unlock;
552 default: 548 default:
@@ -607,6 +603,35 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
607 rxrpc_queue_work(&local->event_processor); 603 rxrpc_queue_work(&local->event_processor);
608} 604}
609 605
606/*
607 * Extract the wire header from a packet and translate the byte order.
608 */
609static noinline
610int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
611{
612 struct rxrpc_wire_header whdr;
613
614 /* dig out the RxRPC connection details */
615 if (skb_copy_bits(skb, sizeof(struct udphdr), &whdr, sizeof(whdr)) < 0)
616 return -EBADMSG;
617 if (!pskb_pull(skb, sizeof(struct udphdr) + sizeof(whdr)))
618 BUG();
619
620 memset(sp, 0, sizeof(*sp));
621 sp->hdr.epoch = ntohl(whdr.epoch);
622 sp->hdr.cid = ntohl(whdr.cid);
623 sp->hdr.callNumber = ntohl(whdr.callNumber);
624 sp->hdr.seq = ntohl(whdr.seq);
625 sp->hdr.serial = ntohl(whdr.serial);
626 sp->hdr.flags = whdr.flags;
627 sp->hdr.type = whdr.type;
628 sp->hdr.userStatus = whdr.userStatus;
629 sp->hdr.securityIndex = whdr.securityIndex;
630 sp->hdr._rsvd = ntohs(whdr._rsvd);
631 sp->hdr.serviceId = ntohs(whdr.serviceId);
632 return 0;
633}
634
610static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local, 635static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
611 struct sk_buff *skb, 636 struct sk_buff *skb,
612 struct rxrpc_skb_priv *sp) 637 struct rxrpc_skb_priv *sp)
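
Aside (not part of the patch): the new rxrpc_extract_header() above is the heart of the byte-order rework — the wire header is pulled out of the skb once, converted field by field with ntohl()/ntohs(), and every later comparison and debug print works on plain host-order integers. A minimal userspace sketch of the same technique; the struct names and field set below are illustrative, not the kernel definitions.

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

/* On-wire layout: every multi-byte field is big-endian. */
struct wire_header {
        uint32_t epoch;
        uint32_t cid;
        uint32_t call_number;
        uint32_t seq;
        uint32_t serial;
        uint8_t  type, flags, user_status, security_index;
        uint16_t cksum;
        uint16_t service_id;
} __attribute__((packed));

/* Host-order copy that the rest of the code works with. */
struct host_header {
        uint32_t epoch, cid, call_number, seq, serial;
        uint8_t  type, flags, user_status, security_index;
        uint16_t cksum, service_id;
};

static int extract_header(struct host_header *hdr, const void *pkt, size_t len)
{
        struct wire_header whdr;

        if (len < sizeof(whdr))
                return -1;                      /* truncated packet */
        memcpy(&whdr, pkt, sizeof(whdr));       /* avoid unaligned loads */

        hdr->epoch          = ntohl(whdr.epoch);
        hdr->cid            = ntohl(whdr.cid);
        hdr->call_number    = ntohl(whdr.call_number);
        hdr->seq            = ntohl(whdr.seq);
        hdr->serial         = ntohl(whdr.serial);
        hdr->type           = whdr.type;        /* single bytes need no swap */
        hdr->flags          = whdr.flags;
        hdr->user_status    = whdr.user_status;
        hdr->security_index = whdr.security_index;
        hdr->cksum          = ntohs(whdr.cksum);
        hdr->service_id     = ntohs(whdr.service_id);
        return 0;
}
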
@@ -686,29 +711,25 @@ void rxrpc_data_ready(struct sock *sk)
686 711
687 UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0); 712 UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0);
688 713
689 /* the socket buffer we have is owned by UDP, with UDP's data all over 714 /* The socket buffer we have is owned by UDP, with UDP's data all over
690 * it, but we really want our own */ 715 * it, but we really want our own data there.
716 */
691 skb_orphan(skb); 717 skb_orphan(skb);
692 sp = rxrpc_skb(skb); 718 sp = rxrpc_skb(skb);
693 memset(sp, 0, sizeof(*sp));
694 719
695 _net("Rx UDP packet from %08x:%04hu", 720 _net("Rx UDP packet from %08x:%04hu",
696 ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source)); 721 ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source));
697 722
698 /* dig out the RxRPC connection details */ 723 /* dig out the RxRPC connection details */
699 if (skb_copy_bits(skb, sizeof(struct udphdr), &sp->hdr, 724 if (rxrpc_extract_header(sp, skb) < 0)
700 sizeof(sp->hdr)) < 0)
701 goto bad_message; 725 goto bad_message;
702 if (!pskb_pull(skb, sizeof(struct udphdr) + sizeof(sp->hdr)))
703 BUG();
704 726
705 _net("Rx RxRPC %s ep=%x call=%x:%x", 727 _net("Rx RxRPC %s ep=%x call=%x:%x",
706 sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient", 728 sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
707 ntohl(sp->hdr.epoch), 729 sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber);
708 ntohl(sp->hdr.cid),
709 ntohl(sp->hdr.callNumber));
710 730
711 if (sp->hdr.type == 0 || sp->hdr.type >= RXRPC_N_PACKET_TYPES) { 731 if (sp->hdr.type >= RXRPC_N_PACKET_TYPES ||
732 !((RXRPC_SUPPORTED_PACKET_TYPES >> sp->hdr.type) & 1)) {
712 _proto("Rx Bad Packet Type %u", sp->hdr.type); 733 _proto("Rx Bad Packet Type %u", sp->hdr.type);
713 goto bad_message; 734 goto bad_message;
714 } 735 }
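
Aside (not part of the patch): the rewritten sanity check above replaces the old "type is zero or too large" test with a bitmask of supported packet types, so unsupported values inside the valid range are rejected as well. A small sketch of the idiom; the type numbers and mask are illustrative rather than the real Rx protocol values.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative packet-type numbers (not the real Rx protocol values). */
enum pkt_type { PKT_DATA = 1, PKT_ACK = 2, PKT_BUSY = 3, PKT_ABORT = 4 };

#define N_PACKET_TYPES 5        /* one past the highest defined type */

/* One bit per supported type; type 0 is deliberately left unset. */
#define SUPPORTED_PACKET_TYPES                          \
        ((1 << PKT_DATA) | (1 << PKT_ACK) |             \
         (1 << PKT_BUSY) | (1 << PKT_ABORT))

static bool packet_type_ok(uint8_t type)
{
        /* Reject out-of-range values before indexing into the mask. */
        if (type >= N_PACKET_TYPES)
                return false;
        return (SUPPORTED_PACKET_TYPES >> type) & 1;
}
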
@@ -737,14 +758,9 @@ void rxrpc_data_ready(struct sock *sk)
737 rxrpc_put_connection(conn); 758 rxrpc_put_connection(conn);
738 } else { 759 } else {
739 struct rxrpc_call *call; 760 struct rxrpc_call *call;
740 u8 in_clientflag = 0; 761
741 762 call = rxrpc_find_call_hash(&sp->hdr, local,
742 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) 763 AF_INET, &ip_hdr(skb)->saddr);
743 in_clientflag = RXRPC_CLIENT_INITIATED;
744 call = rxrpc_find_call_hash(in_clientflag, sp->hdr.cid,
745 sp->hdr.callNumber, sp->hdr.epoch,
746 sp->hdr.serviceId, local, AF_INET,
747 (u8 *)&ip_hdr(skb)->saddr);
748 if (call) 764 if (call)
749 rxrpc_post_packet_to_call(call, skb); 765 rxrpc_post_packet_to_call(call, skb);
750 else 766 else
@@ -759,7 +775,7 @@ cant_route_call:
759 _debug("can't route call"); 775 _debug("can't route call");
760 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && 776 if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
761 sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { 777 sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
762 if (sp->hdr.seq == cpu_to_be32(1)) { 778 if (sp->hdr.seq == 1) {
763 _debug("first packet"); 779 _debug("first packet");
764 skb_queue_tail(&local->accept_queue, skb); 780 skb_queue_tail(&local->accept_queue, skb);
765 rxrpc_queue_work(&local->acceptor); 781 rxrpc_queue_work(&local->acceptor);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 2934a73a5981..cd6cdbe87125 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -16,7 +16,7 @@
16 BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \ 16 BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \
17 (POISON_FREE << 8 | POISON_FREE)) 17 (POISON_FREE << 8 | POISON_FREE))
18#else 18#else
19#define CHECK_SLAB_OKAY(X) do {} while(0) 19#define CHECK_SLAB_OKAY(X) do {} while (0)
20#endif 20#endif
21 21
22#define FCRYPT_BSIZE 8 22#define FCRYPT_BSIZE 8
@@ -70,12 +70,31 @@ struct rxrpc_sock {
70#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT 70#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT
71 struct sockaddr_rxrpc srx; /* local address */ 71 struct sockaddr_rxrpc srx; /* local address */
72 sa_family_t proto; /* protocol created with */ 72 sa_family_t proto; /* protocol created with */
73 __be16 service_id; /* service ID of local/remote service */
74}; 73};
75 74
76#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk) 75#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk)
77 76
78/* 77/*
78 * CPU-byteorder normalised Rx packet header.
79 */
80struct rxrpc_host_header {
81 u32 epoch; /* client boot timestamp */
82 u32 cid; /* connection and channel ID */
83 u32 callNumber; /* call ID (0 for connection-level packets) */
84 u32 seq; /* sequence number of pkt in call stream */
85 u32 serial; /* serial number of pkt sent to network */
86 u8 type; /* packet type */
87 u8 flags; /* packet flags */
88 u8 userStatus; /* app-layer defined status */
89 u8 securityIndex; /* security protocol ID */
90 union {
91 u16 _rsvd; /* reserved */
92 u16 cksum; /* kerberos security checksum */
93 };
94 u16 serviceId; /* service ID */
95} __packed;
96
97/*
79 * RxRPC socket buffer private variables 98 * RxRPC socket buffer private variables
80 * - max 48 bytes (struct sk_buff::cb) 99 * - max 48 bytes (struct sk_buff::cb)
81 */ 100 */
@@ -89,7 +108,7 @@ struct rxrpc_skb_priv {
89 bool need_resend; /* T if needs resending */ 108 bool need_resend; /* T if needs resending */
90 }; 109 };
91 110
92 struct rxrpc_header hdr; /* RxRPC packet header from this packet */ 111 struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
93}; 112};
94 113
95#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb) 114#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
@@ -230,7 +249,7 @@ struct rxrpc_conn_bundle {
230 atomic_t usage; 249 atomic_t usage;
231 int debug_id; /* debug ID for printks */ 250 int debug_id; /* debug ID for printks */
232 unsigned short num_conns; /* number of connections in this bundle */ 251 unsigned short num_conns; /* number of connections in this bundle */
233 __be16 service_id; /* service ID */ 252 u16 service_id; /* Service ID for this bundle */
234 u8 security_ix; /* security type */ 253 u8 security_ix; /* security type */
235}; 254};
236 255
@@ -252,7 +271,7 @@ struct rxrpc_connection {
252 struct rxrpc_security *security; /* applied security module */ 271 struct rxrpc_security *security; /* applied security module */
253 struct key *key; /* security for this connection (client) */ 272 struct key *key; /* security for this connection (client) */
254 struct key *server_key; /* security for this service */ 273 struct key *server_key; /* security for this service */
255 struct crypto_blkcipher *cipher; /* encryption handle */ 274 struct crypto_skcipher *cipher; /* encryption handle */
256 struct rxrpc_crypt csum_iv; /* packet checksum base */ 275 struct rxrpc_crypt csum_iv; /* packet checksum base */
257 unsigned long events; 276 unsigned long events;
258#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */ 277#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
@@ -260,7 +279,6 @@ struct rxrpc_connection {
260 rwlock_t lock; /* access lock */ 279 rwlock_t lock; /* access lock */
261 spinlock_t state_lock; /* state-change lock */ 280 spinlock_t state_lock; /* state-change lock */
262 atomic_t usage; 281 atomic_t usage;
263 u32 real_conn_id; /* connection ID (host-endian) */
264 enum { /* current state of connection */ 282 enum { /* current state of connection */
265 RXRPC_CONN_UNUSED, /* - connection not yet attempted */ 283 RXRPC_CONN_UNUSED, /* - connection not yet attempted */
266 RXRPC_CONN_CLIENT, /* - client connection */ 284 RXRPC_CONN_CLIENT, /* - client connection */
@@ -282,17 +300,76 @@ struct rxrpc_connection {
282 u8 security_size; /* security header size */ 300 u8 security_size; /* security header size */
283 u32 security_level; /* security level negotiated */ 301 u32 security_level; /* security level negotiated */
284 u32 security_nonce; /* response re-use preventer */ 302 u32 security_nonce; /* response re-use preventer */
285 303 u32 epoch; /* epoch of this connection */
286 /* the following are all in net order */ 304 u32 cid; /* connection ID */
287 __be32 epoch; /* epoch of this connection */ 305 u16 service_id; /* service ID for this connection */
288 __be32 cid; /* connection ID */
289 __be16 service_id; /* service ID */
290 u8 security_ix; /* security type */ 306 u8 security_ix; /* security type */
291 u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */ 307 u8 in_clientflag; /* RXRPC_CLIENT_INITIATED if we are server */
292 u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ 308 u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
293}; 309};
294 310
295/* 311/*
312 * Flags in call->flags.
313 */
314enum rxrpc_call_flag {
315 RXRPC_CALL_RELEASED, /* call has been released - no more message to userspace */
316 RXRPC_CALL_TERMINAL_MSG, /* call has given the socket its final message */
317 RXRPC_CALL_RCVD_LAST, /* all packets received */
318 RXRPC_CALL_RUN_RTIMER, /* Tx resend timer started */
319 RXRPC_CALL_TX_SOFT_ACK, /* sent some soft ACKs */
320 RXRPC_CALL_PROC_BUSY, /* the processor is busy */
321 RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */
322 RXRPC_CALL_HAS_USERID, /* has a user ID attached */
323 RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */
324};
325
326/*
327 * Events that can be raised on a call.
328 */
329enum rxrpc_call_event {
330 RXRPC_CALL_EV_RCVD_ACKALL, /* ACKALL or reply received */
331 RXRPC_CALL_EV_RCVD_BUSY, /* busy packet received */
332 RXRPC_CALL_EV_RCVD_ABORT, /* abort packet received */
333 RXRPC_CALL_EV_RCVD_ERROR, /* network error received */
334 RXRPC_CALL_EV_ACK_FINAL, /* need to generate final ACK (and release call) */
335 RXRPC_CALL_EV_ACK, /* need to generate ACK */
336 RXRPC_CALL_EV_REJECT_BUSY, /* need to generate busy message */
337 RXRPC_CALL_EV_ABORT, /* need to generate abort */
338 RXRPC_CALL_EV_CONN_ABORT, /* local connection abort generated */
339 RXRPC_CALL_EV_RESEND_TIMER, /* Tx resend timer expired */
340 RXRPC_CALL_EV_RESEND, /* Tx resend required */
341 RXRPC_CALL_EV_DRAIN_RX_OOS, /* drain the Rx out of sequence queue */
342 RXRPC_CALL_EV_LIFE_TIMER, /* call's lifetimer ran out */
343 RXRPC_CALL_EV_ACCEPTED, /* incoming call accepted by userspace app */
344 RXRPC_CALL_EV_SECURED, /* incoming call's connection is now secure */
345 RXRPC_CALL_EV_POST_ACCEPT, /* need to post an "accept?" message to the app */
346 RXRPC_CALL_EV_RELEASE, /* need to release the call's resources */
347};
348
349/*
350 * The states that a call can be in.
351 */
352enum rxrpc_call_state {
353 RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
354 RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
355 RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
356 RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */
357 RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */
358 RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */
359 RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
360 RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */
361 RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */
362 RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */
363 RXRPC_CALL_COMPLETE, /* - call completed */
364 RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */
365 RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
366 RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
367 RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
368 RXRPC_CALL_DEAD, /* - call is dead */
369 NR__RXRPC_CALL_STATES
370};
371
372/*
296 * RxRPC call definition 373 * RxRPC call definition
297 * - matched by { connection, call_id } 374 * - matched by { connection, call_id }
298 */ 375 */
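
Aside (not part of the patch): the hunk above lifts the call flags, events and states out of #define lists into named enums; the flag and event enumerators are still bit numbers handed to set_bit()/test_and_set_bit() on the unsigned long words in struct rxrpc_call, which is why they must stay small and dense. A hedged, non-atomic userspace approximation of the event-bit pattern (the kernel helpers are atomic).

#include <stdbool.h>

/* Event bits in the style of the enum above: values are bit numbers. */
enum call_event {
        CALL_EV_ABORT,          /* need to generate abort */
        CALL_EV_ACK,            /* need to generate ACK */
        CALL_EV_RESEND,         /* Tx resend required */
};

struct call {
        unsigned long events;   /* one bit per pending event */
};

/* Returns true if the event was newly raised (caller should queue the call). */
static bool raise_event(struct call *call, enum call_event ev)
{
        unsigned long bit = 1UL << ev;
        bool was_set = call->events & bit;

        call->events |= bit;
        return !was_set;
}
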
@@ -317,57 +394,13 @@ struct rxrpc_call {
317 unsigned long user_call_ID; /* user-defined call ID */ 394 unsigned long user_call_ID; /* user-defined call ID */
318 unsigned long creation_jif; /* time of call creation */ 395 unsigned long creation_jif; /* time of call creation */
319 unsigned long flags; 396 unsigned long flags;
320#define RXRPC_CALL_RELEASED 0 /* call has been released - no more message to userspace */
321#define RXRPC_CALL_TERMINAL_MSG 1 /* call has given the socket its final message */
322#define RXRPC_CALL_RCVD_LAST 2 /* all packets received */
323#define RXRPC_CALL_RUN_RTIMER 3 /* Tx resend timer started */
324#define RXRPC_CALL_TX_SOFT_ACK 4 /* sent some soft ACKs */
325#define RXRPC_CALL_PROC_BUSY 5 /* the processor is busy */
326#define RXRPC_CALL_INIT_ACCEPT 6 /* acceptance was initiated */
327#define RXRPC_CALL_HAS_USERID 7 /* has a user ID attached */
328#define RXRPC_CALL_EXPECT_OOS 8 /* expect out of sequence packets */
329 unsigned long events; 397 unsigned long events;
330#define RXRPC_CALL_RCVD_ACKALL 0 /* ACKALL or reply received */
331#define RXRPC_CALL_RCVD_BUSY 1 /* busy packet received */
332#define RXRPC_CALL_RCVD_ABORT 2 /* abort packet received */
333#define RXRPC_CALL_RCVD_ERROR 3 /* network error received */
334#define RXRPC_CALL_ACK_FINAL 4 /* need to generate final ACK (and release call) */
335#define RXRPC_CALL_ACK 5 /* need to generate ACK */
336#define RXRPC_CALL_REJECT_BUSY 6 /* need to generate busy message */
337#define RXRPC_CALL_ABORT 7 /* need to generate abort */
338#define RXRPC_CALL_CONN_ABORT 8 /* local connection abort generated */
339#define RXRPC_CALL_RESEND_TIMER 9 /* Tx resend timer expired */
340#define RXRPC_CALL_RESEND 10 /* Tx resend required */
341#define RXRPC_CALL_DRAIN_RX_OOS 11 /* drain the Rx out of sequence queue */
342#define RXRPC_CALL_LIFE_TIMER 12 /* call's lifetimer ran out */
343#define RXRPC_CALL_ACCEPTED 13 /* incoming call accepted by userspace app */
344#define RXRPC_CALL_SECURED 14 /* incoming call's connection is now secure */
345#define RXRPC_CALL_POST_ACCEPT 15 /* need to post an "accept?" message to the app */
346#define RXRPC_CALL_RELEASE 16 /* need to release the call's resources */
347
348 spinlock_t lock; 398 spinlock_t lock;
349 rwlock_t state_lock; /* lock for state transition */ 399 rwlock_t state_lock; /* lock for state transition */
350 atomic_t usage; 400 atomic_t usage;
351 atomic_t sequence; /* Tx data packet sequence counter */ 401 atomic_t sequence; /* Tx data packet sequence counter */
352 u32 abort_code; /* local/remote abort code */ 402 u32 abort_code; /* local/remote abort code */
353 enum { /* current state of call */ 403 enum rxrpc_call_state state : 8; /* current state of call */
354 RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
355 RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
356 RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
357 RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */
358 RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */
359 RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */
360 RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
361 RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */
362 RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */
363 RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */
364 RXRPC_CALL_COMPLETE, /* - call completed */
365 RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */
366 RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
367 RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
368 RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
369 RXRPC_CALL_DEAD, /* - call is dead */
370 } state;
371 int debug_id; /* debug ID for printks */ 404 int debug_id; /* debug ID for printks */
372 u8 channel; /* connection channel occupied by this call */ 405 u8 channel; /* connection channel occupied by this call */
373 406
@@ -389,9 +422,9 @@ struct rxrpc_call {
389 rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */ 422 rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */
390 rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */ 423 rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */
391 rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */ 424 rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */
392 rxrpc_seq_net_t ackr_prev_seq; /* previous sequence number received */ 425 rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
393 u8 ackr_reason; /* reason to ACK */ 426 u8 ackr_reason; /* reason to ACK */
394 __be32 ackr_serial; /* serial of packet being ACK'd */ 427 rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
395 atomic_t ackr_not_idle; /* number of packets in Rx queue */ 428 atomic_t ackr_not_idle; /* number of packets in Rx queue */
396 429
397 /* received packet records, 1 bit per record */ 430 /* received packet records, 1 bit per record */
@@ -403,11 +436,10 @@ struct rxrpc_call {
403 u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */ 436 u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */
404 struct rxrpc_local *local; /* Local endpoint. Used for hashing. */ 437 struct rxrpc_local *local; /* Local endpoint. Used for hashing. */
405 sa_family_t proto; /* Frame protocol */ 438 sa_family_t proto; /* Frame protocol */
406 /* the following should all be in net order */ 439 u32 call_id; /* call ID on connection */
407 __be32 cid; /* connection ID + channel index */ 440 u32 cid; /* connection ID plus channel index */
408 __be32 call_id; /* call ID on connection */ 441 u32 epoch; /* epoch of this connection */
409 __be32 epoch; /* epoch of this connection */ 442 u16 service_id; /* service ID */
410 __be16 service_id; /* service ID */
411 union { /* Peer IP address for hashing */ 443 union { /* Peer IP address for hashing */
412 __be32 ipv4_addr; 444 __be32 ipv4_addr;
413 __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */ 445 __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */
@@ -423,7 +455,7 @@ static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
423 if (call->state < RXRPC_CALL_COMPLETE) { 455 if (call->state < RXRPC_CALL_COMPLETE) {
424 call->abort_code = abort_code; 456 call->abort_code = abort_code;
425 call->state = RXRPC_CALL_LOCALLY_ABORTED; 457 call->state = RXRPC_CALL_LOCALLY_ABORTED;
426 set_bit(RXRPC_CALL_ABORT, &call->events); 458 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
427 } 459 }
428 write_unlock_bh(&call->state_lock); 460 write_unlock_bh(&call->state_lock);
429} 461}
@@ -432,7 +464,7 @@ static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
432 * af_rxrpc.c 464 * af_rxrpc.c
433 */ 465 */
434extern atomic_t rxrpc_n_skbs; 466extern atomic_t rxrpc_n_skbs;
435extern __be32 rxrpc_epoch; 467extern u32 rxrpc_epoch;
436extern atomic_t rxrpc_debug_id; 468extern atomic_t rxrpc_debug_id;
437extern struct workqueue_struct *rxrpc_workqueue; 469extern struct workqueue_struct *rxrpc_workqueue;
438 470
@@ -446,35 +478,35 @@ int rxrpc_reject_call(struct rxrpc_sock *);
446/* 478/*
447 * ar-ack.c 479 * ar-ack.c
448 */ 480 */
449extern unsigned rxrpc_requested_ack_delay; 481extern unsigned int rxrpc_requested_ack_delay;
450extern unsigned rxrpc_soft_ack_delay; 482extern unsigned int rxrpc_soft_ack_delay;
451extern unsigned rxrpc_idle_ack_delay; 483extern unsigned int rxrpc_idle_ack_delay;
452extern unsigned rxrpc_rx_window_size; 484extern unsigned int rxrpc_rx_window_size;
453extern unsigned rxrpc_rx_mtu; 485extern unsigned int rxrpc_rx_mtu;
454extern unsigned rxrpc_rx_jumbo_max; 486extern unsigned int rxrpc_rx_jumbo_max;
455 487
456void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); 488void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
457void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool); 489void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
458void rxrpc_process_call(struct work_struct *); 490void rxrpc_process_call(struct work_struct *);
459 491
460/* 492/*
461 * ar-call.c 493 * ar-call.c
462 */ 494 */
463extern unsigned rxrpc_max_call_lifetime; 495extern unsigned int rxrpc_max_call_lifetime;
464extern unsigned rxrpc_dead_call_expiry; 496extern unsigned int rxrpc_dead_call_expiry;
465extern struct kmem_cache *rxrpc_call_jar; 497extern struct kmem_cache *rxrpc_call_jar;
466extern struct list_head rxrpc_calls; 498extern struct list_head rxrpc_calls;
467extern rwlock_t rxrpc_call_lock; 499extern rwlock_t rxrpc_call_lock;
468 500
469struct rxrpc_call *rxrpc_find_call_hash(u8, __be32, __be32, __be32, 501struct rxrpc_call *rxrpc_find_call_hash(struct rxrpc_host_header *,
470 __be16, void *, sa_family_t, const u8 *); 502 void *, sa_family_t, const void *);
471struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *, 503struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
472 struct rxrpc_transport *, 504 struct rxrpc_transport *,
473 struct rxrpc_conn_bundle *, 505 struct rxrpc_conn_bundle *,
474 unsigned long, int, gfp_t); 506 unsigned long, int, gfp_t);
475struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, 507struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
476 struct rxrpc_connection *, 508 struct rxrpc_connection *,
477 struct rxrpc_header *, gfp_t); 509 struct rxrpc_host_header *, gfp_t);
478struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long); 510struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *, unsigned long);
479void rxrpc_release_call(struct rxrpc_call *); 511void rxrpc_release_call(struct rxrpc_call *);
480void rxrpc_release_calls_on_socket(struct rxrpc_sock *); 512void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
@@ -484,22 +516,22 @@ void __exit rxrpc_destroy_all_calls(void);
484/* 516/*
485 * ar-connection.c 517 * ar-connection.c
486 */ 518 */
487extern unsigned rxrpc_connection_expiry; 519extern unsigned int rxrpc_connection_expiry;
488extern struct list_head rxrpc_connections; 520extern struct list_head rxrpc_connections;
489extern rwlock_t rxrpc_connection_lock; 521extern rwlock_t rxrpc_connection_lock;
490 522
491struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *, 523struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *,
492 struct rxrpc_transport *, 524 struct rxrpc_transport *,
493 struct key *, __be16, gfp_t); 525 struct key *, u16, gfp_t);
494void rxrpc_put_bundle(struct rxrpc_transport *, struct rxrpc_conn_bundle *); 526void rxrpc_put_bundle(struct rxrpc_transport *, struct rxrpc_conn_bundle *);
495int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *, 527int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *,
496 struct rxrpc_conn_bundle *, struct rxrpc_call *, gfp_t); 528 struct rxrpc_conn_bundle *, struct rxrpc_call *, gfp_t);
497void rxrpc_put_connection(struct rxrpc_connection *); 529void rxrpc_put_connection(struct rxrpc_connection *);
498void __exit rxrpc_destroy_all_connections(void); 530void __exit rxrpc_destroy_all_connections(void);
499struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *, 531struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *,
500 struct rxrpc_header *); 532 struct rxrpc_host_header *);
501extern struct rxrpc_connection * 533extern struct rxrpc_connection *
502rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *, 534rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_host_header *,
503 gfp_t); 535 gfp_t);
504 536
505/* 537/*
@@ -547,7 +579,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
547/* 579/*
548 * ar-output.c 580 * ar-output.c
549 */ 581 */
550extern unsigned rxrpc_resend_timeout; 582extern unsigned int rxrpc_resend_timeout;
551 583
552int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *); 584int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
553int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *, 585int rxrpc_client_sendmsg(struct rxrpc_sock *, struct rxrpc_transport *,
@@ -595,7 +627,7 @@ void rxrpc_packet_destructor(struct sk_buff *);
595/* 627/*
596 * ar-transport.c 628 * ar-transport.c
597 */ 629 */
598extern unsigned rxrpc_transport_expiry; 630extern unsigned int rxrpc_transport_expiry;
599 631
600struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *, 632struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
601 struct rxrpc_peer *, gfp_t); 633 struct rxrpc_peer *, gfp_t);
@@ -694,7 +726,7 @@ do { \
694 printk(KERN_ERR "RxRPC: Assertion failed\n"); \ 726 printk(KERN_ERR "RxRPC: Assertion failed\n"); \
695 BUG(); \ 727 BUG(); \
696 } \ 728 } \
697} while(0) 729} while (0)
698 730
699#define ASSERTCMP(X, OP, Y) \ 731#define ASSERTCMP(X, OP, Y) \
700do { \ 732do { \
@@ -707,7 +739,7 @@ do { \
707 (unsigned long)(X), (unsigned long)(Y)); \ 739 (unsigned long)(X), (unsigned long)(Y)); \
708 BUG(); \ 740 BUG(); \
709 } \ 741 } \
710} while(0) 742} while (0)
711 743
712#define ASSERTIF(C, X) \ 744#define ASSERTIF(C, X) \
713do { \ 745do { \
@@ -716,7 +748,7 @@ do { \
716 printk(KERN_ERR "RxRPC: Assertion failed\n"); \ 748 printk(KERN_ERR "RxRPC: Assertion failed\n"); \
717 BUG(); \ 749 BUG(); \
718 } \ 750 } \
719} while(0) 751} while (0)
720 752
721#define ASSERTIFCMP(C, X, OP, Y) \ 753#define ASSERTIFCMP(C, X, OP, Y) \
722do { \ 754do { \
@@ -729,25 +761,25 @@ do { \
729 (unsigned long)(X), (unsigned long)(Y)); \ 761 (unsigned long)(X), (unsigned long)(Y)); \
730 BUG(); \ 762 BUG(); \
731 } \ 763 } \
732} while(0) 764} while (0)
733 765
734#else 766#else
735 767
736#define ASSERT(X) \ 768#define ASSERT(X) \
737do { \ 769do { \
738} while(0) 770} while (0)
739 771
740#define ASSERTCMP(X, OP, Y) \ 772#define ASSERTCMP(X, OP, Y) \
741do { \ 773do { \
742} while(0) 774} while (0)
743 775
744#define ASSERTIF(C, X) \ 776#define ASSERTIF(C, X) \
745do { \ 777do { \
746} while(0) 778} while (0)
747 779
748#define ASSERTIFCMP(C, X, OP, Y) \ 780#define ASSERTIFCMP(C, X, OP, Y) \
749do { \ 781do { \
750} while(0) 782} while (0)
751 783
752#endif /* __KDEBUGALL */ 784#endif /* __KDEBUGALL */
753 785
@@ -804,9 +836,9 @@ do { \
804 CHECK_SLAB_OKAY(&(CALL)->usage); \ 836 CHECK_SLAB_OKAY(&(CALL)->usage); \
805 if (atomic_inc_return(&(CALL)->usage) == 1) \ 837 if (atomic_inc_return(&(CALL)->usage) == 1) \
806 BUG(); \ 838 BUG(); \
807} while(0) 839} while (0)
808 840
809#define rxrpc_put_call(CALL) \ 841#define rxrpc_put_call(CALL) \
810do { \ 842do { \
811 __rxrpc_put_call(CALL); \ 843 __rxrpc_put_call(CALL); \
812} while(0) 844} while (0)
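
The ar-internal.h hunks above show the core of the change: the single wire-order struct rxrpc_header is split into a host-order struct rxrpc_host_header, used everywhere inside the kernel (skb private data, function prototypes, externs), and a big-endian struct rxrpc_wire_header that only exists at the UDP boundary. Alongside that, the call event bits gain the RXRPC_CALL_EV_ prefix, bare "unsigned" becomes "unsigned int", and "while(0)" gains its checkpatch space. A rough sketch of the two header shapes, assuming only the field names visible in this patch (ordering and packing here are illustrative, not the authoritative definitions):

	/* on the wire: always big-endian */
	struct rxrpc_wire_header {
		__be32	epoch;
		__be32	cid;
		__be32	callNumber;
		__be32	seq;
		__be32	serial;
		u8	type;
		u8	flags;
		u8	userStatus;
		u8	securityIndex;
		__be16	_rsvd;
		__be16	serviceId;
	} __packed;

	/* in sk_buff private data and internal APIs: CPU byte order */
	struct rxrpc_host_header {
		u32	epoch;
		u32	cid;
		u32	callNumber;
		u32	seq;
		u32	serial;
		u8	type;
		u8	flags;
		u8	userStatus;
		u8	securityIndex;
		u16	_rsvd;
		u16	serviceId;
	};

With this split, byte swapping happens exactly twice per packet: once when the header is parsed into the host form on receive, and once when a wire header is assembled for transmission.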
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 3f6571651d32..3fb492eedeb9 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -12,11 +12,11 @@
12 * "afs@CAMBRIDGE.REDHAT.COM> 12 * "afs@CAMBRIDGE.REDHAT.COM>
13 */ 13 */
14 14
15#include <crypto/skcipher.h>
15#include <linux/module.h> 16#include <linux/module.h>
16#include <linux/net.h> 17#include <linux/net.h>
17#include <linux/skbuff.h> 18#include <linux/skbuff.h>
18#include <linux/key-type.h> 19#include <linux/key-type.h>
19#include <linux/crypto.h>
20#include <linux/ctype.h> 20#include <linux/ctype.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <net/sock.h> 22#include <net/sock.h>
@@ -824,7 +824,7 @@ static void rxrpc_free_preparse(struct key_preparsed_payload *prep)
824 */ 824 */
825static int rxrpc_preparse_s(struct key_preparsed_payload *prep) 825static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
826{ 826{
827 struct crypto_blkcipher *ci; 827 struct crypto_skcipher *ci;
828 828
829 _enter("%zu", prep->datalen); 829 _enter("%zu", prep->datalen);
830 830
@@ -833,13 +833,13 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
833 833
834 memcpy(&prep->payload.data[2], prep->data, 8); 834 memcpy(&prep->payload.data[2], prep->data, 8);
835 835
836 ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); 836 ci = crypto_alloc_skcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
837 if (IS_ERR(ci)) { 837 if (IS_ERR(ci)) {
838 _leave(" = %ld", PTR_ERR(ci)); 838 _leave(" = %ld", PTR_ERR(ci));
839 return PTR_ERR(ci); 839 return PTR_ERR(ci);
840 } 840 }
841 841
842 if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0) 842 if (crypto_skcipher_setkey(ci, prep->data, 8) < 0)
843 BUG(); 843 BUG();
844 844
845 prep->payload.data[0] = ci; 845 prep->payload.data[0] = ci;
@@ -853,7 +853,7 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
853static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) 853static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep)
854{ 854{
855 if (prep->payload.data[0]) 855 if (prep->payload.data[0])
856 crypto_free_blkcipher(prep->payload.data[0]); 856 crypto_free_skcipher(prep->payload.data[0]);
857} 857}
858 858
859/* 859/*
@@ -870,7 +870,7 @@ static void rxrpc_destroy(struct key *key)
870static void rxrpc_destroy_s(struct key *key) 870static void rxrpc_destroy_s(struct key *key)
871{ 871{
872 if (key->payload.data[0]) { 872 if (key->payload.data[0]) {
873 crypto_free_blkcipher(key->payload.data[0]); 873 crypto_free_skcipher(key->payload.data[0]);
874 key->payload.data[0] = NULL; 874 key->payload.data[0] = NULL;
875 } 875 }
876} 876}
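
The ar-key.c hunks convert the server-key preparse path from the old blkcipher interface to the skcipher interface: the payload now stores a struct crypto_skcipher and the alloc/setkey/free calls change accordingly. A minimal sketch of that transform lifecycle, mirroring rxrpc_preparse_s() above (the helper name and the key pointer are illustrative):

	#include <crypto/skcipher.h>
	#include <linux/err.h>

	/* Allocate a synchronous pcbc(des) transform and load an 8-byte key. */
	static struct crypto_skcipher *example_alloc_session_cipher(const u8 *key)
	{
		struct crypto_skcipher *ci;
		int ret;

		ci = crypto_alloc_skcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(ci))
			return ci;

		ret = crypto_skcipher_setkey(ci, key, 8);
		if (ret < 0) {
			crypto_free_skcipher(ci);
			return ERR_PTR(ret);
		}
		return ci;	/* caller frees with crypto_free_skcipher() */
	}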
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
index 78483b4602bf..4e1e6db0050b 100644
--- a/net/rxrpc/ar-local.c
+++ b/net/rxrpc/ar-local.c
@@ -323,9 +323,11 @@ void __exit rxrpc_destroy_all_locals(void)
323 * Reply to a version request 323 * Reply to a version request
324 */ 324 */
325static void rxrpc_send_version_request(struct rxrpc_local *local, 325static void rxrpc_send_version_request(struct rxrpc_local *local,
326 struct rxrpc_header *hdr, 326 struct rxrpc_host_header *hdr,
327 struct sk_buff *skb) 327 struct sk_buff *skb)
328{ 328{
329 struct rxrpc_wire_header whdr;
330 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
329 struct sockaddr_in sin; 331 struct sockaddr_in sin;
330 struct msghdr msg; 332 struct msghdr msg;
331 struct kvec iov[2]; 333 struct kvec iov[2];
@@ -344,15 +346,20 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
344 msg.msg_controllen = 0; 346 msg.msg_controllen = 0;
345 msg.msg_flags = 0; 347 msg.msg_flags = 0;
346 348
347 hdr->seq = 0; 349 whdr.epoch = htonl(sp->hdr.epoch);
348 hdr->serial = 0; 350 whdr.cid = htonl(sp->hdr.cid);
349 hdr->type = RXRPC_PACKET_TYPE_VERSION; 351 whdr.callNumber = htonl(sp->hdr.callNumber);
350 hdr->flags = RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED); 352 whdr.seq = 0;
351 hdr->userStatus = 0; 353 whdr.serial = 0;
352 hdr->_rsvd = 0; 354 whdr.type = RXRPC_PACKET_TYPE_VERSION;
353 355 whdr.flags = RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED);
354 iov[0].iov_base = hdr; 356 whdr.userStatus = 0;
355 iov[0].iov_len = sizeof(*hdr); 357 whdr.securityIndex = 0;
358 whdr._rsvd = 0;
359 whdr.serviceId = htons(sp->hdr.serviceId);
360
361 iov[0].iov_base = &whdr;
362 iov[0].iov_len = sizeof(whdr);
356 iov[1].iov_base = (char *)rxrpc_version_string; 363 iov[1].iov_base = (char *)rxrpc_version_string;
357 iov[1].iov_len = sizeof(rxrpc_version_string); 364 iov[1].iov_len = sizeof(rxrpc_version_string);
358 365
@@ -383,7 +390,7 @@ static void rxrpc_process_local_events(struct work_struct *work)
383 while ((skb = skb_dequeue(&local->event_queue))) { 390 while ((skb = skb_dequeue(&local->event_queue))) {
384 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 391 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
385 392
386 kdebug("{%d},{%u}", local->debug_id, sp->hdr.type); 393 _debug("{%d},{%u}", local->debug_id, sp->hdr.type);
387 394
388 switch (sp->hdr.type) { 395 switch (sp->hdr.type) {
389 case RXRPC_PACKET_TYPE_VERSION: 396 case RXRPC_PACKET_TYPE_VERSION:
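
In ar-local.c the VERSION reply no longer rewrites the received header in place: it builds a struct rxrpc_wire_header on the stack from the host-order copy in the skb, applying htonl()/htons() field by field, and sends that plus the version string as two kvec segments. A sketch of the send pattern under those assumptions (sock, msg and the payload are placeholders; the real caller is rxrpc_send_version_request() above):

	#include <linux/net.h>
	#include <linux/uio.h>

	static int example_send_reply(struct socket *sock, struct msghdr *msg,
				      struct rxrpc_wire_header *whdr,
				      const char *payload, size_t paylen)
	{
		struct kvec iov[2];

		iov[0].iov_base = whdr;		/* stack copy, already big-endian */
		iov[0].iov_len  = sizeof(*whdr);
		iov[1].iov_base = (char *)payload;
		iov[1].iov_len  = paylen;

		return kernel_sendmsg(sock, msg, iov, 2,
				      iov[0].iov_len + iov[1].iov_len);
	}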
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 14c4e12c47b0..d36fb6e1a29c 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -21,7 +21,7 @@
21/* 21/*
22 * Time till packet resend (in jiffies). 22 * Time till packet resend (in jiffies).
23 */ 23 */
24unsigned rxrpc_resend_timeout = 4 * HZ; 24unsigned int rxrpc_resend_timeout = 4 * HZ;
25 25
26static int rxrpc_send_data(struct rxrpc_sock *rx, 26static int rxrpc_send_data(struct rxrpc_sock *rx,
27 struct rxrpc_call *call, 27 struct rxrpc_call *call,
@@ -111,11 +111,11 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
111 if (call->state <= RXRPC_CALL_COMPLETE) { 111 if (call->state <= RXRPC_CALL_COMPLETE) {
112 call->state = RXRPC_CALL_LOCALLY_ABORTED; 112 call->state = RXRPC_CALL_LOCALLY_ABORTED;
113 call->abort_code = abort_code; 113 call->abort_code = abort_code;
114 set_bit(RXRPC_CALL_ABORT, &call->events); 114 set_bit(RXRPC_CALL_EV_ABORT, &call->events);
115 del_timer_sync(&call->resend_timer); 115 del_timer_sync(&call->resend_timer);
116 del_timer_sync(&call->ack_timer); 116 del_timer_sync(&call->ack_timer);
117 clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events); 117 clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
118 clear_bit(RXRPC_CALL_ACK, &call->events); 118 clear_bit(RXRPC_CALL_EV_ACK, &call->events);
119 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 119 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
120 rxrpc_queue_call(call); 120 rxrpc_queue_call(call);
121 } 121 }
@@ -136,7 +136,7 @@ int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
136 struct rxrpc_call *call; 136 struct rxrpc_call *call;
137 unsigned long user_call_ID = 0; 137 unsigned long user_call_ID = 0;
138 struct key *key; 138 struct key *key;
139 __be16 service_id; 139 u16 service_id;
140 u32 abort_code = 0; 140 u32 abort_code = 0;
141 int ret; 141 int ret;
142 142
@@ -151,11 +151,11 @@ int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans,
151 151
152 bundle = NULL; 152 bundle = NULL;
153 if (trans) { 153 if (trans) {
154 service_id = rx->service_id; 154 service_id = rx->srx.srx_service;
155 if (msg->msg_name) { 155 if (msg->msg_name) {
156 DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, 156 DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx,
157 msg->msg_name); 157 msg->msg_name);
158 service_id = htons(srx->srx_service); 158 service_id = srx->srx_service;
159 } 159 }
160 key = rx->key; 160 key = rx->key;
161 if (key && !rx->key->payload.data[0]) 161 if (key && !rx->key->payload.data[0])
@@ -348,7 +348,7 @@ int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
348 348
349 /* send the packet with the don't fragment bit set if we currently 349 /* send the packet with the don't fragment bit set if we currently
350 * think it's small enough */ 350 * think it's small enough */
351 if (skb->len - sizeof(struct rxrpc_header) < trans->peer->maxdata) { 351 if (skb->len - sizeof(struct rxrpc_wire_header) < trans->peer->maxdata) {
352 down_read(&trans->local->defrag_sem); 352 down_read(&trans->local->defrag_sem);
353 /* send the packet by UDP 353 /* send the packet by UDP
354 * - returns -EMSGSIZE if UDP would have to fragment the packet 354 * - returns -EMSGSIZE if UDP would have to fragment the packet
@@ -401,7 +401,8 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
401 int ret; 401 int ret;
402 402
403 _enter(",{%d},%ld", 403 _enter(",{%d},%ld",
404 CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz), 404 CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail),
405 call->acks_winsz),
405 *timeo); 406 *timeo);
406 407
407 add_wait_queue(&call->tx_waitq, &myself); 408 add_wait_queue(&call->tx_waitq, &myself);
@@ -409,7 +410,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
409 for (;;) { 410 for (;;) {
410 set_current_state(TASK_INTERRUPTIBLE); 411 set_current_state(TASK_INTERRUPTIBLE);
411 ret = 0; 412 ret = 0;
412 if (CIRC_SPACE(call->acks_head, call->acks_tail, 413 if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail),
413 call->acks_winsz) > 0) 414 call->acks_winsz) > 0)
414 break; 415 break;
415 if (signal_pending(current)) { 416 if (signal_pending(current)) {
@@ -437,7 +438,7 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call)
437 if (try_to_del_timer_sync(&call->resend_timer) >= 0) { 438 if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
438 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 439 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
439 if (call->state < RXRPC_CALL_COMPLETE && 440 if (call->state < RXRPC_CALL_COMPLETE &&
440 !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events)) 441 !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
441 rxrpc_queue_call(call); 442 rxrpc_queue_call(call);
442 } 443 }
443 read_unlock_bh(&call->state_lock); 444 read_unlock_bh(&call->state_lock);
@@ -480,8 +481,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
480 write_unlock_bh(&call->state_lock); 481 write_unlock_bh(&call->state_lock);
481 } 482 }
482 483
483 _proto("Tx DATA %%%u { #%u }", 484 _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
484 ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
485 485
486 sp->need_resend = false; 486 sp->need_resend = false;
487 sp->resend_at = jiffies + rxrpc_resend_timeout; 487 sp->resend_at = jiffies + rxrpc_resend_timeout;
@@ -513,6 +513,29 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
513} 513}
514 514
515/* 515/*
516 * Convert a host-endian header into a network-endian header.
517 */
518static void rxrpc_insert_header(struct sk_buff *skb)
519{
520 struct rxrpc_wire_header whdr;
521 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
522
523 whdr.epoch = htonl(sp->hdr.epoch);
524 whdr.cid = htonl(sp->hdr.cid);
525 whdr.callNumber = htonl(sp->hdr.callNumber);
526 whdr.seq = htonl(sp->hdr.seq);
527 whdr.serial = htonl(sp->hdr.serial);
528 whdr.type = sp->hdr.type;
529 whdr.flags = sp->hdr.flags;
530 whdr.userStatus = sp->hdr.userStatus;
531 whdr.securityIndex = sp->hdr.securityIndex;
532 whdr._rsvd = htons(sp->hdr._rsvd);
533 whdr.serviceId = htons(sp->hdr.serviceId);
534
535 memcpy(skb->head, &whdr, sizeof(whdr));
536}
537
538/*
516 * send data through a socket 539 * send data through a socket
517 * - must be called in process context 540 * - must be called in process context
518 * - caller holds the socket locked 541 * - caller holds the socket locked
@@ -548,7 +571,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
548 571
549 _debug("alloc"); 572 _debug("alloc");
550 573
551 if (CIRC_SPACE(call->acks_head, call->acks_tail, 574 if (CIRC_SPACE(call->acks_head,
575 ACCESS_ONCE(call->acks_tail),
552 call->acks_winsz) <= 0) { 576 call->acks_winsz) <= 0) {
553 ret = -EAGAIN; 577 ret = -EAGAIN;
554 if (msg->msg_flags & MSG_DONTWAIT) 578 if (msg->msg_flags & MSG_DONTWAIT)
@@ -650,22 +674,22 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
650 674
651 seq = atomic_inc_return(&call->sequence); 675 seq = atomic_inc_return(&call->sequence);
652 676
653 sp->hdr.epoch = conn->epoch; 677 sp->hdr.epoch = conn->epoch;
654 sp->hdr.cid = call->cid; 678 sp->hdr.cid = call->cid;
655 sp->hdr.callNumber = call->call_id; 679 sp->hdr.callNumber = call->call_id;
656 sp->hdr.seq = htonl(seq); 680 sp->hdr.seq = seq;
657 sp->hdr.serial = 681 sp->hdr.serial = atomic_inc_return(&conn->serial);
658 htonl(atomic_inc_return(&conn->serial)); 682 sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
659 sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
660 sp->hdr.userStatus = 0; 683 sp->hdr.userStatus = 0;
661 sp->hdr.securityIndex = conn->security_ix; 684 sp->hdr.securityIndex = conn->security_ix;
662 sp->hdr._rsvd = 0; 685 sp->hdr._rsvd = 0;
663 sp->hdr.serviceId = conn->service_id; 686 sp->hdr.serviceId = call->service_id;
664 687
665 sp->hdr.flags = conn->out_clientflag; 688 sp->hdr.flags = conn->out_clientflag;
666 if (msg_data_left(msg) == 0 && !more) 689 if (msg_data_left(msg) == 0 && !more)
667 sp->hdr.flags |= RXRPC_LAST_PACKET; 690 sp->hdr.flags |= RXRPC_LAST_PACKET;
668 else if (CIRC_SPACE(call->acks_head, call->acks_tail, 691 else if (CIRC_SPACE(call->acks_head,
692 ACCESS_ONCE(call->acks_tail),
669 call->acks_winsz) > 1) 693 call->acks_winsz) > 1)
670 sp->hdr.flags |= RXRPC_MORE_PACKETS; 694 sp->hdr.flags |= RXRPC_MORE_PACKETS;
671 if (more && seq & 1) 695 if (more && seq & 1)
@@ -673,12 +697,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
673 697
674 ret = rxrpc_secure_packet( 698 ret = rxrpc_secure_packet(
675 call, skb, skb->mark, 699 call, skb, skb->mark,
676 skb->head + sizeof(struct rxrpc_header)); 700 skb->head + sizeof(struct rxrpc_wire_header));
677 if (ret < 0) 701 if (ret < 0)
678 goto out; 702 goto out;
679 703
680 memcpy(skb->head, &sp->hdr, 704 rxrpc_insert_header(skb);
681 sizeof(struct rxrpc_header));
682 rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); 705 rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
683 skb = NULL; 706 skb = NULL;
684 } 707 }
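
Besides the endianness work, the ar-output.c hunks wrap every read of call->acks_tail in ACCESS_ONCE(): the tail index is advanced by the ACK-processing side while sendmsg is checking for transmit window space, so the load must happen exactly once per CIRC_SPACE() evaluation rather than being re-read by the compiler. A sketch of the idea with illustrative names (not the rxrpc ones):

	#include <linux/circ_buf.h>
	#include <linux/compiler.h>

	/* Producer-side space check on a ring whose tail is moved elsewhere. */
	static bool ring_can_queue(int head, int *tail, int size)
	{
		return CIRC_SPACE(head, ACCESS_ONCE(*tail), size) > 0;
	}

The new rxrpc_insert_header() helper added above is the single point where a queued packet's host-order header is converted back to wire order and copied to the front of the skb, replacing the old raw memcpy of the __be32 header.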
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index bebaa43484bc..dc089b1976aa 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -92,7 +92,7 @@ static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx,
92 BUG(); 92 BUG();
93 } 93 }
94 94
95 peer->hdrsize += sizeof(struct rxrpc_header); 95 peer->hdrsize += sizeof(struct rxrpc_wire_header);
96 peer->maxdata = peer->mtu - peer->hdrsize; 96 peer->maxdata = peer->mtu - peer->hdrsize;
97 } 97 }
98 98
diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c
index 38047f713f2c..525b2ba5a8f4 100644
--- a/net/rxrpc/ar-proc.c
+++ b/net/rxrpc/ar-proc.c
@@ -74,9 +74,9 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
74 " %-8.8s %08x %lx\n", 74 " %-8.8s %08x %lx\n",
75 lbuff, 75 lbuff,
76 rbuff, 76 rbuff,
77 ntohs(call->conn->service_id), 77 call->conn->service_id,
78 ntohl(call->conn->cid), 78 call->cid,
79 ntohl(call->call_id), 79 call->call_id,
80 call->conn->in_clientflag ? "Svc" : "Clt", 80 call->conn->in_clientflag ? "Svc" : "Clt",
81 atomic_read(&call->usage), 81 atomic_read(&call->usage),
82 rxrpc_call_states[call->state], 82 rxrpc_call_states[call->state],
@@ -157,8 +157,8 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
157 " %s %08x %08x %08x\n", 157 " %s %08x %08x %08x\n",
158 lbuff, 158 lbuff,
159 rbuff, 159 rbuff,
160 ntohs(conn->service_id), 160 conn->service_id,
161 ntohl(conn->cid), 161 conn->cid,
162 conn->call_counter, 162 conn->call_counter,
163 conn->in_clientflag ? "Svc" : "Clt", 163 conn->in_clientflag ? "Svc" : "Clt",
164 atomic_read(&conn->usage), 164 atomic_read(&conn->usage),
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index b92beded7459..64facba24a45 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -33,7 +33,7 @@ void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
33 33
34 read_lock_bh(&call->state_lock); 34 read_lock_bh(&call->state_lock);
35 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && 35 if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
36 !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) 36 !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
37 rxrpc_queue_call(call); 37 rxrpc_queue_call(call);
38 read_unlock_bh(&call->state_lock); 38 read_unlock_bh(&call->state_lock);
39} 39}
@@ -158,7 +158,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
158 goto receive_non_data_message; 158 goto receive_non_data_message;
159 159
160 _debug("recvmsg DATA #%u { %d, %d }", 160 _debug("recvmsg DATA #%u { %d, %d }",
161 ntohl(sp->hdr.seq), skb->len, sp->offset); 161 sp->hdr.seq, skb->len, sp->offset);
162 162
163 if (!continue_call) { 163 if (!continue_call) {
164 /* only set the control data once per recvmsg() */ 164 /* only set the control data once per recvmsg() */
@@ -169,11 +169,11 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
169 ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); 169 ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
170 } 170 }
171 171
172 ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv); 172 ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
173 ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1); 173 ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
174 call->rx_data_recv = ntohl(sp->hdr.seq); 174 call->rx_data_recv = sp->hdr.seq;
175 175
176 ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten); 176 ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
177 177
178 offset = sp->offset; 178 offset = sp->offset;
179 copy = skb->len - offset; 179 copy = skb->len - offset;
@@ -364,11 +364,11 @@ void rxrpc_kernel_data_delivered(struct sk_buff *skb)
364 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 364 struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
365 struct rxrpc_call *call = sp->call; 365 struct rxrpc_call *call = sp->call;
366 366
367 ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv); 367 ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
368 ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1); 368 ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
369 call->rx_data_recv = ntohl(sp->hdr.seq); 369 call->rx_data_recv = sp->hdr.seq;
370 370
371 ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten); 371 ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
372 rxrpc_free_skb(skb); 372 rxrpc_free_skb(skb);
373} 373}
374 374
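
The ar-recvmsg.c hunks rely on sp->hdr.seq now being host-order: the ASSERTCMP() ordering checks against rx_data_recv and rx_data_eaten are only meaningful on plain integers. As a worked example, on a little-endian CPU htonl(2) is stored as 0x02000000 while htonl(256) is stored as 0x00010000, so the byte-swapped form of 2 compares greater than that of 256, the opposite of the real ordering; converting the sequence number once at receive time lets every later comparison use ordinary integer semantics.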
diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c
index 8334474eb26c..ceff6394a65f 100644
--- a/net/rxrpc/ar-security.c
+++ b/net/rxrpc/ar-security.c
@@ -167,11 +167,11 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
167 struct rxrpc_sock *rx; 167 struct rxrpc_sock *rx;
168 struct key *key; 168 struct key *key;
169 key_ref_t kref; 169 key_ref_t kref;
170 char kdesc[5+1+3+1]; 170 char kdesc[5 + 1 + 3 + 1];
171 171
172 _enter(""); 172 _enter("");
173 173
174 sprintf(kdesc, "%u:%u", ntohs(conn->service_id), conn->security_ix); 174 sprintf(kdesc, "%u:%u", conn->service_id, conn->security_ix);
175 175
176 sec = rxrpc_security_lookup(conn->security_ix); 176 sec = rxrpc_security_lookup(conn->security_ix);
177 if (!sec) { 177 if (!sec) {
@@ -182,7 +182,7 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
182 /* find the service */ 182 /* find the service */
183 read_lock_bh(&local->services_lock); 183 read_lock_bh(&local->services_lock);
184 list_for_each_entry(rx, &local->services, listen_link) { 184 list_for_each_entry(rx, &local->services, listen_link) {
185 if (rx->service_id == conn->service_id) 185 if (rx->srx.srx_service == conn->service_id)
186 goto found_service; 186 goto found_service;
187 } 187 }
188 188
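
In ar-security.c the key-description buffer sizing is simply spelled out more readably: the description is "<service>:<security-index>", and with host-order values a u16 service ID prints as at most 5 digits (65535) and a u8 security index as at most 3 (255), hence 5 + 1 + 3 + 1 including the NUL. A sketch with placeholder variables:

	char kdesc[5 + 1 + 3 + 1];	/* "65535:255" + NUL worst case */

	snprintf(kdesc, sizeof(kdesc), "%u:%u",
		 (unsigned int)service_id, (unsigned int)security_ix);

The service lookup below it now compares against rx->srx.srx_service, since the socket's service ID is also kept in host order.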
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
index 4cfab49e329d..62a267472fce 100644
--- a/net/rxrpc/ar-skbuff.c
+++ b/net/rxrpc/ar-skbuff.c
@@ -34,7 +34,7 @@ static void rxrpc_request_final_ACK(struct rxrpc_call *call)
34 /* get an extra ref on the call for the final-ACK generator to 34 /* get an extra ref on the call for the final-ACK generator to
35 * release */ 35 * release */
36 rxrpc_get_call(call); 36 rxrpc_get_call(call);
37 set_bit(RXRPC_CALL_ACK_FINAL, &call->events); 37 set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events);
38 if (try_to_del_timer_sync(&call->ack_timer) >= 0) 38 if (try_to_del_timer_sync(&call->ack_timer) >= 0)
39 rxrpc_queue_call(call); 39 rxrpc_queue_call(call);
40 break; 40 break;
@@ -59,7 +59,7 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
59 59
60 spin_lock_bh(&call->lock); 60 spin_lock_bh(&call->lock);
61 61
62 _debug("hard ACK #%u", ntohl(sp->hdr.seq)); 62 _debug("hard ACK #%u", sp->hdr.seq);
63 63
64 for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { 64 for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
65 call->ackr_window[loop] >>= 1; 65 call->ackr_window[loop] >>= 1;
@@ -67,7 +67,7 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
67 call->ackr_window[loop + 1] << (BITS_PER_LONG - 1); 67 call->ackr_window[loop + 1] << (BITS_PER_LONG - 1);
68 } 68 }
69 69
70 seq = ntohl(sp->hdr.seq); 70 seq = sp->hdr.seq;
71 ASSERTCMP(seq, ==, call->rx_data_eaten + 1); 71 ASSERTCMP(seq, ==, call->rx_data_eaten + 1);
72 call->rx_data_eaten = seq; 72 call->rx_data_eaten = seq;
73 73
@@ -133,5 +133,4 @@ void rxrpc_kernel_free_skb(struct sk_buff *skb)
133{ 133{
134 rxrpc_free_skb(skb); 134 rxrpc_free_skb(skb);
135} 135}
136
137EXPORT_SYMBOL(rxrpc_kernel_free_skb); 136EXPORT_SYMBOL(rxrpc_kernel_free_skb);
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
index 9946467f16b4..66a1a5676446 100644
--- a/net/rxrpc/ar-transport.c
+++ b/net/rxrpc/ar-transport.c
@@ -20,7 +20,7 @@
20/* 20/*
21 * Time after last use at which transport record is cleaned up. 21 * Time after last use at which transport record is cleaned up.
22 */ 22 */
23unsigned rxrpc_transport_expiry = 3600 * 24; 23unsigned int rxrpc_transport_expiry = 3600 * 24;
24 24
25static void rxrpc_transport_reaper(struct work_struct *work); 25static void rxrpc_transport_reaper(struct work_struct *work);
26 26
@@ -51,6 +51,7 @@ static struct rxrpc_transport *rxrpc_alloc_transport(struct rxrpc_local *local,
51 spin_lock_init(&trans->client_lock); 51 spin_lock_init(&trans->client_lock);
52 rwlock_init(&trans->conn_lock); 52 rwlock_init(&trans->conn_lock);
53 atomic_set(&trans->usage, 1); 53 atomic_set(&trans->usage, 1);
54 trans->conn_idcounter = peer->srx.srx_service << 16;
54 trans->debug_id = atomic_inc_return(&rxrpc_debug_id); 55 trans->debug_id = atomic_inc_return(&rxrpc_debug_id);
55 56
56 if (peer->srx.transport.family == AF_INET) { 57 if (peer->srx.transport.family == AF_INET) {
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index d7a9ab5a9d9c..f0aeb8163688 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -9,11 +9,11 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <crypto/skcipher.h>
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/net.h> 14#include <linux/net.h>
14#include <linux/skbuff.h> 15#include <linux/skbuff.h>
15#include <linux/udp.h> 16#include <linux/udp.h>
16#include <linux/crypto.h>
17#include <linux/scatterlist.h> 17#include <linux/scatterlist.h>
18#include <linux/ctype.h> 18#include <linux/ctype.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
@@ -53,7 +53,7 @@ MODULE_LICENSE("GPL");
53 * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE 53 * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
54 * packets 54 * packets
55 */ 55 */
56static struct crypto_blkcipher *rxkad_ci; 56static struct crypto_skcipher *rxkad_ci;
57static DEFINE_MUTEX(rxkad_ci_mutex); 57static DEFINE_MUTEX(rxkad_ci_mutex);
58 58
59/* 59/*
@@ -61,7 +61,7 @@ static DEFINE_MUTEX(rxkad_ci_mutex);
61 */ 61 */
62static int rxkad_init_connection_security(struct rxrpc_connection *conn) 62static int rxkad_init_connection_security(struct rxrpc_connection *conn)
63{ 63{
64 struct crypto_blkcipher *ci; 64 struct crypto_skcipher *ci;
65 struct rxrpc_key_token *token; 65 struct rxrpc_key_token *token;
66 int ret; 66 int ret;
67 67
@@ -70,15 +70,15 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn)
70 token = conn->key->payload.data[0]; 70 token = conn->key->payload.data[0];
71 conn->security_ix = token->security_index; 71 conn->security_ix = token->security_index;
72 72
73 ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); 73 ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
74 if (IS_ERR(ci)) { 74 if (IS_ERR(ci)) {
75 _debug("no cipher"); 75 _debug("no cipher");
76 ret = PTR_ERR(ci); 76 ret = PTR_ERR(ci);
77 goto error; 77 goto error;
78 } 78 }
79 79
80 if (crypto_blkcipher_setkey(ci, token->kad->session_key, 80 if (crypto_skcipher_setkey(ci, token->kad->session_key,
81 sizeof(token->kad->session_key)) < 0) 81 sizeof(token->kad->session_key)) < 0)
82 BUG(); 82 BUG();
83 83
84 switch (conn->security_level) { 84 switch (conn->security_level) {
@@ -113,7 +113,7 @@ error:
113static void rxkad_prime_packet_security(struct rxrpc_connection *conn) 113static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
114{ 114{
115 struct rxrpc_key_token *token; 115 struct rxrpc_key_token *token;
116 struct blkcipher_desc desc; 116 SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
117 struct scatterlist sg[2]; 117 struct scatterlist sg[2];
118 struct rxrpc_crypt iv; 118 struct rxrpc_crypt iv;
119 struct { 119 struct {
@@ -128,21 +128,23 @@ static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
128 token = conn->key->payload.data[0]; 128 token = conn->key->payload.data[0];
129 memcpy(&iv, token->kad->session_key, sizeof(iv)); 129 memcpy(&iv, token->kad->session_key, sizeof(iv));
130 130
131 desc.tfm = conn->cipher; 131 tmpbuf.x[0] = htonl(conn->epoch);
132 desc.info = iv.x; 132 tmpbuf.x[1] = htonl(conn->cid);
133 desc.flags = 0;
134
135 tmpbuf.x[0] = conn->epoch;
136 tmpbuf.x[1] = conn->cid;
137 tmpbuf.x[2] = 0; 133 tmpbuf.x[2] = 0;
138 tmpbuf.x[3] = htonl(conn->security_ix); 134 tmpbuf.x[3] = htonl(conn->security_ix);
139 135
140 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); 136 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
141 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); 137 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
142 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); 138
139 skcipher_request_set_tfm(req, conn->cipher);
140 skcipher_request_set_callback(req, 0, NULL, NULL);
141 skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
142
143 crypto_skcipher_encrypt(req);
144 skcipher_request_zero(req);
143 145
144 memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv)); 146 memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
145 ASSERTCMP(conn->csum_iv.n[0], ==, tmpbuf.x[2]); 147 ASSERTCMP((u32 __force)conn->csum_iv.n[0], ==, (u32 __force)tmpbuf.x[2]);
146 148
147 _leave(""); 149 _leave("");
148} 150}
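
The rxkad.c hunks replace every on-the-spot struct blkcipher_desc with an on-stack skcipher request that is set up, run synchronously, and wiped, as in rxkad_prime_packet_security() above. A sketch of that one-shot pattern, encrypting a buffer in place (the helper name is illustrative; the API calls mirror the hunks in this file):

	#include <crypto/skcipher.h>
	#include <linux/scatterlist.h>

	static int example_encrypt_in_place(struct crypto_skcipher *tfm,
					    void *buf, unsigned int len, u8 *iv)
	{
		SKCIPHER_REQUEST_ON_STACK(req, tfm);
		struct scatterlist sg;
		int ret;

		sg_init_one(&sg, buf, len);
		skcipher_request_set_tfm(req, tfm);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);

		ret = crypto_skcipher_encrypt(req);	/* synchronous: no callback set */
		skcipher_request_zero(req);		/* scrub key material from the stack */
		return ret;
	}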
@@ -156,7 +158,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
156 void *sechdr) 158 void *sechdr)
157{ 159{
158 struct rxrpc_skb_priv *sp; 160 struct rxrpc_skb_priv *sp;
159 struct blkcipher_desc desc; 161 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
160 struct rxrpc_crypt iv; 162 struct rxrpc_crypt iv;
161 struct scatterlist sg[2]; 163 struct scatterlist sg[2];
162 struct { 164 struct {
@@ -169,21 +171,24 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
169 171
170 _enter(""); 172 _enter("");
171 173
172 check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber); 174 check = sp->hdr.seq ^ sp->hdr.callNumber;
173 data_size |= (u32) check << 16; 175 data_size |= (u32)check << 16;
174 176
175 tmpbuf.hdr.data_size = htonl(data_size); 177 tmpbuf.hdr.data_size = htonl(data_size);
176 memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first)); 178 memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
177 179
178 /* start the encryption afresh */ 180 /* start the encryption afresh */
179 memset(&iv, 0, sizeof(iv)); 181 memset(&iv, 0, sizeof(iv));
180 desc.tfm = call->conn->cipher;
181 desc.info = iv.x;
182 desc.flags = 0;
183 182
184 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); 183 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
185 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); 184 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
186 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); 185
186 skcipher_request_set_tfm(req, call->conn->cipher);
187 skcipher_request_set_callback(req, 0, NULL, NULL);
188 skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
189
190 crypto_skcipher_encrypt(req);
191 skcipher_request_zero(req);
187 192
188 memcpy(sechdr, &tmpbuf, sizeof(tmpbuf)); 193 memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
189 194
@@ -195,81 +200,91 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
195 * wholly encrypt a packet (level 2 security) 200 * wholly encrypt a packet (level 2 security)
196 */ 201 */
197static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, 202static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
198 struct sk_buff *skb, 203 struct sk_buff *skb,
199 u32 data_size, 204 u32 data_size,
200 void *sechdr) 205 void *sechdr)
201{ 206{
202 const struct rxrpc_key_token *token; 207 const struct rxrpc_key_token *token;
203 struct rxkad_level2_hdr rxkhdr 208 struct rxkad_level2_hdr rxkhdr
204 __attribute__((aligned(8))); /* must be all on one page */ 209 __attribute__((aligned(8))); /* must be all on one page */
205 struct rxrpc_skb_priv *sp; 210 struct rxrpc_skb_priv *sp;
206 struct blkcipher_desc desc; 211 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
207 struct rxrpc_crypt iv; 212 struct rxrpc_crypt iv;
208 struct scatterlist sg[16]; 213 struct scatterlist sg[16];
209 struct sk_buff *trailer; 214 struct sk_buff *trailer;
210 unsigned int len; 215 unsigned int len;
211 u16 check; 216 u16 check;
212 int nsg; 217 int nsg;
218 int err;
213 219
214 sp = rxrpc_skb(skb); 220 sp = rxrpc_skb(skb);
215 221
216 _enter(""); 222 _enter("");
217 223
218 check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber); 224 check = sp->hdr.seq ^ sp->hdr.callNumber;
219 225
220 rxkhdr.data_size = htonl(data_size | (u32) check << 16); 226 rxkhdr.data_size = htonl(data_size | (u32)check << 16);
221 rxkhdr.checksum = 0; 227 rxkhdr.checksum = 0;
222 228
223 /* encrypt from the session key */ 229 /* encrypt from the session key */
224 token = call->conn->key->payload.data[0]; 230 token = call->conn->key->payload.data[0];
225 memcpy(&iv, token->kad->session_key, sizeof(iv)); 231 memcpy(&iv, token->kad->session_key, sizeof(iv));
226 desc.tfm = call->conn->cipher;
227 desc.info = iv.x;
228 desc.flags = 0;
229 232
230 sg_init_one(&sg[0], sechdr, sizeof(rxkhdr)); 233 sg_init_one(&sg[0], sechdr, sizeof(rxkhdr));
231 sg_init_one(&sg[1], &rxkhdr, sizeof(rxkhdr)); 234 sg_init_one(&sg[1], &rxkhdr, sizeof(rxkhdr));
232 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(rxkhdr)); 235
236 skcipher_request_set_tfm(req, call->conn->cipher);
237 skcipher_request_set_callback(req, 0, NULL, NULL);
238 skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(rxkhdr), iv.x);
239
240 crypto_skcipher_encrypt(req);
233 241
234 /* we want to encrypt the skbuff in-place */ 242 /* we want to encrypt the skbuff in-place */
235 nsg = skb_cow_data(skb, 0, &trailer); 243 nsg = skb_cow_data(skb, 0, &trailer);
244 err = -ENOMEM;
236 if (nsg < 0 || nsg > 16) 245 if (nsg < 0 || nsg > 16)
237 return -ENOMEM; 246 goto out;
238 247
239 len = data_size + call->conn->size_align - 1; 248 len = data_size + call->conn->size_align - 1;
240 len &= ~(call->conn->size_align - 1); 249 len &= ~(call->conn->size_align - 1);
241 250
242 sg_init_table(sg, nsg); 251 sg_init_table(sg, nsg);
243 skb_to_sgvec(skb, sg, 0, len); 252 skb_to_sgvec(skb, sg, 0, len);
244 crypto_blkcipher_encrypt_iv(&desc, sg, sg, len); 253
254 skcipher_request_set_crypt(req, sg, sg, len, iv.x);
255
256 crypto_skcipher_encrypt(req);
245 257
246 _leave(" = 0"); 258 _leave(" = 0");
247 return 0; 259 err = 0;
260
261out:
262 skcipher_request_zero(req);
263 return err;
248} 264}
249 265
250/* 266/*
251 * checksum an RxRPC packet header 267 * checksum an RxRPC packet header
252 */ 268 */
253static int rxkad_secure_packet(const struct rxrpc_call *call, 269static int rxkad_secure_packet(const struct rxrpc_call *call,
254 struct sk_buff *skb, 270 struct sk_buff *skb,
255 size_t data_size, 271 size_t data_size,
256 void *sechdr) 272 void *sechdr)
257{ 273{
258 struct rxrpc_skb_priv *sp; 274 struct rxrpc_skb_priv *sp;
259 struct blkcipher_desc desc; 275 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
260 struct rxrpc_crypt iv; 276 struct rxrpc_crypt iv;
261 struct scatterlist sg[2]; 277 struct scatterlist sg[2];
262 struct { 278 struct {
263 __be32 x[2]; 279 __be32 x[2];
264 } tmpbuf __attribute__((aligned(8))); /* must all be in same page */ 280 } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
265 __be32 x; 281 u32 x, y;
266 u32 y;
267 int ret; 282 int ret;
268 283
269 sp = rxrpc_skb(skb); 284 sp = rxrpc_skb(skb);
270 285
271 _enter("{%d{%x}},{#%u},%zu,", 286 _enter("{%d{%x}},{#%u},%zu,",
272 call->debug_id, key_serial(call->conn->key), ntohl(sp->hdr.seq), 287 call->debug_id, key_serial(call->conn->key), sp->hdr.seq,
273 data_size); 288 data_size);
274 289
275 if (!call->conn->cipher) 290 if (!call->conn->cipher)
@@ -281,25 +296,28 @@ static int rxkad_secure_packet(const struct rxrpc_call *call,
281 296
282 /* continue encrypting from where we left off */ 297 /* continue encrypting from where we left off */
283 memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); 298 memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
284 desc.tfm = call->conn->cipher;
285 desc.info = iv.x;
286 desc.flags = 0;
287 299
288 /* calculate the security checksum */ 300 /* calculate the security checksum */
289 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT)); 301 x = call->channel << (32 - RXRPC_CIDSHIFT);
290 x |= sp->hdr.seq & cpu_to_be32(0x3fffffff); 302 x |= sp->hdr.seq & 0x3fffffff;
291 tmpbuf.x[0] = sp->hdr.callNumber; 303 tmpbuf.x[0] = htonl(sp->hdr.callNumber);
292 tmpbuf.x[1] = x; 304 tmpbuf.x[1] = htonl(x);
293 305
294 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); 306 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
295 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); 307 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
296 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); 308
309 skcipher_request_set_tfm(req, call->conn->cipher);
310 skcipher_request_set_callback(req, 0, NULL, NULL);
311 skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
312
313 crypto_skcipher_encrypt(req);
314 skcipher_request_zero(req);
297 315
298 y = ntohl(tmpbuf.x[1]); 316 y = ntohl(tmpbuf.x[1]);
299 y = (y >> 16) & 0xffff; 317 y = (y >> 16) & 0xffff;
300 if (y == 0) 318 if (y == 0)
301 y = 1; /* zero checksums are not permitted */ 319 y = 1; /* zero checksums are not permitted */
302 sp->hdr.cksum = htons(y); 320 sp->hdr.cksum = y;
303 321
304 switch (call->conn->security_level) { 322 switch (call->conn->security_level) {
305 case RXRPC_SECURITY_PLAIN: 323 case RXRPC_SECURITY_PLAIN:
@@ -330,7 +348,7 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
330{ 348{
331 struct rxkad_level1_hdr sechdr; 349 struct rxkad_level1_hdr sechdr;
332 struct rxrpc_skb_priv *sp; 350 struct rxrpc_skb_priv *sp;
333 struct blkcipher_desc desc; 351 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
334 struct rxrpc_crypt iv; 352 struct rxrpc_crypt iv;
335 struct scatterlist sg[16]; 353 struct scatterlist sg[16];
336 struct sk_buff *trailer; 354 struct sk_buff *trailer;
@@ -352,11 +370,13 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
352 370
353 /* start the decryption afresh */ 371 /* start the decryption afresh */
354 memset(&iv, 0, sizeof(iv)); 372 memset(&iv, 0, sizeof(iv));
355 desc.tfm = call->conn->cipher;
356 desc.info = iv.x;
357 desc.flags = 0;
358 373
359 crypto_blkcipher_decrypt_iv(&desc, sg, sg, 8); 374 skcipher_request_set_tfm(req, call->conn->cipher);
375 skcipher_request_set_callback(req, 0, NULL, NULL);
376 skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
377
378 crypto_skcipher_decrypt(req);
379 skcipher_request_zero(req);
360 380
361 /* remove the decrypted packet length */ 381 /* remove the decrypted packet length */
362 if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) 382 if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
@@ -368,7 +388,7 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
368 data_size = buf & 0xffff; 388 data_size = buf & 0xffff;
369 389
370 check = buf >> 16; 390 check = buf >> 16;
371 check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber); 391 check ^= sp->hdr.seq ^ sp->hdr.callNumber;
372 check &= 0xffff; 392 check &= 0xffff;
373 if (check != 0) { 393 if (check != 0) {
374 *_abort_code = RXKADSEALEDINCON; 394 *_abort_code = RXKADSEALEDINCON;
@@ -405,7 +425,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
405 const struct rxrpc_key_token *token; 425 const struct rxrpc_key_token *token;
406 struct rxkad_level2_hdr sechdr; 426 struct rxkad_level2_hdr sechdr;
407 struct rxrpc_skb_priv *sp; 427 struct rxrpc_skb_priv *sp;
408 struct blkcipher_desc desc; 428 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
409 struct rxrpc_crypt iv; 429 struct rxrpc_crypt iv;
410 struct scatterlist _sg[4], *sg; 430 struct scatterlist _sg[4], *sg;
411 struct sk_buff *trailer; 431 struct sk_buff *trailer;
@@ -435,11 +455,13 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
435 /* decrypt from the session key */ 455 /* decrypt from the session key */
436 token = call->conn->key->payload.data[0]; 456 token = call->conn->key->payload.data[0];
437 memcpy(&iv, token->kad->session_key, sizeof(iv)); 457 memcpy(&iv, token->kad->session_key, sizeof(iv));
438 desc.tfm = call->conn->cipher;
439 desc.info = iv.x;
440 desc.flags = 0;
441 458
442 crypto_blkcipher_decrypt_iv(&desc, sg, sg, skb->len); 459 skcipher_request_set_tfm(req, call->conn->cipher);
460 skcipher_request_set_callback(req, 0, NULL, NULL);
461 skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x);
462
463 crypto_skcipher_decrypt(req);
464 skcipher_request_zero(req);
443 if (sg != _sg) 465 if (sg != _sg)
444 kfree(sg); 466 kfree(sg);
445 467
@@ -453,7 +475,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
453 data_size = buf & 0xffff; 475 data_size = buf & 0xffff;
454 476
455 check = buf >> 16; 477 check = buf >> 16;
456 check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber); 478 check ^= sp->hdr.seq ^ sp->hdr.callNumber;
457 check &= 0xffff; 479 check &= 0xffff;
458 if (check != 0) { 480 if (check != 0) {
459 *_abort_code = RXKADSEALEDINCON; 481 *_abort_code = RXKADSEALEDINCON;
@@ -487,23 +509,21 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
487 struct sk_buff *skb, 509 struct sk_buff *skb,
488 u32 *_abort_code) 510 u32 *_abort_code)
489{ 511{
490 struct blkcipher_desc desc; 512 SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
491 struct rxrpc_skb_priv *sp; 513 struct rxrpc_skb_priv *sp;
492 struct rxrpc_crypt iv; 514 struct rxrpc_crypt iv;
493 struct scatterlist sg[2]; 515 struct scatterlist sg[2];
494 struct { 516 struct {
495 __be32 x[2]; 517 __be32 x[2];
496 } tmpbuf __attribute__((aligned(8))); /* must all be in same page */ 518 } tmpbuf __attribute__((aligned(8))); /* must all be in same page */
497 __be32 x; 519 u16 cksum;
498 __be16 cksum; 520 u32 x, y;
499 u32 y;
500 int ret; 521 int ret;
501 522
502 sp = rxrpc_skb(skb); 523 sp = rxrpc_skb(skb);
503 524
504 _enter("{%d{%x}},{#%u}", 525 _enter("{%d{%x}},{#%u}",
505 call->debug_id, key_serial(call->conn->key), 526 call->debug_id, key_serial(call->conn->key), sp->hdr.seq);
506 ntohl(sp->hdr.seq));
507 527
508 if (!call->conn->cipher) 528 if (!call->conn->cipher)
509 return 0; 529 return 0;
@@ -516,26 +536,28 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
516 536
517 /* continue encrypting from where we left off */ 537 /* continue encrypting from where we left off */
518 memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); 538 memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
519 desc.tfm = call->conn->cipher;
520 desc.info = iv.x;
521 desc.flags = 0;
522 539
523 /* validate the security checksum */ 540 /* validate the security checksum */
524 x = htonl(call->channel << (32 - RXRPC_CIDSHIFT)); 541 x = call->channel << (32 - RXRPC_CIDSHIFT);
525 x |= sp->hdr.seq & cpu_to_be32(0x3fffffff); 542 x |= sp->hdr.seq & 0x3fffffff;
526 tmpbuf.x[0] = call->call_id; 543 tmpbuf.x[0] = htonl(call->call_id);
527 tmpbuf.x[1] = x; 544 tmpbuf.x[1] = htonl(x);
528 545
529 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf)); 546 sg_init_one(&sg[0], &tmpbuf, sizeof(tmpbuf));
530 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf)); 547 sg_init_one(&sg[1], &tmpbuf, sizeof(tmpbuf));
531 crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf)); 548
549 skcipher_request_set_tfm(req, call->conn->cipher);
550 skcipher_request_set_callback(req, 0, NULL, NULL);
551 skcipher_request_set_crypt(req, &sg[1], &sg[0], sizeof(tmpbuf), iv.x);
552
553 crypto_skcipher_encrypt(req);
554 skcipher_request_zero(req);
532 555
533 y = ntohl(tmpbuf.x[1]); 556 y = ntohl(tmpbuf.x[1]);
534 y = (y >> 16) & 0xffff; 557 cksum = (y >> 16) & 0xffff;
535 if (y == 0) 558 if (cksum == 0)
536 y = 1; /* zero checksums are not permitted */ 559 cksum = 1; /* zero checksums are not permitted */
537 560
538 cksum = htons(y);
539 if (sp->hdr.cksum != cksum) { 561 if (sp->hdr.cksum != cksum) {
540 *_abort_code = RXKADSEALEDINCON; 562 *_abort_code = RXKADSEALEDINCON;
541 _leave(" = -EPROTO [csum failed]"); 563 _leave(" = -EPROTO [csum failed]");
@@ -567,10 +589,11 @@ static int rxkad_verify_packet(const struct rxrpc_call *call,
567static int rxkad_issue_challenge(struct rxrpc_connection *conn) 589static int rxkad_issue_challenge(struct rxrpc_connection *conn)
568{ 590{
569 struct rxkad_challenge challenge; 591 struct rxkad_challenge challenge;
570 struct rxrpc_header hdr; 592 struct rxrpc_wire_header whdr;
571 struct msghdr msg; 593 struct msghdr msg;
572 struct kvec iov[2]; 594 struct kvec iov[2];
573 size_t len; 595 size_t len;
596 u32 serial;
574 int ret; 597 int ret;
575 598
576 _enter("{%d,%x}", conn->debug_id, key_serial(conn->key)); 599 _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
@@ -592,26 +615,27 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
592 msg.msg_controllen = 0; 615 msg.msg_controllen = 0;
593 msg.msg_flags = 0; 616 msg.msg_flags = 0;
594 617
595 hdr.epoch = conn->epoch; 618 whdr.epoch = htonl(conn->epoch);
596 hdr.cid = conn->cid; 619 whdr.cid = htonl(conn->cid);
597 hdr.callNumber = 0; 620 whdr.callNumber = 0;
598 hdr.seq = 0; 621 whdr.seq = 0;
599 hdr.type = RXRPC_PACKET_TYPE_CHALLENGE; 622 whdr.type = RXRPC_PACKET_TYPE_CHALLENGE;
600 hdr.flags = conn->out_clientflag; 623 whdr.flags = conn->out_clientflag;
601 hdr.userStatus = 0; 624 whdr.userStatus = 0;
602 hdr.securityIndex = conn->security_ix; 625 whdr.securityIndex = conn->security_ix;
603 hdr._rsvd = 0; 626 whdr._rsvd = 0;
604 hdr.serviceId = conn->service_id; 627 whdr.serviceId = htons(conn->service_id);
605 628
606 iov[0].iov_base = &hdr; 629 iov[0].iov_base = &whdr;
607 iov[0].iov_len = sizeof(hdr); 630 iov[0].iov_len = sizeof(whdr);
608 iov[1].iov_base = &challenge; 631 iov[1].iov_base = &challenge;
609 iov[1].iov_len = sizeof(challenge); 632 iov[1].iov_len = sizeof(challenge);
610 633
611 len = iov[0].iov_len + iov[1].iov_len; 634 len = iov[0].iov_len + iov[1].iov_len;
612 635
613 hdr.serial = htonl(atomic_inc_return(&conn->serial)); 636 serial = atomic_inc_return(&conn->serial);
614 _proto("Tx CHALLENGE %%%u", ntohl(hdr.serial)); 637 whdr.serial = htonl(serial);
638 _proto("Tx CHALLENGE %%%u", serial);
615 639
616 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len); 640 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
617 if (ret < 0) { 641 if (ret < 0) {
@@ -627,13 +651,15 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
627 * send a Kerberos security response 651 * send a Kerberos security response
628 */ 652 */
629static int rxkad_send_response(struct rxrpc_connection *conn, 653static int rxkad_send_response(struct rxrpc_connection *conn,
630 struct rxrpc_header *hdr, 654 struct rxrpc_host_header *hdr,
631 struct rxkad_response *resp, 655 struct rxkad_response *resp,
632 const struct rxkad_key *s2) 656 const struct rxkad_key *s2)
633{ 657{
658 struct rxrpc_wire_header whdr;
634 struct msghdr msg; 659 struct msghdr msg;
635 struct kvec iov[3]; 660 struct kvec iov[3];
636 size_t len; 661 size_t len;
662 u32 serial;
637 int ret; 663 int ret;
638 664
639 _enter(""); 665 _enter("");
@@ -644,24 +670,26 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
644 msg.msg_controllen = 0; 670 msg.msg_controllen = 0;
645 msg.msg_flags = 0; 671 msg.msg_flags = 0;
646 672
647 hdr->epoch = conn->epoch; 673 memset(&whdr, 0, sizeof(whdr));
648 hdr->seq = 0; 674 whdr.epoch = htonl(hdr->epoch);
649 hdr->type = RXRPC_PACKET_TYPE_RESPONSE; 675 whdr.cid = htonl(hdr->cid);
650 hdr->flags = conn->out_clientflag; 676 whdr.type = RXRPC_PACKET_TYPE_RESPONSE;
651 hdr->userStatus = 0; 677 whdr.flags = conn->out_clientflag;
652 hdr->_rsvd = 0; 678 whdr.securityIndex = hdr->securityIndex;
679 whdr.serviceId = htons(hdr->serviceId);
653 680
654 iov[0].iov_base = hdr; 681 iov[0].iov_base = &whdr;
655 iov[0].iov_len = sizeof(*hdr); 682 iov[0].iov_len = sizeof(whdr);
656 iov[1].iov_base = resp; 683 iov[1].iov_base = resp;
657 iov[1].iov_len = sizeof(*resp); 684 iov[1].iov_len = sizeof(*resp);
658 iov[2].iov_base = (void *) s2->ticket; 685 iov[2].iov_base = (void *)s2->ticket;
659 iov[2].iov_len = s2->ticket_len; 686 iov[2].iov_len = s2->ticket_len;
660 687
661 len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len; 688 len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
662 689
663 hdr->serial = htonl(atomic_inc_return(&conn->serial)); 690 serial = atomic_inc_return(&conn->serial);
664 _proto("Tx RESPONSE %%%u", ntohl(hdr->serial)); 691 whdr.serial = htonl(serial);
692 _proto("Tx RESPONSE %%%u", serial);
665 693
666 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 3, len); 694 ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 3, len);
667 if (ret < 0) { 695 if (ret < 0) {
@@ -718,18 +746,21 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn,
718 struct rxkad_response *resp, 746 struct rxkad_response *resp,
719 const struct rxkad_key *s2) 747 const struct rxkad_key *s2)
720{ 748{
721 struct blkcipher_desc desc; 749 SKCIPHER_REQUEST_ON_STACK(req, conn->cipher);
722 struct rxrpc_crypt iv; 750 struct rxrpc_crypt iv;
723 struct scatterlist sg[2]; 751 struct scatterlist sg[2];
724 752
725 /* continue encrypting from where we left off */ 753 /* continue encrypting from where we left off */
726 memcpy(&iv, s2->session_key, sizeof(iv)); 754 memcpy(&iv, s2->session_key, sizeof(iv));
727 desc.tfm = conn->cipher;
728 desc.info = iv.x;
729 desc.flags = 0;
730 755
731 rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted)); 756 rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
732 crypto_blkcipher_encrypt_iv(&desc, sg, sg, sizeof(resp->encrypted)); 757
758 skcipher_request_set_tfm(req, conn->cipher);
759 skcipher_request_set_callback(req, 0, NULL, NULL);
760 skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
761
762 crypto_skcipher_encrypt(req);
763 skcipher_request_zero(req);
733} 764}
734 765
735/* 766/*
@@ -770,7 +801,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
770 min_level = ntohl(challenge.min_level); 801 min_level = ntohl(challenge.min_level);
771 802
772 _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }", 803 _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
773 ntohl(sp->hdr.serial), version, nonce, min_level); 804 sp->hdr.serial, version, nonce, min_level);
774 805
775 abort_code = RXKADINCONSISTENCY; 806 abort_code = RXKADINCONSISTENCY;
776 if (version != RXKAD_VERSION) 807 if (version != RXKAD_VERSION)
@@ -785,22 +816,23 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
785 /* build the response packet */ 816 /* build the response packet */
786 memset(&resp, 0, sizeof(resp)); 817 memset(&resp, 0, sizeof(resp));
787 818
788 resp.version = RXKAD_VERSION; 819 resp.version = htonl(RXKAD_VERSION);
789 resp.encrypted.epoch = conn->epoch; 820 resp.encrypted.epoch = htonl(conn->epoch);
790 resp.encrypted.cid = conn->cid; 821 resp.encrypted.cid = htonl(conn->cid);
791 resp.encrypted.securityIndex = htonl(conn->security_ix); 822 resp.encrypted.securityIndex = htonl(conn->security_ix);
823 resp.encrypted.inc_nonce = htonl(nonce + 1);
824 resp.encrypted.level = htonl(conn->security_level);
825 resp.kvno = htonl(token->kad->kvno);
826 resp.ticket_len = htonl(token->kad->ticket_len);
827
792 resp.encrypted.call_id[0] = 828 resp.encrypted.call_id[0] =
793 (conn->channels[0] ? conn->channels[0]->call_id : 0); 829 htonl(conn->channels[0] ? conn->channels[0]->call_id : 0);
794 resp.encrypted.call_id[1] = 830 resp.encrypted.call_id[1] =
795 (conn->channels[1] ? conn->channels[1]->call_id : 0); 831 htonl(conn->channels[1] ? conn->channels[1]->call_id : 0);
796 resp.encrypted.call_id[2] = 832 resp.encrypted.call_id[2] =
797 (conn->channels[2] ? conn->channels[2]->call_id : 0); 833 htonl(conn->channels[2] ? conn->channels[2]->call_id : 0);
798 resp.encrypted.call_id[3] = 834 resp.encrypted.call_id[3] =
799 (conn->channels[3] ? conn->channels[3]->call_id : 0); 835 htonl(conn->channels[3] ? conn->channels[3]->call_id : 0);
800 resp.encrypted.inc_nonce = htonl(nonce + 1);
801 resp.encrypted.level = htonl(conn->security_level);
802 resp.kvno = htonl(token->kad->kvno);
803 resp.ticket_len = htonl(token->kad->ticket_len);
804 836
805 /* calculate the response checksum and then do the encryption */ 837 /* calculate the response checksum and then do the encryption */
806 rxkad_calc_response_checksum(&resp); 838 rxkad_calc_response_checksum(&resp);
@@ -822,7 +854,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
822 time_t *_expiry, 854 time_t *_expiry,
823 u32 *_abort_code) 855 u32 *_abort_code)
824{ 856{
825 struct blkcipher_desc desc; 857 struct skcipher_request *req;
826 struct rxrpc_crypt iv, key; 858 struct rxrpc_crypt iv, key;
827 struct scatterlist sg[1]; 859 struct scatterlist sg[1];
828 struct in_addr addr; 860 struct in_addr addr;
@@ -853,12 +885,21 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
853 885
854 memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv)); 886 memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv));
855 887
856 desc.tfm = conn->server_key->payload.data[0]; 888 req = skcipher_request_alloc(conn->server_key->payload.data[0],
857 desc.info = iv.x; 889 GFP_NOFS);
858 desc.flags = 0; 890 if (!req) {
891 *_abort_code = RXKADNOAUTH;
892 ret = -ENOMEM;
893 goto error;
894 }
859 895
860 sg_init_one(&sg[0], ticket, ticket_len); 896 sg_init_one(&sg[0], ticket, ticket_len);
861 crypto_blkcipher_decrypt_iv(&desc, sg, sg, ticket_len); 897
898 skcipher_request_set_callback(req, 0, NULL, NULL);
899 skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x);
900
901 crypto_skcipher_decrypt(req);
902 skcipher_request_free(req);
862 903
863 p = ticket; 904 p = ticket;
864 end = p + ticket_len; 905 end = p + ticket_len;
@@ -966,7 +1007,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
966 struct rxkad_response *resp, 1007 struct rxkad_response *resp,
967 const struct rxrpc_crypt *session_key) 1008 const struct rxrpc_crypt *session_key)
968{ 1009{
969 struct blkcipher_desc desc; 1010 SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci);
970 struct scatterlist sg[2]; 1011 struct scatterlist sg[2];
971 struct rxrpc_crypt iv; 1012 struct rxrpc_crypt iv;
972 1013
@@ -976,17 +1017,21 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
976 ASSERT(rxkad_ci != NULL); 1017 ASSERT(rxkad_ci != NULL);
977 1018
978 mutex_lock(&rxkad_ci_mutex); 1019 mutex_lock(&rxkad_ci_mutex);
979 if (crypto_blkcipher_setkey(rxkad_ci, session_key->x, 1020 if (crypto_skcipher_setkey(rxkad_ci, session_key->x,
980 sizeof(*session_key)) < 0) 1021 sizeof(*session_key)) < 0)
981 BUG(); 1022 BUG();
982 1023
983 memcpy(&iv, session_key, sizeof(iv)); 1024 memcpy(&iv, session_key, sizeof(iv));
984 desc.tfm = rxkad_ci;
985 desc.info = iv.x;
986 desc.flags = 0;
987 1025
988 rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted)); 1026 rxkad_sg_set_buf2(sg, &resp->encrypted, sizeof(resp->encrypted));
989 crypto_blkcipher_decrypt_iv(&desc, sg, sg, sizeof(resp->encrypted)); 1027
1028 skcipher_request_set_tfm(req, rxkad_ci);
1029 skcipher_request_set_callback(req, 0, NULL, NULL);
1030 skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
1031
1032 crypto_skcipher_decrypt(req);
1033 skcipher_request_zero(req);
1034
990 mutex_unlock(&rxkad_ci_mutex); 1035 mutex_unlock(&rxkad_ci_mutex);
991 1036
992 _leave(""); 1037 _leave("");
@@ -1022,7 +1067,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
1022 kvno = ntohl(response.kvno); 1067 kvno = ntohl(response.kvno);
1023 sp = rxrpc_skb(skb); 1068 sp = rxrpc_skb(skb);
1024 _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", 1069 _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
1025 ntohl(sp->hdr.serial), version, kvno, ticket_len); 1070 sp->hdr.serial, version, kvno, ticket_len);
1026 1071
1027 abort_code = RXKADINCONSISTENCY; 1072 abort_code = RXKADINCONSISTENCY;
1028 if (version != RXKAD_VERSION) 1073 if (version != RXKAD_VERSION)
@@ -1058,9 +1103,9 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
1058 rxkad_decrypt_response(conn, &response, &session_key); 1103 rxkad_decrypt_response(conn, &response, &session_key);
1059 1104
1060 abort_code = RXKADSEALEDINCON; 1105 abort_code = RXKADSEALEDINCON;
1061 if (response.encrypted.epoch != conn->epoch) 1106 if (ntohl(response.encrypted.epoch) != conn->epoch)
1062 goto protocol_error_free; 1107 goto protocol_error_free;
1063 if (response.encrypted.cid != conn->cid) 1108 if (ntohl(response.encrypted.cid) != conn->cid)
1064 goto protocol_error_free; 1109 goto protocol_error_free;
1065 if (ntohl(response.encrypted.securityIndex) != conn->security_ix) 1110 if (ntohl(response.encrypted.securityIndex) != conn->security_ix)
1066 goto protocol_error_free; 1111 goto protocol_error_free;
@@ -1077,7 +1122,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
1077 goto protocol_error_free; 1122 goto protocol_error_free;
1078 1123
1079 abort_code = RXKADOUTOFSEQUENCE; 1124 abort_code = RXKADOUTOFSEQUENCE;
1080 if (response.encrypted.inc_nonce != htonl(conn->security_nonce + 1)) 1125 if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
1081 goto protocol_error_free; 1126 goto protocol_error_free;
1082 1127
1083 abort_code = RXKADLEVELFAIL; 1128 abort_code = RXKADLEVELFAIL;
@@ -1115,7 +1160,7 @@ static void rxkad_clear(struct rxrpc_connection *conn)
1115 _enter(""); 1160 _enter("");
1116 1161
1117 if (conn->cipher) 1162 if (conn->cipher)
1118 crypto_free_blkcipher(conn->cipher); 1163 crypto_free_skcipher(conn->cipher);
1119} 1164}
1120 1165
1121/* 1166/*
@@ -1141,7 +1186,7 @@ static __init int rxkad_init(void)
1141 1186
1142 /* pin the cipher we need so that the crypto layer doesn't invoke 1187 /* pin the cipher we need so that the crypto layer doesn't invoke
1143 * keventd to go get it */ 1188 * keventd to go get it */
1144 rxkad_ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); 1189 rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
1145 if (IS_ERR(rxkad_ci)) 1190 if (IS_ERR(rxkad_ci))
1146 return PTR_ERR(rxkad_ci); 1191 return PTR_ERR(rxkad_ci);
1147 1192
@@ -1155,7 +1200,7 @@ static __exit void rxkad_exit(void)
1155 _enter(""); 1200 _enter("");
1156 1201
1157 rxrpc_unregister_security(&rxkad); 1202 rxrpc_unregister_security(&rxkad);
1158 crypto_free_blkcipher(rxkad_ci); 1203 crypto_free_skcipher(rxkad_ci);
1159} 1204}
1160 1205
1161module_exit(rxkad_exit); 1206module_exit(rxkad_exit);
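
The rxkad hunks above all follow the same blkcipher-to-skcipher conversion: a request is either placed on the stack with SKCIPHER_REQUEST_ON_STACK() or allocated with skcipher_request_alloc(..., GFP_NOFS), bound to the tfm, given a null callback for synchronous use, pointed at the scatterlist and IV, and then zeroed or freed once the synchronous encrypt/decrypt returns. A minimal sketch of the on-stack variant, assuming a tfm already allocated with crypto_alloc_skcipher() and keyed (the function and buffer names below are illustrative, not taken from the patch):

/*
 * Hedged sketch of the blkcipher -> skcipher conversion pattern used in
 * the hunks above. Assumes "tfm" was allocated with
 * crypto_alloc_skcipher() and keyed; buffer and IV names are
 * illustrative only.
 */
static int example_encrypt(struct crypto_skcipher *tfm,
			   void *buf, unsigned int len, u8 *iv)
{
	SKCIPHER_REQUEST_ON_STACK(req, tfm);	/* request lives on the stack */
	struct scatterlist sg;
	int ret;

	sg_init_one(&sg, buf, len);

	skcipher_request_set_tfm(req, tfm);
	skcipher_request_set_callback(req, 0, NULL, NULL);	/* synchronous use */
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	ret = crypto_skcipher_encrypt(req);
	skcipher_request_zero(req);		/* wipe the request state */
	return ret;
}
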
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 50a98a910eb1..d20ed575acf4 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -15,11 +15,11 @@
15#include "ar-internal.h" 15#include "ar-internal.h"
16 16
17static struct ctl_table_header *rxrpc_sysctl_reg_table; 17static struct ctl_table_header *rxrpc_sysctl_reg_table;
18static const unsigned zero = 0; 18static const unsigned int zero = 0;
19static const unsigned one = 1; 19static const unsigned int one = 1;
20static const unsigned four = 4; 20static const unsigned int four = 4;
21static const unsigned n_65535 = 65535; 21static const unsigned int n_65535 = 65535;
22static const unsigned n_max_acks = RXRPC_MAXACKS; 22static const unsigned int n_max_acks = RXRPC_MAXACKS;
23 23
24/* 24/*
25 * RxRPC operating parameters. 25 * RxRPC operating parameters.
@@ -32,7 +32,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
32 { 32 {
33 .procname = "req_ack_delay", 33 .procname = "req_ack_delay",
34 .data = &rxrpc_requested_ack_delay, 34 .data = &rxrpc_requested_ack_delay,
35 .maxlen = sizeof(unsigned), 35 .maxlen = sizeof(unsigned int),
36 .mode = 0644, 36 .mode = 0644,
37 .proc_handler = proc_dointvec_ms_jiffies, 37 .proc_handler = proc_dointvec_ms_jiffies,
38 .extra1 = (void *)&zero, 38 .extra1 = (void *)&zero,
@@ -40,7 +40,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
40 { 40 {
41 .procname = "soft_ack_delay", 41 .procname = "soft_ack_delay",
42 .data = &rxrpc_soft_ack_delay, 42 .data = &rxrpc_soft_ack_delay,
43 .maxlen = sizeof(unsigned), 43 .maxlen = sizeof(unsigned int),
44 .mode = 0644, 44 .mode = 0644,
45 .proc_handler = proc_dointvec_ms_jiffies, 45 .proc_handler = proc_dointvec_ms_jiffies,
46 .extra1 = (void *)&one, 46 .extra1 = (void *)&one,
@@ -48,7 +48,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
48 { 48 {
49 .procname = "idle_ack_delay", 49 .procname = "idle_ack_delay",
50 .data = &rxrpc_idle_ack_delay, 50 .data = &rxrpc_idle_ack_delay,
51 .maxlen = sizeof(unsigned), 51 .maxlen = sizeof(unsigned int),
52 .mode = 0644, 52 .mode = 0644,
53 .proc_handler = proc_dointvec_ms_jiffies, 53 .proc_handler = proc_dointvec_ms_jiffies,
54 .extra1 = (void *)&one, 54 .extra1 = (void *)&one,
@@ -56,7 +56,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
56 { 56 {
57 .procname = "resend_timeout", 57 .procname = "resend_timeout",
58 .data = &rxrpc_resend_timeout, 58 .data = &rxrpc_resend_timeout,
59 .maxlen = sizeof(unsigned), 59 .maxlen = sizeof(unsigned int),
60 .mode = 0644, 60 .mode = 0644,
61 .proc_handler = proc_dointvec_ms_jiffies, 61 .proc_handler = proc_dointvec_ms_jiffies,
62 .extra1 = (void *)&one, 62 .extra1 = (void *)&one,
@@ -66,7 +66,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
66 { 66 {
67 .procname = "max_call_lifetime", 67 .procname = "max_call_lifetime",
68 .data = &rxrpc_max_call_lifetime, 68 .data = &rxrpc_max_call_lifetime,
69 .maxlen = sizeof(unsigned), 69 .maxlen = sizeof(unsigned int),
70 .mode = 0644, 70 .mode = 0644,
71 .proc_handler = proc_dointvec_jiffies, 71 .proc_handler = proc_dointvec_jiffies,
72 .extra1 = (void *)&one, 72 .extra1 = (void *)&one,
@@ -74,7 +74,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
74 { 74 {
75 .procname = "dead_call_expiry", 75 .procname = "dead_call_expiry",
76 .data = &rxrpc_dead_call_expiry, 76 .data = &rxrpc_dead_call_expiry,
77 .maxlen = sizeof(unsigned), 77 .maxlen = sizeof(unsigned int),
78 .mode = 0644, 78 .mode = 0644,
79 .proc_handler = proc_dointvec_jiffies, 79 .proc_handler = proc_dointvec_jiffies,
80 .extra1 = (void *)&one, 80 .extra1 = (void *)&one,
@@ -84,7 +84,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
84 { 84 {
85 .procname = "connection_expiry", 85 .procname = "connection_expiry",
86 .data = &rxrpc_connection_expiry, 86 .data = &rxrpc_connection_expiry,
87 .maxlen = sizeof(unsigned), 87 .maxlen = sizeof(unsigned int),
88 .mode = 0644, 88 .mode = 0644,
89 .proc_handler = proc_dointvec_minmax, 89 .proc_handler = proc_dointvec_minmax,
90 .extra1 = (void *)&one, 90 .extra1 = (void *)&one,
@@ -92,7 +92,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
92 { 92 {
93 .procname = "transport_expiry", 93 .procname = "transport_expiry",
94 .data = &rxrpc_transport_expiry, 94 .data = &rxrpc_transport_expiry,
95 .maxlen = sizeof(unsigned), 95 .maxlen = sizeof(unsigned int),
96 .mode = 0644, 96 .mode = 0644,
97 .proc_handler = proc_dointvec_minmax, 97 .proc_handler = proc_dointvec_minmax,
98 .extra1 = (void *)&one, 98 .extra1 = (void *)&one,
@@ -102,7 +102,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
102 { 102 {
103 .procname = "rx_window_size", 103 .procname = "rx_window_size",
104 .data = &rxrpc_rx_window_size, 104 .data = &rxrpc_rx_window_size,
105 .maxlen = sizeof(unsigned), 105 .maxlen = sizeof(unsigned int),
106 .mode = 0644, 106 .mode = 0644,
107 .proc_handler = proc_dointvec_minmax, 107 .proc_handler = proc_dointvec_minmax,
108 .extra1 = (void *)&one, 108 .extra1 = (void *)&one,
@@ -111,16 +111,16 @@ static struct ctl_table rxrpc_sysctl_table[] = {
111 { 111 {
112 .procname = "rx_mtu", 112 .procname = "rx_mtu",
113 .data = &rxrpc_rx_mtu, 113 .data = &rxrpc_rx_mtu,
114 .maxlen = sizeof(unsigned), 114 .maxlen = sizeof(unsigned int),
115 .mode = 0644, 115 .mode = 0644,
116 .proc_handler = proc_dointvec_minmax, 116 .proc_handler = proc_dointvec_minmax,
117 .extra1 = (void *)&one, 117 .extra1 = (void *)&one,
118 .extra1 = (void *)&n_65535, 118 .extra2 = (void *)&n_65535,
119 }, 119 },
120 { 120 {
121 .procname = "rx_jumbo_max", 121 .procname = "rx_jumbo_max",
122 .data = &rxrpc_rx_jumbo_max, 122 .data = &rxrpc_rx_jumbo_max,
123 .maxlen = sizeof(unsigned), 123 .maxlen = sizeof(unsigned int),
124 .mode = 0644, 124 .mode = 0644,
125 .proc_handler = proc_dointvec_minmax, 125 .proc_handler = proc_dointvec_minmax,
126 .extra1 = (void *)&one, 126 .extra1 = (void *)&one,
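
Besides the unsigned -> unsigned int cleanups, the last hunk fixes a duplicated designated initializer: the rx_mtu entry previously set .extra1 twice, so the second value (n_65535) silently overrode the first and nothing was ever wired to .extra2. For proc_dointvec_minmax, .extra1 supplies the minimum and .extra2 the maximum. An illustrative entry showing the intended shape (a sketch reusing the names from the table above, not the patched source):

/* Illustrative ctl_table entry: proc_dointvec_minmax clamps writes to
 * the range [*extra1, *extra2]. Sketch only.
 */
static struct ctl_table example_rxrpc_table[] = {
	{
		.procname	= "rx_mtu",
		.data		= &rxrpc_rx_mtu,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= (void *)&one,		/* minimum */
		.extra2		= (void *)&n_65535,	/* maximum */
	},
	{ }
};
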
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 82830824fb1f..b148302bbaf2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -739,6 +739,28 @@ config NET_ACT_CONNMARK
739 To compile this code as a module, choose M here: the 739 To compile this code as a module, choose M here: the
740 module will be called act_connmark. 740 module will be called act_connmark.
741 741
742config NET_ACT_IFE
743 tristate "Inter-FE action based on IETF ForCES InterFE LFB"
744 depends on NET_CLS_ACT
745 ---help---
746	  Say Y here to allow for sourcing and terminating metadata.
747	  For details, refer to the netdev01 paper:
748 "Distributing Linux Traffic Control Classifier-Action Subsystem"
749 Authors: Jamal Hadi Salim and Damascene M. Joachimpillai
750
751 To compile this code as a module, choose M here: the
752 module will be called act_ife.
753
754config NET_IFE_SKBMARK
755	tristate "Support for encoding/decoding the skb mark on the IFE action"
756 depends on NET_ACT_IFE
757 ---help---
758
759config NET_IFE_SKBPRIO
760	tristate "Support for encoding/decoding the skb prio on the IFE action"
761 depends on NET_ACT_IFE
762 ---help---
763
742config NET_CLS_IND 764config NET_CLS_IND
743 bool "Incoming device classification" 765 bool "Incoming device classification"
744 depends on NET_CLS_U32 || NET_CLS_FW 766 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 690c1689e090..84bddb373517 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -19,6 +19,9 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
19obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o 19obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o
20obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o 20obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o
21obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o 21obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o
22obj-$(CONFIG_NET_ACT_IFE) += act_ife.o
23obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o
24obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o
22obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o 25obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
23obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o 26obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
24obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o 27obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 06e7c4a37245..96066665e376 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -36,10 +36,9 @@ static void free_tcf(struct rcu_head *head)
36 kfree(p); 36 kfree(p);
37} 37}
38 38
39static void tcf_hash_destroy(struct tc_action *a) 39static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *a)
40{ 40{
41 struct tcf_common *p = a->priv; 41 struct tcf_common *p = a->priv;
42 struct tcf_hashinfo *hinfo = a->ops->hinfo;
43 42
44 spin_lock_bh(&hinfo->lock); 43 spin_lock_bh(&hinfo->lock);
45 hlist_del(&p->tcfc_head); 44 hlist_del(&p->tcfc_head);
@@ -68,8 +67,8 @@ int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
68 if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) { 67 if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
69 if (a->ops->cleanup) 68 if (a->ops->cleanup)
70 a->ops->cleanup(a, bind); 69 a->ops->cleanup(a, bind);
71 tcf_hash_destroy(a); 70 tcf_hash_destroy(a->hinfo, a);
72 ret = 1; 71 ret = ACT_P_DELETED;
73 } 72 }
74 } 73 }
75 74
@@ -77,10 +76,9 @@ int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
77} 76}
78EXPORT_SYMBOL(__tcf_hash_release); 77EXPORT_SYMBOL(__tcf_hash_release);
79 78
80static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, 79static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
81 struct tc_action *a) 80 struct netlink_callback *cb, struct tc_action *a)
82{ 81{
83 struct tcf_hashinfo *hinfo = a->ops->hinfo;
84 struct hlist_head *head; 82 struct hlist_head *head;
85 struct tcf_common *p; 83 struct tcf_common *p;
86 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 84 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
@@ -126,9 +124,9 @@ nla_put_failure:
126 goto done; 124 goto done;
127} 125}
128 126
129static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) 127static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
128 struct tc_action *a)
130{ 129{
131 struct tcf_hashinfo *hinfo = a->ops->hinfo;
132 struct hlist_head *head; 130 struct hlist_head *head;
133 struct hlist_node *n; 131 struct hlist_node *n;
134 struct tcf_common *p; 132 struct tcf_common *p;
@@ -163,18 +161,24 @@ nla_put_failure:
163 return ret; 161 return ret;
164} 162}
165 163
166static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, 164int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
167 int type, struct tc_action *a) 165 struct netlink_callback *cb, int type,
166 struct tc_action *a)
168{ 167{
168 struct tcf_hashinfo *hinfo = tn->hinfo;
169
170 a->hinfo = hinfo;
171
169 if (type == RTM_DELACTION) { 172 if (type == RTM_DELACTION) {
170 return tcf_del_walker(skb, a); 173 return tcf_del_walker(hinfo, skb, a);
171 } else if (type == RTM_GETACTION) { 174 } else if (type == RTM_GETACTION) {
172 return tcf_dump_walker(skb, cb, a); 175 return tcf_dump_walker(hinfo, skb, cb, a);
173 } else { 176 } else {
174 WARN(1, "tcf_generic_walker: unknown action %d\n", type); 177 WARN(1, "tcf_generic_walker: unknown action %d\n", type);
175 return -EINVAL; 178 return -EINVAL;
176 } 179 }
177} 180}
181EXPORT_SYMBOL(tcf_generic_walker);
178 182
179static struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) 183static struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
180{ 184{
@@ -191,8 +195,9 @@ static struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
191 return p; 195 return p;
192} 196}
193 197
194u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo) 198u32 tcf_hash_new_index(struct tc_action_net *tn)
195{ 199{
200 struct tcf_hashinfo *hinfo = tn->hinfo;
196 u32 val = hinfo->index; 201 u32 val = hinfo->index;
197 202
198 do { 203 do {
@@ -205,28 +210,31 @@ u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo)
205} 210}
206EXPORT_SYMBOL(tcf_hash_new_index); 211EXPORT_SYMBOL(tcf_hash_new_index);
207 212
208int tcf_hash_search(struct tc_action *a, u32 index) 213int tcf_hash_search(struct tc_action_net *tn, struct tc_action *a, u32 index)
209{ 214{
210 struct tcf_hashinfo *hinfo = a->ops->hinfo; 215 struct tcf_hashinfo *hinfo = tn->hinfo;
211 struct tcf_common *p = tcf_hash_lookup(index, hinfo); 216 struct tcf_common *p = tcf_hash_lookup(index, hinfo);
212 217
213 if (p) { 218 if (p) {
214 a->priv = p; 219 a->priv = p;
220 a->hinfo = hinfo;
215 return 1; 221 return 1;
216 } 222 }
217 return 0; 223 return 0;
218} 224}
219EXPORT_SYMBOL(tcf_hash_search); 225EXPORT_SYMBOL(tcf_hash_search);
220 226
221int tcf_hash_check(u32 index, struct tc_action *a, int bind) 227int tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action *a,
228 int bind)
222{ 229{
223 struct tcf_hashinfo *hinfo = a->ops->hinfo; 230 struct tcf_hashinfo *hinfo = tn->hinfo;
224 struct tcf_common *p = NULL; 231 struct tcf_common *p = NULL;
225 if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { 232 if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
226 if (bind) 233 if (bind)
227 p->tcfc_bindcnt++; 234 p->tcfc_bindcnt++;
228 p->tcfc_refcnt++; 235 p->tcfc_refcnt++;
229 a->priv = p; 236 a->priv = p;
237 a->hinfo = hinfo;
230 return 1; 238 return 1;
231 } 239 }
232 return 0; 240 return 0;
@@ -243,11 +251,11 @@ void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
243} 251}
244EXPORT_SYMBOL(tcf_hash_cleanup); 252EXPORT_SYMBOL(tcf_hash_cleanup);
245 253
246int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, 254int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
247 int size, int bind, bool cpustats) 255 struct tc_action *a, int size, int bind, bool cpustats)
248{ 256{
249 struct tcf_hashinfo *hinfo = a->ops->hinfo;
250 struct tcf_common *p = kzalloc(size, GFP_KERNEL); 257 struct tcf_common *p = kzalloc(size, GFP_KERNEL);
258 struct tcf_hashinfo *hinfo = tn->hinfo;
251 int err = -ENOMEM; 259 int err = -ENOMEM;
252 260
253 if (unlikely(!p)) 261 if (unlikely(!p))
@@ -272,7 +280,7 @@ err2:
272 } 280 }
273 spin_lock_init(&p->tcfc_lock); 281 spin_lock_init(&p->tcfc_lock);
274 INIT_HLIST_NODE(&p->tcfc_head); 282 INIT_HLIST_NODE(&p->tcfc_head);
275 p->tcfc_index = index ? index : tcf_hash_new_index(hinfo); 283 p->tcfc_index = index ? index : tcf_hash_new_index(tn);
276 p->tcfc_tm.install = jiffies; 284 p->tcfc_tm.install = jiffies;
277 p->tcfc_tm.lastuse = jiffies; 285 p->tcfc_tm.lastuse = jiffies;
278 if (est) { 286 if (est) {
@@ -286,14 +294,15 @@ err2:
286 } 294 }
287 295
288 a->priv = (void *) p; 296 a->priv = (void *) p;
297 a->hinfo = hinfo;
289 return 0; 298 return 0;
290} 299}
291EXPORT_SYMBOL(tcf_hash_create); 300EXPORT_SYMBOL(tcf_hash_create);
292 301
293void tcf_hash_insert(struct tc_action *a) 302void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a)
294{ 303{
295 struct tcf_common *p = a->priv; 304 struct tcf_common *p = a->priv;
296 struct tcf_hashinfo *hinfo = a->ops->hinfo; 305 struct tcf_hashinfo *hinfo = tn->hinfo;
297 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); 306 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
298 307
299 spin_lock_bh(&hinfo->lock); 308 spin_lock_bh(&hinfo->lock);
@@ -302,59 +311,78 @@ void tcf_hash_insert(struct tc_action *a)
302} 311}
303EXPORT_SYMBOL(tcf_hash_insert); 312EXPORT_SYMBOL(tcf_hash_insert);
304 313
314void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
315 struct tcf_hashinfo *hinfo)
316{
317 struct tc_action a = {
318 .ops = ops,
319 .hinfo = hinfo,
320 };
321 int i;
322
323 for (i = 0; i < hinfo->hmask + 1; i++) {
324 struct tcf_common *p;
325 struct hlist_node *n;
326
327 hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfc_head) {
328 int ret;
329
330 a.priv = p;
331 ret = __tcf_hash_release(&a, false, true);
332 if (ret == ACT_P_DELETED)
333 module_put(ops->owner);
334 else if (ret < 0)
335 return;
336 }
337 }
338 kfree(hinfo->htab);
339}
340EXPORT_SYMBOL(tcf_hashinfo_destroy);
341
305static LIST_HEAD(act_base); 342static LIST_HEAD(act_base);
306static DEFINE_RWLOCK(act_mod_lock); 343static DEFINE_RWLOCK(act_mod_lock);
307 344
308int tcf_register_action(struct tc_action_ops *act, unsigned int mask) 345int tcf_register_action(struct tc_action_ops *act,
346 struct pernet_operations *ops)
309{ 347{
310 struct tc_action_ops *a; 348 struct tc_action_ops *a;
311 int err; 349 int ret;
312 350
313 /* Must supply act, dump and init */ 351 if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup)
314 if (!act->act || !act->dump || !act->init)
315 return -EINVAL; 352 return -EINVAL;
316 353
317 /* Supply defaults */
318 if (!act->lookup)
319 act->lookup = tcf_hash_search;
320 if (!act->walk)
321 act->walk = tcf_generic_walker;
322
323 act->hinfo = kmalloc(sizeof(struct tcf_hashinfo), GFP_KERNEL);
324 if (!act->hinfo)
325 return -ENOMEM;
326 err = tcf_hashinfo_init(act->hinfo, mask);
327 if (err) {
328 kfree(act->hinfo);
329 return err;
330 }
331
332 write_lock(&act_mod_lock); 354 write_lock(&act_mod_lock);
333 list_for_each_entry(a, &act_base, head) { 355 list_for_each_entry(a, &act_base, head) {
334 if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { 356 if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
335 write_unlock(&act_mod_lock); 357 write_unlock(&act_mod_lock);
336 tcf_hashinfo_destroy(act->hinfo);
337 kfree(act->hinfo);
338 return -EEXIST; 358 return -EEXIST;
339 } 359 }
340 } 360 }
341 list_add_tail(&act->head, &act_base); 361 list_add_tail(&act->head, &act_base);
342 write_unlock(&act_mod_lock); 362 write_unlock(&act_mod_lock);
363
364 ret = register_pernet_subsys(ops);
365 if (ret) {
366 tcf_unregister_action(act, ops);
367 return ret;
368 }
369
343 return 0; 370 return 0;
344} 371}
345EXPORT_SYMBOL(tcf_register_action); 372EXPORT_SYMBOL(tcf_register_action);
346 373
347int tcf_unregister_action(struct tc_action_ops *act) 374int tcf_unregister_action(struct tc_action_ops *act,
375 struct pernet_operations *ops)
348{ 376{
349 struct tc_action_ops *a; 377 struct tc_action_ops *a;
350 int err = -ENOENT; 378 int err = -ENOENT;
351 379
380 unregister_pernet_subsys(ops);
381
352 write_lock(&act_mod_lock); 382 write_lock(&act_mod_lock);
353 list_for_each_entry(a, &act_base, head) { 383 list_for_each_entry(a, &act_base, head) {
354 if (a == act) { 384 if (a == act) {
355 list_del(&act->head); 385 list_del(&act->head);
356 tcf_hashinfo_destroy(act->hinfo);
357 kfree(act->hinfo);
358 err = 0; 386 err = 0;
359 break; 387 break;
360 } 388 }
@@ -721,8 +749,8 @@ static struct tc_action *create_a(int i)
721 return act; 749 return act;
722} 750}
723 751
724static struct tc_action * 752static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
725tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) 753 struct nlmsghdr *n, u32 portid)
726{ 754{
727 struct nlattr *tb[TCA_ACT_MAX + 1]; 755 struct nlattr *tb[TCA_ACT_MAX + 1];
728 struct tc_action *a; 756 struct tc_action *a;
@@ -749,7 +777,7 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
749 if (a->ops == NULL) /* could happen in batch of actions */ 777 if (a->ops == NULL) /* could happen in batch of actions */
750 goto err_free; 778 goto err_free;
751 err = -ENOENT; 779 err = -ENOENT;
752 if (a->ops->lookup(a, index) == 0) 780 if (a->ops->lookup(net, a, index) == 0)
753 goto err_mod; 781 goto err_mod;
754 782
755 module_put(a->ops->owner); 783 module_put(a->ops->owner);
@@ -819,7 +847,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
819 if (nest == NULL) 847 if (nest == NULL)
820 goto out_module_put; 848 goto out_module_put;
821 849
822 err = a.ops->walk(skb, &dcb, RTM_DELACTION, &a); 850 err = a.ops->walk(net, skb, &dcb, RTM_DELACTION, &a);
823 if (err < 0) 851 if (err < 0)
824 goto out_module_put; 852 goto out_module_put;
825 if (err == 0) 853 if (err == 0)
@@ -897,7 +925,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
897 } 925 }
898 926
899 for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { 927 for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
900 act = tcf_action_get_1(tb[i], n, portid); 928 act = tcf_action_get_1(net, tb[i], n, portid);
901 if (IS_ERR(act)) { 929 if (IS_ERR(act)) {
902 ret = PTR_ERR(act); 930 ret = PTR_ERR(act);
903 goto err; 931 goto err;
@@ -1044,6 +1072,7 @@ find_dump_kind(const struct nlmsghdr *n)
1044static int 1072static int
1045tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) 1073tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1046{ 1074{
1075 struct net *net = sock_net(skb->sk);
1047 struct nlmsghdr *nlh; 1076 struct nlmsghdr *nlh;
1048 unsigned char *b = skb_tail_pointer(skb); 1077 unsigned char *b = skb_tail_pointer(skb);
1049 struct nlattr *nest; 1078 struct nlattr *nest;
@@ -1078,7 +1107,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1078 if (nest == NULL) 1107 if (nest == NULL)
1079 goto out_module_put; 1108 goto out_module_put;
1080 1109
1081 ret = a_o->walk(skb, cb, RTM_GETACTION, &a); 1110 ret = a_o->walk(net, skb, cb, RTM_GETACTION, &a);
1082 if (ret < 0) 1111 if (ret < 0)
1083 goto out_module_put; 1112 goto out_module_put;
1084 1113
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 0bc6f912f870..8c9f1f0459ab 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -33,6 +33,8 @@ struct tcf_bpf_cfg {
33 bool is_ebpf; 33 bool is_ebpf;
34}; 34};
35 35
36static int bpf_net_id;
37
36static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, 38static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
37 struct tcf_result *res) 39 struct tcf_result *res)
38{ 40{
@@ -275,6 +277,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
275 struct nlattr *est, struct tc_action *act, 277 struct nlattr *est, struct tc_action *act,
276 int replace, int bind) 278 int replace, int bind)
277{ 279{
280 struct tc_action_net *tn = net_generic(net, bpf_net_id);
278 struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; 281 struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
279 struct tcf_bpf_cfg cfg, old; 282 struct tcf_bpf_cfg cfg, old;
280 struct tc_act_bpf *parm; 283 struct tc_act_bpf *parm;
@@ -294,8 +297,8 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
294 297
295 parm = nla_data(tb[TCA_ACT_BPF_PARMS]); 298 parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
296 299
297 if (!tcf_hash_check(parm->index, act, bind)) { 300 if (!tcf_hash_check(tn, parm->index, act, bind)) {
298 ret = tcf_hash_create(parm->index, est, act, 301 ret = tcf_hash_create(tn, parm->index, est, act,
299 sizeof(*prog), bind, true); 302 sizeof(*prog), bind, true);
300 if (ret < 0) 303 if (ret < 0)
301 return ret; 304 return ret;
@@ -344,7 +347,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
344 rcu_assign_pointer(prog->filter, cfg.filter); 347 rcu_assign_pointer(prog->filter, cfg.filter);
345 348
346 if (res == ACT_P_CREATED) { 349 if (res == ACT_P_CREATED) {
347 tcf_hash_insert(act); 350 tcf_hash_insert(tn, act);
348 } else { 351 } else {
349 /* make sure the program being replaced is no longer executing */ 352 /* make sure the program being replaced is no longer executing */
350 synchronize_rcu(); 353 synchronize_rcu();
@@ -367,6 +370,22 @@ static void tcf_bpf_cleanup(struct tc_action *act, int bind)
367 tcf_bpf_cfg_cleanup(&tmp); 370 tcf_bpf_cfg_cleanup(&tmp);
368} 371}
369 372
373static int tcf_bpf_walker(struct net *net, struct sk_buff *skb,
374 struct netlink_callback *cb, int type,
375 struct tc_action *a)
376{
377 struct tc_action_net *tn = net_generic(net, bpf_net_id);
378
379 return tcf_generic_walker(tn, skb, cb, type, a);
380}
381
382static int tcf_bpf_search(struct net *net, struct tc_action *a, u32 index)
383{
384 struct tc_action_net *tn = net_generic(net, bpf_net_id);
385
386 return tcf_hash_search(tn, a, index);
387}
388
370static struct tc_action_ops act_bpf_ops __read_mostly = { 389static struct tc_action_ops act_bpf_ops __read_mostly = {
371 .kind = "bpf", 390 .kind = "bpf",
372 .type = TCA_ACT_BPF, 391 .type = TCA_ACT_BPF,
@@ -375,16 +394,39 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
375 .dump = tcf_bpf_dump, 394 .dump = tcf_bpf_dump,
376 .cleanup = tcf_bpf_cleanup, 395 .cleanup = tcf_bpf_cleanup,
377 .init = tcf_bpf_init, 396 .init = tcf_bpf_init,
397 .walk = tcf_bpf_walker,
398 .lookup = tcf_bpf_search,
399};
400
401static __net_init int bpf_init_net(struct net *net)
402{
403 struct tc_action_net *tn = net_generic(net, bpf_net_id);
404
405 return tc_action_net_init(tn, &act_bpf_ops, BPF_TAB_MASK);
406}
407
408static void __net_exit bpf_exit_net(struct net *net)
409{
410 struct tc_action_net *tn = net_generic(net, bpf_net_id);
411
412 tc_action_net_exit(tn);
413}
414
415static struct pernet_operations bpf_net_ops = {
416 .init = bpf_init_net,
417 .exit = bpf_exit_net,
418 .id = &bpf_net_id,
419 .size = sizeof(struct tc_action_net),
378}; 420};
379 421
380static int __init bpf_init_module(void) 422static int __init bpf_init_module(void)
381{ 423{
382 return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK); 424 return tcf_register_action(&act_bpf_ops, &bpf_net_ops);
383} 425}
384 426
385static void __exit bpf_cleanup_module(void) 427static void __exit bpf_cleanup_module(void)
386{ 428{
387 tcf_unregister_action(&act_bpf_ops); 429 tcf_unregister_action(&act_bpf_ops, &bpf_net_ops);
388} 430}
389 431
390module_init(bpf_init_module); 432module_init(bpf_init_module);
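
act_bpf is the first module converted to the per-netns layout introduced in act_api.c above; the same boilerplate is repeated below for connmark, csum, gact and the new ife action. Each action now carries a net-generic id, a pernet_operations block that sets up and tears down its struct tc_action_net, and thin .walk/.lookup callbacks that resolve the per-net hash table through net_generic() before delegating to tcf_generic_walker()/tcf_hash_search(). A sketch of the pattern with a placeholder "foo" action (foo_net_id, act_foo_ops and FOO_TAB_MASK are illustrative names, not from the patch):

/* Sketch of the per-netns action boilerplate introduced above.
 * "foo" is a placeholder action; only the shape of the pattern matters.
 */
static int foo_net_id;

static int tcf_foo_walker(struct net *net, struct sk_buff *skb,
			  struct netlink_callback *cb, int type,
			  struct tc_action *a)
{
	struct tc_action_net *tn = net_generic(net, foo_net_id);

	return tcf_generic_walker(tn, skb, cb, type, a);
}

static int tcf_foo_search(struct net *net, struct tc_action *a, u32 index)
{
	struct tc_action_net *tn = net_generic(net, foo_net_id);

	return tcf_hash_search(tn, a, index);
}

static __net_init int foo_init_net(struct net *net)
{
	struct tc_action_net *tn = net_generic(net, foo_net_id);

	return tc_action_net_init(tn, &act_foo_ops, FOO_TAB_MASK);
}

static void __net_exit foo_exit_net(struct net *net)
{
	tc_action_net_exit(net_generic(net, foo_net_id));
}

static struct pernet_operations foo_net_ops = {
	.init = foo_init_net,
	.exit = foo_exit_net,
	.id   = &foo_net_id,
	.size = sizeof(struct tc_action_net),
};

/* module init/exit then pair tcf_register_action()/tcf_unregister_action()
 * with &foo_net_ops instead of the old hash mask argument.
 */
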
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index bb41699c6c49..c0ed93ce2391 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -30,6 +30,8 @@
30 30
31#define CONNMARK_TAB_MASK 3 31#define CONNMARK_TAB_MASK 3
32 32
33static int connmark_net_id;
34
33static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, 35static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
34 struct tcf_result *res) 36 struct tcf_result *res)
35{ 37{
@@ -97,6 +99,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
97 struct nlattr *est, struct tc_action *a, 99 struct nlattr *est, struct tc_action *a,
98 int ovr, int bind) 100 int ovr, int bind)
99{ 101{
102 struct tc_action_net *tn = net_generic(net, connmark_net_id);
100 struct nlattr *tb[TCA_CONNMARK_MAX + 1]; 103 struct nlattr *tb[TCA_CONNMARK_MAX + 1];
101 struct tcf_connmark_info *ci; 104 struct tcf_connmark_info *ci;
102 struct tc_connmark *parm; 105 struct tc_connmark *parm;
@@ -111,9 +114,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
111 114
112 parm = nla_data(tb[TCA_CONNMARK_PARMS]); 115 parm = nla_data(tb[TCA_CONNMARK_PARMS]);
113 116
114 if (!tcf_hash_check(parm->index, a, bind)) { 117 if (!tcf_hash_check(tn, parm->index, a, bind)) {
115 ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), 118 ret = tcf_hash_create(tn, parm->index, est, a,
116 bind, false); 119 sizeof(*ci), bind, false);
117 if (ret) 120 if (ret)
118 return ret; 121 return ret;
119 122
@@ -122,7 +125,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
122 ci->net = net; 125 ci->net = net;
123 ci->zone = parm->zone; 126 ci->zone = parm->zone;
124 127
125 tcf_hash_insert(a); 128 tcf_hash_insert(tn, a);
126 ret = ACT_P_CREATED; 129 ret = ACT_P_CREATED;
127 } else { 130 } else {
128 ci = to_connmark(a); 131 ci = to_connmark(a);
@@ -169,6 +172,22 @@ nla_put_failure:
169 return -1; 172 return -1;
170} 173}
171 174
175static int tcf_connmark_walker(struct net *net, struct sk_buff *skb,
176 struct netlink_callback *cb, int type,
177 struct tc_action *a)
178{
179 struct tc_action_net *tn = net_generic(net, connmark_net_id);
180
181 return tcf_generic_walker(tn, skb, cb, type, a);
182}
183
184static int tcf_connmark_search(struct net *net, struct tc_action *a, u32 index)
185{
186 struct tc_action_net *tn = net_generic(net, connmark_net_id);
187
188 return tcf_hash_search(tn, a, index);
189}
190
172static struct tc_action_ops act_connmark_ops = { 191static struct tc_action_ops act_connmark_ops = {
173 .kind = "connmark", 192 .kind = "connmark",
174 .type = TCA_ACT_CONNMARK, 193 .type = TCA_ACT_CONNMARK,
@@ -176,16 +195,39 @@ static struct tc_action_ops act_connmark_ops = {
176 .act = tcf_connmark, 195 .act = tcf_connmark,
177 .dump = tcf_connmark_dump, 196 .dump = tcf_connmark_dump,
178 .init = tcf_connmark_init, 197 .init = tcf_connmark_init,
198 .walk = tcf_connmark_walker,
199 .lookup = tcf_connmark_search,
200};
201
202static __net_init int connmark_init_net(struct net *net)
203{
204 struct tc_action_net *tn = net_generic(net, connmark_net_id);
205
206 return tc_action_net_init(tn, &act_connmark_ops, CONNMARK_TAB_MASK);
207}
208
209static void __net_exit connmark_exit_net(struct net *net)
210{
211 struct tc_action_net *tn = net_generic(net, connmark_net_id);
212
213 tc_action_net_exit(tn);
214}
215
216static struct pernet_operations connmark_net_ops = {
217 .init = connmark_init_net,
218 .exit = connmark_exit_net,
219 .id = &connmark_net_id,
220 .size = sizeof(struct tc_action_net),
179}; 221};
180 222
181static int __init connmark_init_module(void) 223static int __init connmark_init_module(void)
182{ 224{
183 return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK); 225 return tcf_register_action(&act_connmark_ops, &connmark_net_ops);
184} 226}
185 227
186static void __exit connmark_cleanup_module(void) 228static void __exit connmark_cleanup_module(void)
187{ 229{
188 tcf_unregister_action(&act_connmark_ops); 230 tcf_unregister_action(&act_connmark_ops, &connmark_net_ops);
189} 231}
190 232
191module_init(connmark_init_module); 233module_init(connmark_init_module);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b07c535ba8e7..d22426cdebc0 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -42,9 +42,13 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
42 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, 42 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
43}; 43};
44 44
45static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est, 45static int csum_net_id;
46 struct tc_action *a, int ovr, int bind) 46
47static int tcf_csum_init(struct net *net, struct nlattr *nla,
48 struct nlattr *est, struct tc_action *a, int ovr,
49 int bind)
47{ 50{
51 struct tc_action_net *tn = net_generic(net, csum_net_id);
48 struct nlattr *tb[TCA_CSUM_MAX + 1]; 52 struct nlattr *tb[TCA_CSUM_MAX + 1];
49 struct tc_csum *parm; 53 struct tc_csum *parm;
50 struct tcf_csum *p; 54 struct tcf_csum *p;
@@ -61,9 +65,9 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
61 return -EINVAL; 65 return -EINVAL;
62 parm = nla_data(tb[TCA_CSUM_PARMS]); 66 parm = nla_data(tb[TCA_CSUM_PARMS]);
63 67
64 if (!tcf_hash_check(parm->index, a, bind)) { 68 if (!tcf_hash_check(tn, parm->index, a, bind)) {
65 ret = tcf_hash_create(parm->index, est, a, sizeof(*p), 69 ret = tcf_hash_create(tn, parm->index, est, a,
66 bind, false); 70 sizeof(*p), bind, false);
67 if (ret) 71 if (ret)
68 return ret; 72 return ret;
69 ret = ACT_P_CREATED; 73 ret = ACT_P_CREATED;
@@ -82,7 +86,7 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
82 spin_unlock_bh(&p->tcf_lock); 86 spin_unlock_bh(&p->tcf_lock);
83 87
84 if (ret == ACT_P_CREATED) 88 if (ret == ACT_P_CREATED)
85 tcf_hash_insert(a); 89 tcf_hash_insert(tn, a);
86 90
87 return ret; 91 return ret;
88} 92}
@@ -105,9 +109,7 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
105 int hl = ihl + jhl; 109 int hl = ihl + jhl;
106 110
107 if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || 111 if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
108 (skb_cloned(skb) && 112 skb_try_make_writable(skb, hl + ntkoff))
109 !skb_clone_writable(skb, hl + ntkoff) &&
110 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
111 return NULL; 113 return NULL;
112 else 114 else
113 return (void *)(skb_network_header(skb) + ihl); 115 return (void *)(skb_network_header(skb) + ihl);
@@ -365,9 +367,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
365 } 367 }
366 368
367 if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { 369 if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
368 if (skb_cloned(skb) && 370 if (skb_try_make_writable(skb, sizeof(*iph) + ntkoff))
369 !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
370 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
371 goto fail; 371 goto fail;
372 372
373 ip_send_check(ip_hdr(skb)); 373 ip_send_check(ip_hdr(skb));
@@ -559,6 +559,22 @@ nla_put_failure:
559 return -1; 559 return -1;
560} 560}
561 561
562static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
563 struct netlink_callback *cb, int type,
564 struct tc_action *a)
565{
566 struct tc_action_net *tn = net_generic(net, csum_net_id);
567
568 return tcf_generic_walker(tn, skb, cb, type, a);
569}
570
571static int tcf_csum_search(struct net *net, struct tc_action *a, u32 index)
572{
573 struct tc_action_net *tn = net_generic(net, csum_net_id);
574
575 return tcf_hash_search(tn, a, index);
576}
577
562static struct tc_action_ops act_csum_ops = { 578static struct tc_action_ops act_csum_ops = {
563 .kind = "csum", 579 .kind = "csum",
564 .type = TCA_ACT_CSUM, 580 .type = TCA_ACT_CSUM,
@@ -566,6 +582,29 @@ static struct tc_action_ops act_csum_ops = {
566 .act = tcf_csum, 582 .act = tcf_csum,
567 .dump = tcf_csum_dump, 583 .dump = tcf_csum_dump,
568 .init = tcf_csum_init, 584 .init = tcf_csum_init,
585 .walk = tcf_csum_walker,
586 .lookup = tcf_csum_search,
587};
588
589static __net_init int csum_init_net(struct net *net)
590{
591 struct tc_action_net *tn = net_generic(net, csum_net_id);
592
593 return tc_action_net_init(tn, &act_csum_ops, CSUM_TAB_MASK);
594}
595
596static void __net_exit csum_exit_net(struct net *net)
597{
598 struct tc_action_net *tn = net_generic(net, csum_net_id);
599
600 tc_action_net_exit(tn);
601}
602
603static struct pernet_operations csum_net_ops = {
604 .init = csum_init_net,
605 .exit = csum_exit_net,
606 .id = &csum_net_id,
607 .size = sizeof(struct tc_action_net),
569}; 608};
570 609
571MODULE_DESCRIPTION("Checksum updating actions"); 610MODULE_DESCRIPTION("Checksum updating actions");
@@ -573,12 +612,12 @@ MODULE_LICENSE("GPL");
573 612
574static int __init csum_init_module(void) 613static int __init csum_init_module(void)
575{ 614{
576 return tcf_register_action(&act_csum_ops, CSUM_TAB_MASK); 615 return tcf_register_action(&act_csum_ops, &csum_net_ops);
577} 616}
578 617
579static void __exit csum_cleanup_module(void) 618static void __exit csum_cleanup_module(void)
580{ 619{
581 tcf_unregister_action(&act_csum_ops); 620 tcf_unregister_action(&act_csum_ops, &csum_net_ops);
582} 621}
583 622
584module_init(csum_init_module); 623module_init(csum_init_module);
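
The act_csum hunks also replace the open-coded clone/writability test with skb_try_make_writable(), which returns non-zero when the requested bytes cannot be made privately writable. A small illustrative wrapper showing how the two checks compose (this helper is not in the patch, only the calls it makes are):

/*
 * Hedged sketch: skb_try_make_writable() folds the old three-part test
 * (cloned, not clone-writable, expand head) into one helper.
 */
static void *writable_network_header(struct sk_buff *skb, unsigned int len)
{
	if (!pskb_may_pull(skb, len) ||		/* ensure bytes are linear  */
	    skb_try_make_writable(skb, len))	/* ensure head is private   */
		return NULL;			/* cannot safely modify     */
	return skb_network_header(skb);
}
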
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 5c1b05170736..887fc1f209ff 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -25,6 +25,8 @@
25 25
26#define GACT_TAB_MASK 15 26#define GACT_TAB_MASK 15
27 27
28static int gact_net_id;
29
28#ifdef CONFIG_GACT_PROB 30#ifdef CONFIG_GACT_PROB
29static int gact_net_rand(struct tcf_gact *gact) 31static int gact_net_rand(struct tcf_gact *gact)
30{ 32{
@@ -57,6 +59,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
57 struct nlattr *est, struct tc_action *a, 59 struct nlattr *est, struct tc_action *a,
58 int ovr, int bind) 60 int ovr, int bind)
59{ 61{
62 struct tc_action_net *tn = net_generic(net, gact_net_id);
60 struct nlattr *tb[TCA_GACT_MAX + 1]; 63 struct nlattr *tb[TCA_GACT_MAX + 1];
61 struct tc_gact *parm; 64 struct tc_gact *parm;
62 struct tcf_gact *gact; 65 struct tcf_gact *gact;
@@ -88,9 +91,9 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
88 } 91 }
89#endif 92#endif
90 93
91 if (!tcf_hash_check(parm->index, a, bind)) { 94 if (!tcf_hash_check(tn, parm->index, a, bind)) {
92 ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), 95 ret = tcf_hash_create(tn, parm->index, est, a,
93 bind, true); 96 sizeof(*gact), bind, true);
94 if (ret) 97 if (ret)
95 return ret; 98 return ret;
96 ret = ACT_P_CREATED; 99 ret = ACT_P_CREATED;
@@ -118,7 +121,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
118 } 121 }
119#endif 122#endif
120 if (ret == ACT_P_CREATED) 123 if (ret == ACT_P_CREATED)
121 tcf_hash_insert(a); 124 tcf_hash_insert(tn, a);
122 return ret; 125 return ret;
123} 126}
124 127
@@ -183,6 +186,22 @@ nla_put_failure:
183 return -1; 186 return -1;
184} 187}
185 188
189static int tcf_gact_walker(struct net *net, struct sk_buff *skb,
190 struct netlink_callback *cb, int type,
191 struct tc_action *a)
192{
193 struct tc_action_net *tn = net_generic(net, gact_net_id);
194
195 return tcf_generic_walker(tn, skb, cb, type, a);
196}
197
198static int tcf_gact_search(struct net *net, struct tc_action *a, u32 index)
199{
200 struct tc_action_net *tn = net_generic(net, gact_net_id);
201
202 return tcf_hash_search(tn, a, index);
203}
204
186static struct tc_action_ops act_gact_ops = { 205static struct tc_action_ops act_gact_ops = {
187 .kind = "gact", 206 .kind = "gact",
188 .type = TCA_ACT_GACT, 207 .type = TCA_ACT_GACT,
@@ -190,6 +209,29 @@ static struct tc_action_ops act_gact_ops = {
190 .act = tcf_gact, 209 .act = tcf_gact,
191 .dump = tcf_gact_dump, 210 .dump = tcf_gact_dump,
192 .init = tcf_gact_init, 211 .init = tcf_gact_init,
212 .walk = tcf_gact_walker,
213 .lookup = tcf_gact_search,
214};
215
216static __net_init int gact_init_net(struct net *net)
217{
218 struct tc_action_net *tn = net_generic(net, gact_net_id);
219
220 return tc_action_net_init(tn, &act_gact_ops, GACT_TAB_MASK);
221}
222
223static void __net_exit gact_exit_net(struct net *net)
224{
225 struct tc_action_net *tn = net_generic(net, gact_net_id);
226
227 tc_action_net_exit(tn);
228}
229
230static struct pernet_operations gact_net_ops = {
231 .init = gact_init_net,
232 .exit = gact_exit_net,
233 .id = &gact_net_id,
234 .size = sizeof(struct tc_action_net),
193}; 235};
194 236
195MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); 237MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
@@ -203,12 +245,13 @@ static int __init gact_init_module(void)
203#else 245#else
204 pr_info("GACT probability NOT on\n"); 246 pr_info("GACT probability NOT on\n");
205#endif 247#endif
206 return tcf_register_action(&act_gact_ops, GACT_TAB_MASK); 248
249 return tcf_register_action(&act_gact_ops, &gact_net_ops);
207} 250}
208 251
209static void __exit gact_cleanup_module(void) 252static void __exit gact_cleanup_module(void)
210{ 253{
211 tcf_unregister_action(&act_gact_ops); 254 tcf_unregister_action(&act_gact_ops, &gact_net_ops);
212} 255}
213 256
214module_init(gact_init_module); 257module_init(gact_init_module);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
new file mode 100644
index 000000000000..c589a9ba506a
--- /dev/null
+++ b/net/sched/act_ife.c
@@ -0,0 +1,870 @@
1/*
2 * net/sched/ife.c Inter-FE action based on ForCES WG InterFE LFB
3 *
4 * Refer to:
5 * draft-ietf-forces-interfelfb-03
6 * and
7 * netdev01 paper:
8 * "Distributing Linux Traffic Control Classifier-Action
9 * Subsystem"
10 * Authors: Jamal Hadi Salim and Damascene M. Joachimpillai
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * copyright Jamal Hadi Salim (2015)
18 *
19*/
20
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/string.h>
24#include <linux/errno.h>
25#include <linux/skbuff.h>
26#include <linux/rtnetlink.h>
27#include <linux/module.h>
28#include <linux/init.h>
29#include <net/net_namespace.h>
30#include <net/netlink.h>
31#include <net/pkt_sched.h>
32#include <uapi/linux/tc_act/tc_ife.h>
33#include <net/tc_act/tc_ife.h>
34#include <linux/etherdevice.h>
35
36#define IFE_TAB_MASK 15
37
38static int ife_net_id;
39static int max_metacnt = IFE_META_MAX + 1;
40
41static const struct nla_policy ife_policy[TCA_IFE_MAX + 1] = {
42 [TCA_IFE_PARMS] = { .len = sizeof(struct tc_ife)},
43 [TCA_IFE_DMAC] = { .len = ETH_ALEN},
44 [TCA_IFE_SMAC] = { .len = ETH_ALEN},
45 [TCA_IFE_TYPE] = { .type = NLA_U16},
46};
47
48/* Caller takes care of presenting data in network order
49*/
50int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
51{
52 u32 *tlv = (u32 *)(skbdata);
53 u16 totlen = nla_total_size(dlen); /*alignment + hdr */
54 char *dptr = (char *)tlv + NLA_HDRLEN;
55 u32 htlv = attrtype << 16 | totlen;
56
57 *tlv = htonl(htlv);
58 memset(dptr, 0, totlen - NLA_HDRLEN);
59 memcpy(dptr, dval, dlen);
60
61 return totlen;
62}
63EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
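/* Illustrative example (not part of the patch): for a u32 skb mark with
 * value 0x11 and metaid IFE_META_SKBMARK, ife_tlv_meta_encode() writes
 * nla_total_size(4) == 8 bytes:
 *
 *   first 4 bytes:  htonl(IFE_META_SKBMARK << 16 | 8)  - 16-bit type, 16-bit total length
 *   next 4 bytes:   the value, already in network order (see ife_encode_meta_u32 below)
 *
 * and returns that total length so the caller can advance its write
 * pointer to the next TLV.
 */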
64
65int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi)
66{
67 if (mi->metaval)
68 return nla_put_u32(skb, mi->metaid, *(u32 *)mi->metaval);
69 else
70 return nla_put(skb, mi->metaid, 0, NULL);
71}
72EXPORT_SYMBOL_GPL(ife_get_meta_u32);
73
74int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi)
75{
76 if (metaval || mi->metaval)
77 return 8; /* T+L+V == 2+2+4 */
78
79 return 0;
80}
81EXPORT_SYMBOL_GPL(ife_check_meta_u32);
82
83int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi)
84{
85 u32 edata = metaval;
86
87 if (mi->metaval)
88 edata = *(u32 *)mi->metaval;
89 else if (metaval)
90 edata = metaval;
91
92 if (!edata) /* will not encode */
93 return 0;
94
95 edata = htonl(edata);
96 return ife_tlv_meta_encode(skbdata, mi->metaid, 4, &edata);
97}
98EXPORT_SYMBOL_GPL(ife_encode_meta_u32);
99
100int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi)
101{
102 if (mi->metaval)
103 return nla_put_u16(skb, mi->metaid, *(u16 *)mi->metaval);
104 else
105 return nla_put(skb, mi->metaid, 0, NULL);
106}
107EXPORT_SYMBOL_GPL(ife_get_meta_u16);
108
109int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval)
110{
111 mi->metaval = kmemdup(metaval, sizeof(u32), GFP_KERNEL);
112 if (!mi->metaval)
113 return -ENOMEM;
114
115 return 0;
116}
117EXPORT_SYMBOL_GPL(ife_alloc_meta_u32);
118
119int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval)
120{
121 mi->metaval = kmemdup(metaval, sizeof(u16), GFP_KERNEL);
122 if (!mi->metaval)
123 return -ENOMEM;
124
125 return 0;
126}
127EXPORT_SYMBOL_GPL(ife_alloc_meta_u16);
128
129void ife_release_meta_gen(struct tcf_meta_info *mi)
130{
131 kfree(mi->metaval);
132}
133EXPORT_SYMBOL_GPL(ife_release_meta_gen);
134
135int ife_validate_meta_u32(void *val, int len)
136{
137 if (len == 4)
138 return 0;
139
140 return -EINVAL;
141}
142EXPORT_SYMBOL_GPL(ife_validate_meta_u32);
143
144int ife_validate_meta_u16(void *val, int len)
145{
146 /* length will include padding */
147 if (len == NLA_ALIGN(2))
148 return 0;
149
150 return -EINVAL;
151}
152EXPORT_SYMBOL_GPL(ife_validate_meta_u16);
153
154static LIST_HEAD(ifeoplist);
155static DEFINE_RWLOCK(ife_mod_lock);
156
157static struct tcf_meta_ops *find_ife_oplist(u16 metaid)
158{
159 struct tcf_meta_ops *o;
160
161 read_lock(&ife_mod_lock);
162 list_for_each_entry(o, &ifeoplist, list) {
163 if (o->metaid == metaid) {
164 if (!try_module_get(o->owner))
165 o = NULL;
166 read_unlock(&ife_mod_lock);
167 return o;
168 }
169 }
170 read_unlock(&ife_mod_lock);
171
172 return NULL;
173}
174
175int register_ife_op(struct tcf_meta_ops *mops)
176{
177 struct tcf_meta_ops *m;
178
179 if (!mops->metaid || !mops->metatype || !mops->name ||
180 !mops->check_presence || !mops->encode || !mops->decode ||
181 !mops->get || !mops->alloc)
182 return -EINVAL;
183
184 write_lock(&ife_mod_lock);
185
186 list_for_each_entry(m, &ifeoplist, list) {
187 if (m->metaid == mops->metaid ||
188 (strcmp(mops->name, m->name) == 0)) {
189 write_unlock(&ife_mod_lock);
190 return -EEXIST;
191 }
192 }
193
194 if (!mops->release)
195 mops->release = ife_release_meta_gen;
196
197 list_add_tail(&mops->list, &ifeoplist);
198 write_unlock(&ife_mod_lock);
199 return 0;
200}
201EXPORT_SYMBOL_GPL(register_ife_op);
202
203int unregister_ife_op(struct tcf_meta_ops *mops)
204{
205 struct tcf_meta_ops *m;
206 int err = -ENOENT;
207
208 write_lock(&ife_mod_lock);
209 list_for_each_entry(m, &ifeoplist, list) {
210 if (m->metaid == mops->metaid) {
211 list_del(&mops->list);
212 err = 0;
213 break;
214 }
215 }
216 write_unlock(&ife_mod_lock);
217
218 return err;
219}
220EXPORT_SYMBOL_GPL(unregister_ife_op);
221
222static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len)
223{
224 int ret = 0;
225	/* XXX: unfortunately we can't use nla_policy at this point
226	 * because a length of 0 is valid in the case of
227	 * "allow". "use" semantics do enforce a proper
228	 * length, and nla_policy could have been used, but it makes
229	 * it hard to use it just for that.
230	 */
231 if (ops->validate)
232 return ops->validate(val, len);
233
234 if (ops->metatype == NLA_U32)
235 ret = ife_validate_meta_u32(val, len);
236 else if (ops->metatype == NLA_U16)
237 ret = ife_validate_meta_u16(val, len);
238
239 return ret;
240}
241
242/* called when adding new meta information
243 * under ife->tcf_lock
244*/
245static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
246 void *val, int len)
247{
248 struct tcf_meta_ops *ops = find_ife_oplist(metaid);
249 int ret = 0;
250
251 if (!ops) {
252 ret = -ENOENT;
253#ifdef CONFIG_MODULES
254 spin_unlock_bh(&ife->tcf_lock);
255 rtnl_unlock();
256 request_module("ifemeta%u", metaid);
257 rtnl_lock();
258 spin_lock_bh(&ife->tcf_lock);
259 ops = find_ife_oplist(metaid);
260#endif
261 }
262
263 if (ops) {
264 ret = 0;
265 if (len)
266 ret = ife_validate_metatype(ops, val, len);
267
268 module_put(ops->owner);
269 }
270
271 return ret;
272}
273
274/* called when adding new meta information
275 * under ife->tcf_lock
276*/
277static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
278 int len)
279{
280 struct tcf_meta_info *mi = NULL;
281 struct tcf_meta_ops *ops = find_ife_oplist(metaid);
282 int ret = 0;
283
284 if (!ops)
285 return -ENOENT;
286
287 mi = kzalloc(sizeof(*mi), GFP_KERNEL);
288 if (!mi) {
289 /*put back what find_ife_oplist took */
290 module_put(ops->owner);
291 return -ENOMEM;
292 }
293
294 mi->metaid = metaid;
295 mi->ops = ops;
296 if (len > 0) {
297 ret = ops->alloc(mi, metaval);
298 if (ret != 0) {
299 kfree(mi);
300 module_put(ops->owner);
301 return ret;
302 }
303 }
304
305 list_add_tail(&mi->metalist, &ife->metalist);
306
307 return ret;
308}
309
310static int use_all_metadata(struct tcf_ife_info *ife)
311{
312 struct tcf_meta_ops *o;
313 int rc = 0;
314 int installed = 0;
315
316 list_for_each_entry(o, &ifeoplist, list) {
317 rc = add_metainfo(ife, o->metaid, NULL, 0);
318 if (rc == 0)
319 installed += 1;
320 }
321
322 if (installed)
323 return 0;
324 else
325 return -EINVAL;
326}
327
328static int dump_metalist(struct sk_buff *skb, struct tcf_ife_info *ife)
329{
330 struct tcf_meta_info *e;
331 struct nlattr *nest;
332 unsigned char *b = skb_tail_pointer(skb);
333 int total_encoded = 0;
334
335 /*can only happen on decode */
336 if (list_empty(&ife->metalist))
337 return 0;
338
339 nest = nla_nest_start(skb, TCA_IFE_METALST);
340 if (!nest)
341 goto out_nlmsg_trim;
342
343 list_for_each_entry(e, &ife->metalist, metalist) {
344 if (!e->ops->get(skb, e))
345 total_encoded += 1;
346 }
347
348 if (!total_encoded)
349 goto out_nlmsg_trim;
350
351 nla_nest_end(skb, nest);
352
353 return 0;
354
355out_nlmsg_trim:
356 nlmsg_trim(skb, b);
357 return -1;
358}
359
360/* under ife->tcf_lock */
361static void _tcf_ife_cleanup(struct tc_action *a, int bind)
362{
363 struct tcf_ife_info *ife = a->priv;
364 struct tcf_meta_info *e, *n;
365
366 list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
367 module_put(e->ops->owner);
368 list_del(&e->metalist);
369 if (e->metaval) {
370 if (e->ops->release)
371 e->ops->release(e);
372 else
373 kfree(e->metaval);
374 }
375 kfree(e);
376 }
377}
378
379static void tcf_ife_cleanup(struct tc_action *a, int bind)
380{
381 struct tcf_ife_info *ife = a->priv;
382
383 spin_lock_bh(&ife->tcf_lock);
384 _tcf_ife_cleanup(a, bind);
385 spin_unlock_bh(&ife->tcf_lock);
386}
387
388/* under ife->tcf_lock */
389static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb)
390{
391 int len = 0;
392 int rc = 0;
393 int i = 0;
394 void *val;
395
396 for (i = 1; i < max_metacnt; i++) {
397 if (tb[i]) {
398 val = nla_data(tb[i]);
399 len = nla_len(tb[i]);
400
401 rc = load_metaops_and_vet(ife, i, val, len);
402 if (rc != 0)
403 return rc;
404
405 rc = add_metainfo(ife, i, val, len);
406 if (rc)
407 return rc;
408 }
409 }
410
411 return rc;
412}
413
414static int tcf_ife_init(struct net *net, struct nlattr *nla,
415 struct nlattr *est, struct tc_action *a,
416 int ovr, int bind)
417{
418 struct tc_action_net *tn = net_generic(net, ife_net_id);
419 struct nlattr *tb[TCA_IFE_MAX + 1];
420 struct nlattr *tb2[IFE_META_MAX + 1];
421 struct tcf_ife_info *ife;
422 struct tc_ife *parm;
423 u16 ife_type = 0;
424 u8 *daddr = NULL;
425 u8 *saddr = NULL;
426 int ret = 0;
427 int err;
428
429 err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy);
430 if (err < 0)
431 return err;
432
433 if (!tb[TCA_IFE_PARMS])
434 return -EINVAL;
435
436 parm = nla_data(tb[TCA_IFE_PARMS]);
437
438 if (parm->flags & IFE_ENCODE) {
439		/* Until we get issued the ethertype, we can't have
440		 * a default.
441 **/
442 if (!tb[TCA_IFE_TYPE]) {
443			pr_info("You MUST pass ethertype for encoding\n");
444 return -EINVAL;
445 }
446 }
447
448 if (!tcf_hash_check(tn, parm->index, a, bind)) {
449 ret = tcf_hash_create(tn, parm->index, est, a, sizeof(*ife),
450 bind, false);
451 if (ret)
452 return ret;
453 ret = ACT_P_CREATED;
454 } else {
455 if (bind) /* dont override defaults */
456 return 0;
457 tcf_hash_release(a, bind);
458 if (!ovr)
459 return -EEXIST;
460 }
461
462 ife = to_ife(a);
463 ife->flags = parm->flags;
464
465 if (parm->flags & IFE_ENCODE) {
466 ife_type = nla_get_u16(tb[TCA_IFE_TYPE]);
467 if (tb[TCA_IFE_DMAC])
468 daddr = nla_data(tb[TCA_IFE_DMAC]);
469 if (tb[TCA_IFE_SMAC])
470 saddr = nla_data(tb[TCA_IFE_SMAC]);
471 }
472
473 spin_lock_bh(&ife->tcf_lock);
474 ife->tcf_action = parm->action;
475
476 if (parm->flags & IFE_ENCODE) {
477 if (daddr)
478 ether_addr_copy(ife->eth_dst, daddr);
479 else
480 eth_zero_addr(ife->eth_dst);
481
482 if (saddr)
483 ether_addr_copy(ife->eth_src, saddr);
484 else
485 eth_zero_addr(ife->eth_src);
486
487 ife->eth_type = ife_type;
488 }
489
490 if (ret == ACT_P_CREATED)
491 INIT_LIST_HEAD(&ife->metalist);
492
493 if (tb[TCA_IFE_METALST]) {
494 err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST],
495 NULL);
496 if (err) {
497metadata_parse_err:
498 if (ret == ACT_P_CREATED)
499 _tcf_ife_cleanup(a, bind);
500
501 spin_unlock_bh(&ife->tcf_lock);
502 return err;
503 }
504
505 err = populate_metalist(ife, tb2);
506 if (err)
507 goto metadata_parse_err;
508
509 } else {
510		/* if no metadata allow list was passed, or allow-all was
511		 * passed, then we process by adding as many of the supported
512		 * metadata as we can. At least one must be installable,
513		 * otherwise we bail out.
514 */
515 err = use_all_metadata(ife);
516 if (err) {
517 if (ret == ACT_P_CREATED)
518 _tcf_ife_cleanup(a, bind);
519
520 spin_unlock_bh(&ife->tcf_lock);
521 return err;
522 }
523 }
524
525 spin_unlock_bh(&ife->tcf_lock);
526
527 if (ret == ACT_P_CREATED)
528 tcf_hash_insert(tn, a);
529
530 return ret;
531}
532
533static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
534 int ref)
535{
536 unsigned char *b = skb_tail_pointer(skb);
537 struct tcf_ife_info *ife = a->priv;
538 struct tc_ife opt = {
539 .index = ife->tcf_index,
540 .refcnt = ife->tcf_refcnt - ref,
541 .bindcnt = ife->tcf_bindcnt - bind,
542 .action = ife->tcf_action,
543 .flags = ife->flags,
544 };
545 struct tcf_t t;
546
547 if (nla_put(skb, TCA_IFE_PARMS, sizeof(opt), &opt))
548 goto nla_put_failure;
549
550 t.install = jiffies_to_clock_t(jiffies - ife->tcf_tm.install);
551 t.lastuse = jiffies_to_clock_t(jiffies - ife->tcf_tm.lastuse);
552 t.expires = jiffies_to_clock_t(ife->tcf_tm.expires);
553 if (nla_put(skb, TCA_IFE_TM, sizeof(t), &t))
554 goto nla_put_failure;
555
556 if (!is_zero_ether_addr(ife->eth_dst)) {
557 if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, ife->eth_dst))
558 goto nla_put_failure;
559 }
560
561 if (!is_zero_ether_addr(ife->eth_src)) {
562 if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, ife->eth_src))
563 goto nla_put_failure;
564 }
565
566 if (nla_put(skb, TCA_IFE_TYPE, 2, &ife->eth_type))
567 goto nla_put_failure;
568
569 if (dump_metalist(skb, ife)) {
570 /* ignore failure to dump metalist */
571 pr_info("Failed to dump metalist\n");
572 }
573
574 return skb->len;
575
576nla_put_failure:
577 nlmsg_trim(skb, b);
578 return -1;
579}
580
581int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
582 u16 metaid, u16 mlen, void *mdata)
583{
584 struct tcf_meta_info *e;
585
586 /* XXX: use hash to speed up */
587 list_for_each_entry(e, &ife->metalist, metalist) {
588 if (metaid == e->metaid) {
589 if (e->ops) {
590 /* We check for decode presence already */
591 return e->ops->decode(skb, mdata, mlen);
592 }
593 }
594 }
595
596 return 0;
597}
598
599struct ifeheadr {
600 __be16 metalen;
601 u8 tlv_data[];
602};
603
604struct meta_tlvhdr {
605 __be16 type;
606 __be16 len;
607};
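/* Worked example (a sketch inferred from the encode/decode paths in this
 * file, not a normative definition of the wire format): a frame carrying a
 * single 32-bit skbmark metadatum would look roughly like
 *
 *   metalen = 0x000a                   2 bytes, counts itself (IFE_METAHDRLEN)
 *   tlv.type = IFE_META_SKBMARK        2 bytes
 *   tlv.len  = 0x0008                  2 bytes, counts the 4-byte TLV header
 *   mark, network byte order           4 bytes
 *   ... original frame ...
 *
 * i.e. each TLV's len covers header plus payload, and metalen covers the
 * length field plus all TLVs; that is why the decode loop below subtracts
 * IFE_METAHDRLEN once, and then hands (mlen - 4) bytes starting at
 * (tlvdata + 4) to the metadatum's ->decode() callback.
 */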
608
609static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
610 struct tcf_result *res)
611{
612 struct tcf_ife_info *ife = a->priv;
613 int action = ife->tcf_action;
614 struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data;
615 u16 ifehdrln = ifehdr->metalen;
616 struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data);
617
618 spin_lock(&ife->tcf_lock);
619 bstats_update(&ife->tcf_bstats, skb);
620 ife->tcf_tm.lastuse = jiffies;
621 spin_unlock(&ife->tcf_lock);
622
623 ifehdrln = ntohs(ifehdrln);
624 if (unlikely(!pskb_may_pull(skb, ifehdrln))) {
625 spin_lock(&ife->tcf_lock);
626 ife->tcf_qstats.drops++;
627 spin_unlock(&ife->tcf_lock);
628 return TC_ACT_SHOT;
629 }
630
631 skb_set_mac_header(skb, ifehdrln);
632 __skb_pull(skb, ifehdrln);
633 skb->protocol = eth_type_trans(skb, skb->dev);
634 ifehdrln -= IFE_METAHDRLEN;
635
636 while (ifehdrln > 0) {
637 u8 *tlvdata = (u8 *)tlv;
638 u16 mtype = tlv->type;
639 u16 mlen = tlv->len;
640
641 mtype = ntohs(mtype);
642 mlen = ntohs(mlen);
643
644 if (find_decode_metaid(skb, ife, mtype, (mlen - 4),
645 (void *)(tlvdata + 4))) {
646 /* abuse overlimits to count when we receive metadata
647 * but don't have an ops for it
648 */
649 pr_info_ratelimited("Unknown metaid %d alnlen %d\n",
650 mtype, mlen);
651 ife->tcf_qstats.overlimits++;
652 }
653
654 tlvdata += mlen;
655 ifehdrln -= mlen;
656 tlv = (struct meta_tlvhdr *)tlvdata;
657 }
658
659 skb_reset_network_header(skb);
660 return action;
661}
662
663/* XXX: check if we can do this at install time instead of in the
664 * send data path
665 */
666static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife)
667{
668 struct tcf_meta_info *e, *n;
669 int tot_run_sz = 0, run_sz = 0;
670
671 list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
672 if (e->ops->check_presence) {
673 run_sz = e->ops->check_presence(skb, e);
674 tot_run_sz += run_sz;
675 }
676 }
677
678 return tot_run_sz;
679}
680
681static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
682 struct tcf_result *res)
683{
684 struct tcf_ife_info *ife = a->priv;
685 int action = ife->tcf_action;
686 struct ethhdr *oethh; /* outer ether header */
687 struct ethhdr *iethh; /* inner eth header */
688 struct tcf_meta_info *e;
689 /*
690 * OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA
691 * where ORIGDATA = original ethernet header ...
692 */
693 u16 metalen = ife_get_sz(skb, ife);
694 int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN;
695 unsigned int skboff = skb->dev->hard_header_len;
696 u32 at = G_TC_AT(skb->tc_verd);
697 int new_len = skb->len + hdrm;
698 bool exceed_mtu = false;
699 int err;
700
701 if (at & AT_EGRESS) {
702 if (new_len > skb->dev->mtu)
703 exceed_mtu = true;
704 }
705
706 spin_lock(&ife->tcf_lock);
707 bstats_update(&ife->tcf_bstats, skb);
708 ife->tcf_tm.lastuse = jiffies;
709
710 if (!metalen) { /* no metadata to send */
711 /* abuse overlimits to count when we allow packet
712 * with no metadata
713 */
714 ife->tcf_qstats.overlimits++;
715 spin_unlock(&ife->tcf_lock);
716 return action;
717 }
718 /* could be a bad policy setup or MTU config,
719 * so let's be conservative.. */
720 if ((action == TC_ACT_SHOT) || exceed_mtu) {
721 ife->tcf_qstats.drops++;
722 spin_unlock(&ife->tcf_lock);
723 return TC_ACT_SHOT;
724 }
725
726 iethh = eth_hdr(skb);
727
728 err = skb_cow_head(skb, hdrm);
729 if (unlikely(err)) {
730 ife->tcf_qstats.drops++;
731 spin_unlock(&ife->tcf_lock);
732 return TC_ACT_SHOT;
733 }
734
735 if (!(at & AT_EGRESS))
736 skb_push(skb, skb->dev->hard_header_len);
737
738 __skb_push(skb, hdrm);
739 memcpy(skb->data, iethh, skb->mac_len);
740 skb_reset_mac_header(skb);
741 oethh = eth_hdr(skb);
742
743 /* total metadata length */
744 metalen += IFE_METAHDRLEN;
745 metalen = htons(metalen);
746 memcpy((skb->data + skboff), &metalen, IFE_METAHDRLEN);
747 skboff += IFE_METAHDRLEN;
748
749 /* XXX: we don't have a clever way of telling encode to
750 * not repeat some of the computations that are done by
751 * ops->check_presence...
752 */
753 list_for_each_entry(e, &ife->metalist, metalist) {
754 if (e->ops->encode) {
755 err = e->ops->encode(skb, (void *)(skb->data + skboff),
756 e);
757 if (err < 0) {
758 /* too corrupt to keep around if overwritten */
759 ife->tcf_qstats.drops++;
760 spin_unlock(&ife->tcf_lock);
761 return TC_ACT_SHOT;
762 }
763 skboff += err;
764 }
765 }
766
767 if (!is_zero_ether_addr(ife->eth_src))
768 ether_addr_copy(oethh->h_source, ife->eth_src);
769 else
770 ether_addr_copy(oethh->h_source, iethh->h_source);
771 if (!is_zero_ether_addr(ife->eth_dst))
772 ether_addr_copy(oethh->h_dest, ife->eth_dst);
773 else
774 ether_addr_copy(oethh->h_dest, iethh->h_dest);
775 oethh->h_proto = htons(ife->eth_type);
776
777 if (!(at & AT_EGRESS))
778 skb_pull(skb, skb->dev->hard_header_len);
779
780 spin_unlock(&ife->tcf_lock);
781
782 return action;
783}
784
785static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
786 struct tcf_result *res)
787{
788 struct tcf_ife_info *ife = a->priv;
789
790 if (ife->flags & IFE_ENCODE)
791 return tcf_ife_encode(skb, a, res);
792
793 if (!(ife->flags & IFE_ENCODE))
794 return tcf_ife_decode(skb, a, res);
795
796 pr_info_ratelimited("unknown failure (policy is neither encode nor decode)\n");
797 spin_lock(&ife->tcf_lock);
798 bstats_update(&ife->tcf_bstats, skb);
799 ife->tcf_tm.lastuse = jiffies;
800 ife->tcf_qstats.drops++;
801 spin_unlock(&ife->tcf_lock);
802
803 return TC_ACT_SHOT;
804}
805
806static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
807 struct netlink_callback *cb, int type,
808 struct tc_action *a)
809{
810 struct tc_action_net *tn = net_generic(net, ife_net_id);
811
812 return tcf_generic_walker(tn, skb, cb, type, a);
813}
814
815static int tcf_ife_search(struct net *net, struct tc_action *a, u32 index)
816{
817 struct tc_action_net *tn = net_generic(net, ife_net_id);
818
819 return tcf_hash_search(tn, a, index);
820}
821
822static struct tc_action_ops act_ife_ops = {
823 .kind = "ife",
824 .type = TCA_ACT_IFE,
825 .owner = THIS_MODULE,
826 .act = tcf_ife_act,
827 .dump = tcf_ife_dump,
828 .cleanup = tcf_ife_cleanup,
829 .init = tcf_ife_init,
830 .walk = tcf_ife_walker,
831 .lookup = tcf_ife_search,
832};
833
834static __net_init int ife_init_net(struct net *net)
835{
836 struct tc_action_net *tn = net_generic(net, ife_net_id);
837
838 return tc_action_net_init(tn, &act_ife_ops, IFE_TAB_MASK);
839}
840
841static void __net_exit ife_exit_net(struct net *net)
842{
843 struct tc_action_net *tn = net_generic(net, ife_net_id);
844
845 tc_action_net_exit(tn);
846}
847
848static struct pernet_operations ife_net_ops = {
849 .init = ife_init_net,
850 .exit = ife_exit_net,
851 .id = &ife_net_id,
852 .size = sizeof(struct tc_action_net),
853};
854
855static int __init ife_init_module(void)
856{
857 return tcf_register_action(&act_ife_ops, &ife_net_ops);
858}
859
860static void __exit ife_cleanup_module(void)
861{
862 tcf_unregister_action(&act_ife_ops, &ife_net_ops);
863}
864
865module_init(ife_init_module);
866module_exit(ife_cleanup_module);
867
868MODULE_AUTHOR("Jamal Hadi Salim(2015)");
869MODULE_DESCRIPTION("Inter-FE LFB action");
870MODULE_LICENSE("GPL");
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d05869646515..350e134cffb3 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -30,6 +30,10 @@
30 30
31#define IPT_TAB_MASK 15 31#define IPT_TAB_MASK 15
32 32
33static int ipt_net_id;
34
35static int xt_net_id;
36
33static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook) 37static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
34{ 38{
35 struct xt_tgchk_param par; 39 struct xt_tgchk_param par;
@@ -62,6 +66,7 @@ static void ipt_destroy_target(struct xt_entry_target *t)
62 struct xt_tgdtor_param par = { 66 struct xt_tgdtor_param par = {
63 .target = t->u.kernel.target, 67 .target = t->u.kernel.target,
64 .targinfo = t->data, 68 .targinfo = t->data,
69 .family = NFPROTO_IPV4,
65 }; 70 };
66 if (par.target->destroy != NULL) 71 if (par.target->destroy != NULL)
67 par.target->destroy(&par); 72 par.target->destroy(&par);
@@ -83,8 +88,9 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
83 [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, 88 [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
84}; 89};
85 90
86static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, 91static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
87 struct tc_action *a, int ovr, int bind) 92 struct nlattr *est, struct tc_action *a, int ovr,
93 int bind)
88{ 94{
89 struct nlattr *tb[TCA_IPT_MAX + 1]; 95 struct nlattr *tb[TCA_IPT_MAX + 1];
90 struct tcf_ipt *ipt; 96 struct tcf_ipt *ipt;
@@ -113,8 +119,9 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
113 if (tb[TCA_IPT_INDEX] != NULL) 119 if (tb[TCA_IPT_INDEX] != NULL)
114 index = nla_get_u32(tb[TCA_IPT_INDEX]); 120 index = nla_get_u32(tb[TCA_IPT_INDEX]);
115 121
116 if (!tcf_hash_check(index, a, bind) ) { 122 if (!tcf_hash_check(tn, index, a, bind)) {
117 ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind, false); 123 ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind,
124 false);
118 if (ret) 125 if (ret)
119 return ret; 126 return ret;
120 ret = ACT_P_CREATED; 127 ret = ACT_P_CREATED;
@@ -157,7 +164,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
157 ipt->tcfi_hook = hook; 164 ipt->tcfi_hook = hook;
158 spin_unlock_bh(&ipt->tcf_lock); 165 spin_unlock_bh(&ipt->tcf_lock);
159 if (ret == ACT_P_CREATED) 166 if (ret == ACT_P_CREATED)
160 tcf_hash_insert(a); 167 tcf_hash_insert(tn, a);
161 return ret; 168 return ret;
162 169
163err3: 170err3:
@@ -170,6 +177,24 @@ err1:
170 return err; 177 return err;
171} 178}
172 179
180static int tcf_ipt_init(struct net *net, struct nlattr *nla,
181 struct nlattr *est, struct tc_action *a, int ovr,
182 int bind)
183{
184 struct tc_action_net *tn = net_generic(net, ipt_net_id);
185
186 return __tcf_ipt_init(tn, nla, est, a, ovr, bind);
187}
188
189static int tcf_xt_init(struct net *net, struct nlattr *nla,
190 struct nlattr *est, struct tc_action *a, int ovr,
191 int bind)
192{
193 struct tc_action_net *tn = net_generic(net, xt_net_id);
194
195 return __tcf_ipt_init(tn, nla, est, a, ovr, bind);
196}
197
173static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, 198static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
174 struct tcf_result *res) 199 struct tcf_result *res)
175{ 200{
@@ -195,6 +220,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
195 par.hooknum = ipt->tcfi_hook; 220 par.hooknum = ipt->tcfi_hook;
196 par.target = ipt->tcfi_t->u.kernel.target; 221 par.target = ipt->tcfi_t->u.kernel.target;
197 par.targinfo = ipt->tcfi_t->data; 222 par.targinfo = ipt->tcfi_t->data;
223 par.family = NFPROTO_IPV4;
198 ret = par.target->target(skb, &par); 224 ret = par.target->target(skb, &par);
199 225
200 switch (ret) { 226 switch (ret) {
@@ -260,6 +286,22 @@ nla_put_failure:
260 return -1; 286 return -1;
261} 287}
262 288
289static int tcf_ipt_walker(struct net *net, struct sk_buff *skb,
290 struct netlink_callback *cb, int type,
291 struct tc_action *a)
292{
293 struct tc_action_net *tn = net_generic(net, ipt_net_id);
294
295 return tcf_generic_walker(tn, skb, cb, type, a);
296}
297
298static int tcf_ipt_search(struct net *net, struct tc_action *a, u32 index)
299{
300 struct tc_action_net *tn = net_generic(net, ipt_net_id);
301
302 return tcf_hash_search(tn, a, index);
303}
304
263static struct tc_action_ops act_ipt_ops = { 305static struct tc_action_ops act_ipt_ops = {
264 .kind = "ipt", 306 .kind = "ipt",
265 .type = TCA_ACT_IPT, 307 .type = TCA_ACT_IPT,
@@ -268,8 +310,47 @@ static struct tc_action_ops act_ipt_ops = {
268 .dump = tcf_ipt_dump, 310 .dump = tcf_ipt_dump,
269 .cleanup = tcf_ipt_release, 311 .cleanup = tcf_ipt_release,
270 .init = tcf_ipt_init, 312 .init = tcf_ipt_init,
313 .walk = tcf_ipt_walker,
314 .lookup = tcf_ipt_search,
315};
316
317static __net_init int ipt_init_net(struct net *net)
318{
319 struct tc_action_net *tn = net_generic(net, ipt_net_id);
320
321 return tc_action_net_init(tn, &act_ipt_ops, IPT_TAB_MASK);
322}
323
324static void __net_exit ipt_exit_net(struct net *net)
325{
326 struct tc_action_net *tn = net_generic(net, ipt_net_id);
327
328 tc_action_net_exit(tn);
329}
330
331static struct pernet_operations ipt_net_ops = {
332 .init = ipt_init_net,
333 .exit = ipt_exit_net,
334 .id = &ipt_net_id,
335 .size = sizeof(struct tc_action_net),
271}; 336};
272 337
338static int tcf_xt_walker(struct net *net, struct sk_buff *skb,
339 struct netlink_callback *cb, int type,
340 struct tc_action *a)
341{
342 struct tc_action_net *tn = net_generic(net, xt_net_id);
343
344 return tcf_generic_walker(tn, skb, cb, type, a);
345}
346
347static int tcf_xt_search(struct net *net, struct tc_action *a, u32 index)
348{
349 struct tc_action_net *tn = net_generic(net, xt_net_id);
350
351 return tcf_hash_search(tn, a, index);
352}
353
273static struct tc_action_ops act_xt_ops = { 354static struct tc_action_ops act_xt_ops = {
274 .kind = "xt", 355 .kind = "xt",
275 .type = TCA_ACT_XT, 356 .type = TCA_ACT_XT,
@@ -277,7 +358,30 @@ static struct tc_action_ops act_xt_ops = {
277 .act = tcf_ipt, 358 .act = tcf_ipt,
278 .dump = tcf_ipt_dump, 359 .dump = tcf_ipt_dump,
279 .cleanup = tcf_ipt_release, 360 .cleanup = tcf_ipt_release,
280 .init = tcf_ipt_init, 361 .init = tcf_xt_init,
362 .walk = tcf_xt_walker,
363 .lookup = tcf_xt_search,
364};
365
366static __net_init int xt_init_net(struct net *net)
367{
368 struct tc_action_net *tn = net_generic(net, xt_net_id);
369
370 return tc_action_net_init(tn, &act_xt_ops, IPT_TAB_MASK);
371}
372
373static void __net_exit xt_exit_net(struct net *net)
374{
375 struct tc_action_net *tn = net_generic(net, xt_net_id);
376
377 tc_action_net_exit(tn);
378}
379
380static struct pernet_operations xt_net_ops = {
381 .init = xt_init_net,
382 .exit = xt_exit_net,
383 .id = &xt_net_id,
384 .size = sizeof(struct tc_action_net),
281}; 385};
282 386
283MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); 387MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
@@ -289,12 +393,13 @@ static int __init ipt_init_module(void)
289{ 393{
290 int ret1, ret2; 394 int ret1, ret2;
291 395
292 ret1 = tcf_register_action(&act_xt_ops, IPT_TAB_MASK); 396 ret1 = tcf_register_action(&act_xt_ops, &xt_net_ops);
293 if (ret1 < 0) 397 if (ret1 < 0)
294 printk("Failed to load xt action\n"); 398 pr_err("Failed to load xt action\n");
295 ret2 = tcf_register_action(&act_ipt_ops, IPT_TAB_MASK); 399
400 ret2 = tcf_register_action(&act_ipt_ops, &ipt_net_ops);
296 if (ret2 < 0) 401 if (ret2 < 0)
297 printk("Failed to load ipt action\n"); 402 pr_err("Failed to load ipt action\n");
298 403
299 if (ret1 < 0 && ret2 < 0) { 404 if (ret1 < 0 && ret2 < 0) {
300 return ret1; 405 return ret1;
@@ -304,8 +409,8 @@ static int __init ipt_init_module(void)
304 409
305static void __exit ipt_cleanup_module(void) 410static void __exit ipt_cleanup_module(void)
306{ 411{
307 tcf_unregister_action(&act_xt_ops); 412 tcf_unregister_action(&act_ipt_ops, &ipt_net_ops);
308 tcf_unregister_action(&act_ipt_ops); 413 tcf_unregister_action(&act_xt_ops, &xt_net_ops);
309} 414}
310 415
311module_init(ipt_init_module); 416module_init(ipt_init_module);
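The act_ife.c and act_ipt.c changes above follow a pattern that repeats in every action module in this series: a per-module net_id, walker and lookup callbacks that fetch the per-netns tc_action_net via net_generic(), and pernet_operations handed to tcf_register_action(). A condensed sketch of that boilerplate, using placeholder names ("foo", act_foo_ops, FOO_TAB_MASK are illustrative, not identifiers from the tree):

#define FOO_TAB_MASK 15

static int foo_net_id;
static struct tc_action_ops act_foo_ops;

static int tcf_foo_walker(struct net *net, struct sk_buff *skb,
                          struct netlink_callback *cb, int type,
                          struct tc_action *a)
{
        struct tc_action_net *tn = net_generic(net, foo_net_id);

        return tcf_generic_walker(tn, skb, cb, type, a);
}

static int tcf_foo_search(struct net *net, struct tc_action *a, u32 index)
{
        struct tc_action_net *tn = net_generic(net, foo_net_id);

        return tcf_hash_search(tn, a, index);
}

static __net_init int foo_init_net(struct net *net)
{
        struct tc_action_net *tn = net_generic(net, foo_net_id);

        return tc_action_net_init(tn, &act_foo_ops, FOO_TAB_MASK);
}

static void __net_exit foo_exit_net(struct net *net)
{
        tc_action_net_exit(net_generic(net, foo_net_id));
}

static struct pernet_operations foo_net_ops = {
        .init = foo_init_net,
        .exit = foo_exit_net,
        .id   = &foo_net_id,
        .size = sizeof(struct tc_action_net),
};

/* registration then pairs the ops with the pernet ops:
 *      tcf_register_action(&act_foo_ops, &foo_net_ops);
 *      tcf_unregister_action(&act_foo_ops, &foo_net_ops);
 */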
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
new file mode 100644
index 000000000000..82892170ce4f
--- /dev/null
+++ b/net/sched/act_meta_mark.c
@@ -0,0 +1,79 @@
1/*
2 * net/sched/act_meta_mark.c IFE skb->mark metadata module
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * copyright Jamal Hadi Salim (2015)
10 *
11*/
12
13#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/string.h>
16#include <linux/errno.h>
17#include <linux/skbuff.h>
18#include <linux/rtnetlink.h>
19#include <linux/module.h>
20#include <linux/init.h>
21#include <net/netlink.h>
22#include <net/pkt_sched.h>
23#include <uapi/linux/tc_act/tc_ife.h>
24#include <net/tc_act/tc_ife.h>
25#include <linux/rtnetlink.h>
26
27static int skbmark_encode(struct sk_buff *skb, void *skbdata,
28 struct tcf_meta_info *e)
29{
30 u32 ifemark = skb->mark;
31
32 return ife_encode_meta_u32(ifemark, skbdata, e);
33}
34
35static int skbmark_decode(struct sk_buff *skb, void *data, u16 len)
36{
37 u32 ifemark = *(u32 *)data;
38
39 skb->mark = ntohl(ifemark);
40 return 0;
41}
42
43static int skbmark_check(struct sk_buff *skb, struct tcf_meta_info *e)
44{
45 return ife_check_meta_u32(skb->mark, e);
46}
47
48static struct tcf_meta_ops ife_skbmark_ops = {
49 .metaid = IFE_META_SKBMARK,
50 .metatype = NLA_U32,
51 .name = "skbmark",
52 .synopsis = "skb mark 32 bit metadata",
53 .check_presence = skbmark_check,
54 .encode = skbmark_encode,
55 .decode = skbmark_decode,
56 .get = ife_get_meta_u32,
57 .alloc = ife_alloc_meta_u32,
58 .release = ife_release_meta_gen,
59 .validate = ife_validate_meta_u32,
60 .owner = THIS_MODULE,
61};
62
63static int __init ifemark_init_module(void)
64{
65 return register_ife_op(&ife_skbmark_ops);
66}
67
68static void __exit ifemark_cleanup_module(void)
69{
70 unregister_ife_op(&ife_skbmark_ops);
71}
72
73module_init(ifemark_init_module);
74module_exit(ifemark_cleanup_module);
75
76MODULE_AUTHOR("Jamal Hadi Salim(2015)");
77MODULE_DESCRIPTION("Inter-FE skb mark metadata module");
78MODULE_LICENSE("GPL");
79MODULE_ALIAS_IFE_META(IFE_META_SKBMARK);
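The decode callback above trusts that a skbmark TLV carries exactly four bytes of payload. For reference, a more defensive variant might check the length before dereferencing; a minimal sketch, assuming the same includes as the module above (this is not code from the tree):

static int skbmark_decode_checked(struct sk_buff *skb, void *data, u16 len)
{
        u32 mark;

        if (len < sizeof(mark))         /* TLV payload too short for a u32 */
                return -EINVAL;

        memcpy(&mark, data, sizeof(mark));
        skb->mark = ntohl(mark);
        return 0;
}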
diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c
new file mode 100644
index 000000000000..26bf4d86030b
--- /dev/null
+++ b/net/sched/act_meta_skbprio.c
@@ -0,0 +1,76 @@
1/*
2 * net/sched/act_meta_skbprio.c IFE skb->priority metadata module
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * copyright Jamal Hadi Salim (2015)
10 *
11*/
12
13#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/string.h>
16#include <linux/errno.h>
17#include <linux/skbuff.h>
18#include <linux/rtnetlink.h>
19#include <linux/module.h>
20#include <linux/init.h>
21#include <net/netlink.h>
22#include <net/pkt_sched.h>
23#include <uapi/linux/tc_act/tc_ife.h>
24#include <net/tc_act/tc_ife.h>
25
26static int skbprio_check(struct sk_buff *skb, struct tcf_meta_info *e)
27{
28 return ife_check_meta_u32(skb->priority, e);
29}
30
31static int skbprio_encode(struct sk_buff *skb, void *skbdata,
32 struct tcf_meta_info *e)
33{
34 u32 ifeprio = skb->priority; /* avoid having to cast skb->priority*/
35
36 return ife_encode_meta_u32(ifeprio, skbdata, e);
37}
38
39static int skbprio_decode(struct sk_buff *skb, void *data, u16 len)
40{
41 u32 ifeprio = *(u32 *)data;
42
43 skb->priority = ntohl(ifeprio);
44 return 0;
45}
46
47static struct tcf_meta_ops ife_prio_ops = {
48 .metaid = IFE_META_PRIO,
49 .metatype = NLA_U32,
50 .name = "skbprio",
51 .synopsis = "skb prio metadata",
52 .check_presence = skbprio_check,
53 .encode = skbprio_encode,
54 .decode = skbprio_decode,
55 .get = ife_get_meta_u32,
56 .alloc = ife_alloc_meta_u32,
57 .owner = THIS_MODULE,
58};
59
60static int __init ifeprio_init_module(void)
61{
62 return register_ife_op(&ife_prio_ops);
63}
64
65static void __exit ifeprio_cleanup_module(void)
66{
67 unregister_ife_op(&ife_prio_ops);
68}
69
70module_init(ifeprio_init_module);
71module_exit(ifeprio_cleanup_module);
72
73MODULE_AUTHOR("Jamal Hadi Salim(2015)");
74MODULE_DESCRIPTION("Inter-FE skb prio metadata action");
75MODULE_LICENSE("GPL");
76MODULE_ALIAS_IFE_META(IFE_META_PRIO);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 32fcdecdb9e2..e8a760cf7775 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -50,10 +50,13 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
50 [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, 50 [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
51}; 51};
52 52
53static int mirred_net_id;
54
53static int tcf_mirred_init(struct net *net, struct nlattr *nla, 55static int tcf_mirred_init(struct net *net, struct nlattr *nla,
54 struct nlattr *est, struct tc_action *a, int ovr, 56 struct nlattr *est, struct tc_action *a, int ovr,
55 int bind) 57 int bind)
56{ 58{
59 struct tc_action_net *tn = net_generic(net, mirred_net_id);
57 struct nlattr *tb[TCA_MIRRED_MAX + 1]; 60 struct nlattr *tb[TCA_MIRRED_MAX + 1];
58 struct tc_mirred *parm; 61 struct tc_mirred *parm;
59 struct tcf_mirred *m; 62 struct tcf_mirred *m;
@@ -96,11 +99,11 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
96 dev = NULL; 99 dev = NULL;
97 } 100 }
98 101
99 if (!tcf_hash_check(parm->index, a, bind)) { 102 if (!tcf_hash_check(tn, parm->index, a, bind)) {
100 if (dev == NULL) 103 if (dev == NULL)
101 return -EINVAL; 104 return -EINVAL;
102 ret = tcf_hash_create(parm->index, est, a, sizeof(*m), 105 ret = tcf_hash_create(tn, parm->index, est, a,
103 bind, true); 106 sizeof(*m), bind, true);
104 if (ret) 107 if (ret)
105 return ret; 108 return ret;
106 ret = ACT_P_CREATED; 109 ret = ACT_P_CREATED;
@@ -130,7 +133,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
130 spin_lock_bh(&mirred_list_lock); 133 spin_lock_bh(&mirred_list_lock);
131 list_add(&m->tcfm_list, &mirred_list); 134 list_add(&m->tcfm_list, &mirred_list);
132 spin_unlock_bh(&mirred_list_lock); 135 spin_unlock_bh(&mirred_list_lock);
133 tcf_hash_insert(a); 136 tcf_hash_insert(tn, a);
134 } 137 }
135 138
136 return ret; 139 return ret;
@@ -179,7 +182,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
179 182
180 skb2->skb_iif = skb->dev->ifindex; 183 skb2->skb_iif = skb->dev->ifindex;
181 skb2->dev = dev; 184 skb2->dev = dev;
182 skb_sender_cpu_clear(skb2);
183 err = dev_queue_xmit(skb2); 185 err = dev_queue_xmit(skb2);
184 186
185 if (err) { 187 if (err) {
@@ -221,6 +223,22 @@ nla_put_failure:
221 return -1; 223 return -1;
222} 224}
223 225
226static int tcf_mirred_walker(struct net *net, struct sk_buff *skb,
227 struct netlink_callback *cb, int type,
228 struct tc_action *a)
229{
230 struct tc_action_net *tn = net_generic(net, mirred_net_id);
231
232 return tcf_generic_walker(tn, skb, cb, type, a);
233}
234
235static int tcf_mirred_search(struct net *net, struct tc_action *a, u32 index)
236{
237 struct tc_action_net *tn = net_generic(net, mirred_net_id);
238
239 return tcf_hash_search(tn, a, index);
240}
241
224static int mirred_device_event(struct notifier_block *unused, 242static int mirred_device_event(struct notifier_block *unused,
225 unsigned long event, void *ptr) 243 unsigned long event, void *ptr)
226{ 244{
@@ -257,6 +275,29 @@ static struct tc_action_ops act_mirred_ops = {
257 .dump = tcf_mirred_dump, 275 .dump = tcf_mirred_dump,
258 .cleanup = tcf_mirred_release, 276 .cleanup = tcf_mirred_release,
259 .init = tcf_mirred_init, 277 .init = tcf_mirred_init,
278 .walk = tcf_mirred_walker,
279 .lookup = tcf_mirred_search,
280};
281
282static __net_init int mirred_init_net(struct net *net)
283{
284 struct tc_action_net *tn = net_generic(net, mirred_net_id);
285
286 return tc_action_net_init(tn, &act_mirred_ops, MIRRED_TAB_MASK);
287}
288
289static void __net_exit mirred_exit_net(struct net *net)
290{
291 struct tc_action_net *tn = net_generic(net, mirred_net_id);
292
293 tc_action_net_exit(tn);
294}
295
296static struct pernet_operations mirred_net_ops = {
297 .init = mirred_init_net,
298 .exit = mirred_exit_net,
299 .id = &mirred_net_id,
300 .size = sizeof(struct tc_action_net),
260}; 301};
261 302
262MODULE_AUTHOR("Jamal Hadi Salim(2002)"); 303MODULE_AUTHOR("Jamal Hadi Salim(2002)");
@@ -270,12 +311,12 @@ static int __init mirred_init_module(void)
270 return err; 311 return err;
271 312
272 pr_info("Mirror/redirect action on\n"); 313 pr_info("Mirror/redirect action on\n");
273 return tcf_register_action(&act_mirred_ops, MIRRED_TAB_MASK); 314 return tcf_register_action(&act_mirred_ops, &mirred_net_ops);
274} 315}
275 316
276static void __exit mirred_cleanup_module(void) 317static void __exit mirred_cleanup_module(void)
277{ 318{
278 tcf_unregister_action(&act_mirred_ops); 319 tcf_unregister_action(&act_mirred_ops, &mirred_net_ops);
279 unregister_netdevice_notifier(&mirred_device_notifier); 320 unregister_netdevice_notifier(&mirred_device_notifier);
280} 321}
281 322
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index b7c4ead8b5a8..0f65cdfbfb1d 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -31,6 +31,8 @@
31 31
32#define NAT_TAB_MASK 15 32#define NAT_TAB_MASK 15
33 33
34static int nat_net_id;
35
34static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { 36static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
35 [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, 37 [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) },
36}; 38};
@@ -38,6 +40,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
38static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, 40static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
39 struct tc_action *a, int ovr, int bind) 41 struct tc_action *a, int ovr, int bind)
40{ 42{
43 struct tc_action_net *tn = net_generic(net, nat_net_id);
41 struct nlattr *tb[TCA_NAT_MAX + 1]; 44 struct nlattr *tb[TCA_NAT_MAX + 1];
42 struct tc_nat *parm; 45 struct tc_nat *parm;
43 int ret = 0, err; 46 int ret = 0, err;
@@ -54,9 +57,9 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
54 return -EINVAL; 57 return -EINVAL;
55 parm = nla_data(tb[TCA_NAT_PARMS]); 58 parm = nla_data(tb[TCA_NAT_PARMS]);
56 59
57 if (!tcf_hash_check(parm->index, a, bind)) { 60 if (!tcf_hash_check(tn, parm->index, a, bind)) {
58 ret = tcf_hash_create(parm->index, est, a, sizeof(*p), 61 ret = tcf_hash_create(tn, parm->index, est, a,
59 bind, false); 62 sizeof(*p), bind, false);
60 if (ret) 63 if (ret)
61 return ret; 64 return ret;
62 ret = ACT_P_CREATED; 65 ret = ACT_P_CREATED;
@@ -79,7 +82,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
79 spin_unlock_bh(&p->tcf_lock); 82 spin_unlock_bh(&p->tcf_lock);
80 83
81 if (ret == ACT_P_CREATED) 84 if (ret == ACT_P_CREATED)
82 tcf_hash_insert(a); 85 tcf_hash_insert(tn, a);
83 86
84 return ret; 87 return ret;
85} 88}
@@ -126,9 +129,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
126 addr = iph->daddr; 129 addr = iph->daddr;
127 130
128 if (!((old_addr ^ addr) & mask)) { 131 if (!((old_addr ^ addr) & mask)) {
129 if (skb_cloned(skb) && 132 if (skb_try_make_writable(skb, sizeof(*iph) + noff))
130 !skb_clone_writable(skb, sizeof(*iph) + noff) &&
131 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
132 goto drop; 133 goto drop;
133 134
134 new_addr &= mask; 135 new_addr &= mask;
@@ -156,9 +157,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
156 struct tcphdr *tcph; 157 struct tcphdr *tcph;
157 158
158 if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) || 159 if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
159 (skb_cloned(skb) && 160 skb_try_make_writable(skb, ihl + sizeof(*tcph) + noff))
160 !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) &&
161 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
162 goto drop; 161 goto drop;
163 162
164 tcph = (void *)(skb_network_header(skb) + ihl); 163 tcph = (void *)(skb_network_header(skb) + ihl);
@@ -171,9 +170,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
171 struct udphdr *udph; 170 struct udphdr *udph;
172 171
173 if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) || 172 if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
174 (skb_cloned(skb) && 173 skb_try_make_writable(skb, ihl + sizeof(*udph) + noff))
175 !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) &&
176 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
177 goto drop; 174 goto drop;
178 175
179 udph = (void *)(skb_network_header(skb) + ihl); 176 udph = (void *)(skb_network_header(skb) + ihl);
@@ -213,10 +210,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
213 if ((old_addr ^ addr) & mask) 210 if ((old_addr ^ addr) & mask)
214 break; 211 break;
215 212
216 if (skb_cloned(skb) && 213 if (skb_try_make_writable(skb, ihl + sizeof(*icmph) +
217 !skb_clone_writable(skb, ihl + sizeof(*icmph) + 214 sizeof(*iph) + noff))
218 sizeof(*iph) + noff) &&
219 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
220 goto drop; 215 goto drop;
221 216
222 icmph = (void *)(skb_network_header(skb) + ihl); 217 icmph = (void *)(skb_network_header(skb) + ihl);
@@ -282,6 +277,22 @@ nla_put_failure:
282 return -1; 277 return -1;
283} 278}
284 279
280static int tcf_nat_walker(struct net *net, struct sk_buff *skb,
281 struct netlink_callback *cb, int type,
282 struct tc_action *a)
283{
284 struct tc_action_net *tn = net_generic(net, nat_net_id);
285
286 return tcf_generic_walker(tn, skb, cb, type, a);
287}
288
289static int tcf_nat_search(struct net *net, struct tc_action *a, u32 index)
290{
291 struct tc_action_net *tn = net_generic(net, nat_net_id);
292
293 return tcf_hash_search(tn, a, index);
294}
295
285static struct tc_action_ops act_nat_ops = { 296static struct tc_action_ops act_nat_ops = {
286 .kind = "nat", 297 .kind = "nat",
287 .type = TCA_ACT_NAT, 298 .type = TCA_ACT_NAT,
@@ -289,6 +300,29 @@ static struct tc_action_ops act_nat_ops = {
289 .act = tcf_nat, 300 .act = tcf_nat,
290 .dump = tcf_nat_dump, 301 .dump = tcf_nat_dump,
291 .init = tcf_nat_init, 302 .init = tcf_nat_init,
303 .walk = tcf_nat_walker,
304 .lookup = tcf_nat_search,
305};
306
307static __net_init int nat_init_net(struct net *net)
308{
309 struct tc_action_net *tn = net_generic(net, nat_net_id);
310
311 return tc_action_net_init(tn, &act_nat_ops, NAT_TAB_MASK);
312}
313
314static void __net_exit nat_exit_net(struct net *net)
315{
316 struct tc_action_net *tn = net_generic(net, nat_net_id);
317
318 tc_action_net_exit(tn);
319}
320
321static struct pernet_operations nat_net_ops = {
322 .init = nat_init_net,
323 .exit = nat_exit_net,
324 .id = &nat_net_id,
325 .size = sizeof(struct tc_action_net),
292}; 326};
293 327
294MODULE_DESCRIPTION("Stateless NAT actions"); 328MODULE_DESCRIPTION("Stateless NAT actions");
@@ -296,12 +330,12 @@ MODULE_LICENSE("GPL");
296 330
297static int __init nat_init_module(void) 331static int __init nat_init_module(void)
298{ 332{
299 return tcf_register_action(&act_nat_ops, NAT_TAB_MASK); 333 return tcf_register_action(&act_nat_ops, &nat_net_ops);
300} 334}
301 335
302static void __exit nat_cleanup_module(void) 336static void __exit nat_cleanup_module(void)
303{ 337{
304 tcf_unregister_action(&act_nat_ops); 338 tcf_unregister_action(&act_nat_ops, &nat_net_ops);
305} 339}
306 340
307module_init(nat_init_module); 341module_init(nat_init_module);
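The act_nat.c hunks above replace an open-coded clone/writability test with skb_try_make_writable(). Judging purely from the removed lines, the helper is roughly equivalent to the following (a sketch of the replaced pattern, not the exact in-tree definition; it returns nonzero on failure):

static inline int example_try_make_writable(struct sk_buff *skb,
                                            unsigned int write_len)
{
        return skb_cloned(skb) && !skb_clone_writable(skb, write_len) &&
               pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}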
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index e38a7701f154..429c3ab65142 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -25,6 +25,8 @@
25 25
26#define PEDIT_TAB_MASK 15 26#define PEDIT_TAB_MASK 15
27 27
28static int pedit_net_id;
29
28static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { 30static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
29 [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, 31 [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
30}; 32};
@@ -33,6 +35,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
33 struct nlattr *est, struct tc_action *a, 35 struct nlattr *est, struct tc_action *a,
34 int ovr, int bind) 36 int ovr, int bind)
35{ 37{
38 struct tc_action_net *tn = net_generic(net, pedit_net_id);
36 struct nlattr *tb[TCA_PEDIT_MAX + 1]; 39 struct nlattr *tb[TCA_PEDIT_MAX + 1];
37 struct tc_pedit *parm; 40 struct tc_pedit *parm;
38 int ret = 0, err; 41 int ret = 0, err;
@@ -54,11 +57,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
54 if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize) 57 if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
55 return -EINVAL; 58 return -EINVAL;
56 59
57 if (!tcf_hash_check(parm->index, a, bind)) { 60 if (!tcf_hash_check(tn, parm->index, a, bind)) {
58 if (!parm->nkeys) 61 if (!parm->nkeys)
59 return -EINVAL; 62 return -EINVAL;
60 ret = tcf_hash_create(parm->index, est, a, sizeof(*p), 63 ret = tcf_hash_create(tn, parm->index, est, a,
61 bind, false); 64 sizeof(*p), bind, false);
62 if (ret) 65 if (ret)
63 return ret; 66 return ret;
64 p = to_pedit(a); 67 p = to_pedit(a);
@@ -93,7 +96,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
93 memcpy(p->tcfp_keys, parm->keys, ksize); 96 memcpy(p->tcfp_keys, parm->keys, ksize);
94 spin_unlock_bh(&p->tcf_lock); 97 spin_unlock_bh(&p->tcf_lock);
95 if (ret == ACT_P_CREATED) 98 if (ret == ACT_P_CREATED)
96 tcf_hash_insert(a); 99 tcf_hash_insert(tn, a);
97 return ret; 100 return ret;
98} 101}
99 102
@@ -211,6 +214,22 @@ nla_put_failure:
211 return -1; 214 return -1;
212} 215}
213 216
217static int tcf_pedit_walker(struct net *net, struct sk_buff *skb,
218 struct netlink_callback *cb, int type,
219 struct tc_action *a)
220{
221 struct tc_action_net *tn = net_generic(net, pedit_net_id);
222
223 return tcf_generic_walker(tn, skb, cb, type, a);
224}
225
226static int tcf_pedit_search(struct net *net, struct tc_action *a, u32 index)
227{
228 struct tc_action_net *tn = net_generic(net, pedit_net_id);
229
230 return tcf_hash_search(tn, a, index);
231}
232
214static struct tc_action_ops act_pedit_ops = { 233static struct tc_action_ops act_pedit_ops = {
215 .kind = "pedit", 234 .kind = "pedit",
216 .type = TCA_ACT_PEDIT, 235 .type = TCA_ACT_PEDIT,
@@ -219,6 +238,29 @@ static struct tc_action_ops act_pedit_ops = {
219 .dump = tcf_pedit_dump, 238 .dump = tcf_pedit_dump,
220 .cleanup = tcf_pedit_cleanup, 239 .cleanup = tcf_pedit_cleanup,
221 .init = tcf_pedit_init, 240 .init = tcf_pedit_init,
241 .walk = tcf_pedit_walker,
242 .lookup = tcf_pedit_search,
243};
244
245static __net_init int pedit_init_net(struct net *net)
246{
247 struct tc_action_net *tn = net_generic(net, pedit_net_id);
248
249 return tc_action_net_init(tn, &act_pedit_ops, PEDIT_TAB_MASK);
250}
251
252static void __net_exit pedit_exit_net(struct net *net)
253{
254 struct tc_action_net *tn = net_generic(net, pedit_net_id);
255
256 tc_action_net_exit(tn);
257}
258
259static struct pernet_operations pedit_net_ops = {
260 .init = pedit_init_net,
261 .exit = pedit_exit_net,
262 .id = &pedit_net_id,
263 .size = sizeof(struct tc_action_net),
222}; 264};
223 265
224MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); 266MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
@@ -227,12 +269,12 @@ MODULE_LICENSE("GPL");
227 269
228static int __init pedit_init_module(void) 270static int __init pedit_init_module(void)
229{ 271{
230 return tcf_register_action(&act_pedit_ops, PEDIT_TAB_MASK); 272 return tcf_register_action(&act_pedit_ops, &pedit_net_ops);
231} 273}
232 274
233static void __exit pedit_cleanup_module(void) 275static void __exit pedit_cleanup_module(void)
234{ 276{
235 tcf_unregister_action(&act_pedit_ops); 277 tcf_unregister_action(&act_pedit_ops, &pedit_net_ops);
236} 278}
237 279
238module_init(pedit_init_module); 280module_init(pedit_init_module);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 9a1c42a43f92..330f14e302e8 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -55,10 +55,14 @@ struct tc_police_compat {
55 55
56/* Each policer is serialized by its individual spinlock */ 56/* Each policer is serialized by its individual spinlock */
57 57
58static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, 58static int police_net_id;
59 int type, struct tc_action *a) 59
60static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
61 struct netlink_callback *cb, int type,
62 struct tc_action *a)
60{ 63{
61 struct tcf_hashinfo *hinfo = a->ops->hinfo; 64 struct tc_action_net *tn = net_generic(net, police_net_id);
65 struct tcf_hashinfo *hinfo = tn->hinfo;
62 struct hlist_head *head; 66 struct hlist_head *head;
63 struct tcf_common *p; 67 struct tcf_common *p;
64 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; 68 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
@@ -121,7 +125,8 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
121 struct tc_police *parm; 125 struct tc_police *parm;
122 struct tcf_police *police; 126 struct tcf_police *police;
123 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; 127 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
124 struct tcf_hashinfo *hinfo = a->ops->hinfo; 128 struct tc_action_net *tn = net_generic(net, police_net_id);
129 struct tcf_hashinfo *hinfo = tn->hinfo;
125 int size; 130 int size;
126 131
127 if (nla == NULL) 132 if (nla == NULL)
@@ -139,7 +144,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla,
139 parm = nla_data(tb[TCA_POLICE_TBF]); 144 parm = nla_data(tb[TCA_POLICE_TBF]);
140 145
141 if (parm->index) { 146 if (parm->index) {
142 if (tcf_hash_search(a, parm->index)) { 147 if (tcf_hash_search(tn, a, parm->index)) {
143 police = to_police(a->priv); 148 police = to_police(a->priv);
144 if (bind) { 149 if (bind) {
145 police->tcf_bindcnt += 1; 150 police->tcf_bindcnt += 1;
@@ -233,7 +238,7 @@ override:
233 238
234 police->tcfp_t_c = ktime_get_ns(); 239 police->tcfp_t_c = ktime_get_ns();
235 police->tcf_index = parm->index ? parm->index : 240 police->tcf_index = parm->index ? parm->index :
236 tcf_hash_new_index(hinfo); 241 tcf_hash_new_index(tn);
237 h = tcf_hash(police->tcf_index, POL_TAB_MASK); 242 h = tcf_hash(police->tcf_index, POL_TAB_MASK);
238 spin_lock_bh(&hinfo->lock); 243 spin_lock_bh(&hinfo->lock);
239 hlist_add_head(&police->tcf_head, &hinfo->htab[h]); 244 hlist_add_head(&police->tcf_head, &hinfo->htab[h]);
@@ -342,6 +347,13 @@ nla_put_failure:
342 return -1; 347 return -1;
343} 348}
344 349
350static int tcf_police_search(struct net *net, struct tc_action *a, u32 index)
351{
352 struct tc_action_net *tn = net_generic(net, police_net_id);
353
354 return tcf_hash_search(tn, a, index);
355}
356
345MODULE_AUTHOR("Alexey Kuznetsov"); 357MODULE_AUTHOR("Alexey Kuznetsov");
346MODULE_DESCRIPTION("Policing actions"); 358MODULE_DESCRIPTION("Policing actions");
347MODULE_LICENSE("GPL"); 359MODULE_LICENSE("GPL");
@@ -353,19 +365,41 @@ static struct tc_action_ops act_police_ops = {
353 .act = tcf_act_police, 365 .act = tcf_act_police,
354 .dump = tcf_act_police_dump, 366 .dump = tcf_act_police_dump,
355 .init = tcf_act_police_locate, 367 .init = tcf_act_police_locate,
356 .walk = tcf_act_police_walker 368 .walk = tcf_act_police_walker,
369 .lookup = tcf_police_search,
370};
371
372static __net_init int police_init_net(struct net *net)
373{
374 struct tc_action_net *tn = net_generic(net, police_net_id);
375
376 return tc_action_net_init(tn, &act_police_ops, POL_TAB_MASK);
377}
378
379static void __net_exit police_exit_net(struct net *net)
380{
381 struct tc_action_net *tn = net_generic(net, police_net_id);
382
383 tc_action_net_exit(tn);
384}
385
386static struct pernet_operations police_net_ops = {
387 .init = police_init_net,
388 .exit = police_exit_net,
389 .id = &police_net_id,
390 .size = sizeof(struct tc_action_net),
357}; 391};
358 392
359static int __init 393static int __init
360police_init_module(void) 394police_init_module(void)
361{ 395{
362 return tcf_register_action(&act_police_ops, POL_TAB_MASK); 396 return tcf_register_action(&act_police_ops, &police_net_ops);
363} 397}
364 398
365static void __exit 399static void __exit
366police_cleanup_module(void) 400police_cleanup_module(void)
367{ 401{
368 tcf_unregister_action(&act_police_ops); 402 tcf_unregister_action(&act_police_ops, &police_net_ops);
369} 403}
370 404
371module_init(police_init_module); 405module_init(police_init_module);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index d6b708d6afdf..75b2be13fbcc 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -26,6 +26,8 @@
26 26
27#define SIMP_TAB_MASK 7 27#define SIMP_TAB_MASK 7
28 28
29static int simp_net_id;
30
29#define SIMP_MAX_DATA 32 31#define SIMP_MAX_DATA 32
30static int tcf_simp(struct sk_buff *skb, const struct tc_action *a, 32static int tcf_simp(struct sk_buff *skb, const struct tc_action *a,
31 struct tcf_result *res) 33 struct tcf_result *res)
@@ -80,6 +82,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
80 struct nlattr *est, struct tc_action *a, 82 struct nlattr *est, struct tc_action *a,
81 int ovr, int bind) 83 int ovr, int bind)
82{ 84{
85 struct tc_action_net *tn = net_generic(net, simp_net_id);
83 struct nlattr *tb[TCA_DEF_MAX + 1]; 86 struct nlattr *tb[TCA_DEF_MAX + 1];
84 struct tc_defact *parm; 87 struct tc_defact *parm;
85 struct tcf_defact *d; 88 struct tcf_defact *d;
@@ -102,9 +105,9 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
102 parm = nla_data(tb[TCA_DEF_PARMS]); 105 parm = nla_data(tb[TCA_DEF_PARMS]);
103 defdata = nla_data(tb[TCA_DEF_DATA]); 106 defdata = nla_data(tb[TCA_DEF_DATA]);
104 107
105 if (!tcf_hash_check(parm->index, a, bind)) { 108 if (!tcf_hash_check(tn, parm->index, a, bind)) {
106 ret = tcf_hash_create(parm->index, est, a, sizeof(*d), 109 ret = tcf_hash_create(tn, parm->index, est, a,
107 bind, false); 110 sizeof(*d), bind, false);
108 if (ret) 111 if (ret)
109 return ret; 112 return ret;
110 113
@@ -129,7 +132,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
129 } 132 }
130 133
131 if (ret == ACT_P_CREATED) 134 if (ret == ACT_P_CREATED)
132 tcf_hash_insert(a); 135 tcf_hash_insert(tn, a);
133 return ret; 136 return ret;
134} 137}
135 138
@@ -161,6 +164,22 @@ nla_put_failure:
161 return -1; 164 return -1;
162} 165}
163 166
167static int tcf_simp_walker(struct net *net, struct sk_buff *skb,
168 struct netlink_callback *cb, int type,
169 struct tc_action *a)
170{
171 struct tc_action_net *tn = net_generic(net, simp_net_id);
172
173 return tcf_generic_walker(tn, skb, cb, type, a);
174}
175
176static int tcf_simp_search(struct net *net, struct tc_action *a, u32 index)
177{
178 struct tc_action_net *tn = net_generic(net, simp_net_id);
179
180 return tcf_hash_search(tn, a, index);
181}
182
164static struct tc_action_ops act_simp_ops = { 183static struct tc_action_ops act_simp_ops = {
165 .kind = "simple", 184 .kind = "simple",
166 .type = TCA_ACT_SIMP, 185 .type = TCA_ACT_SIMP,
@@ -169,6 +188,29 @@ static struct tc_action_ops act_simp_ops = {
169 .dump = tcf_simp_dump, 188 .dump = tcf_simp_dump,
170 .cleanup = tcf_simp_release, 189 .cleanup = tcf_simp_release,
171 .init = tcf_simp_init, 190 .init = tcf_simp_init,
191 .walk = tcf_simp_walker,
192 .lookup = tcf_simp_search,
193};
194
195static __net_init int simp_init_net(struct net *net)
196{
197 struct tc_action_net *tn = net_generic(net, simp_net_id);
198
199 return tc_action_net_init(tn, &act_simp_ops, SIMP_TAB_MASK);
200}
201
202static void __net_exit simp_exit_net(struct net *net)
203{
204 struct tc_action_net *tn = net_generic(net, simp_net_id);
205
206 tc_action_net_exit(tn);
207}
208
209static struct pernet_operations simp_net_ops = {
210 .init = simp_init_net,
211 .exit = simp_exit_net,
212 .id = &simp_net_id,
213 .size = sizeof(struct tc_action_net),
172}; 214};
173 215
174MODULE_AUTHOR("Jamal Hadi Salim(2005)"); 216MODULE_AUTHOR("Jamal Hadi Salim(2005)");
@@ -177,8 +219,7 @@ MODULE_LICENSE("GPL");
177 219
178static int __init simp_init_module(void) 220static int __init simp_init_module(void)
179{ 221{
180 int ret; 222 int ret = tcf_register_action(&act_simp_ops, &simp_net_ops);
181 ret = tcf_register_action(&act_simp_ops, SIMP_TAB_MASK);
182 if (!ret) 223 if (!ret)
183 pr_info("Simple TC action Loaded\n"); 224 pr_info("Simple TC action Loaded\n");
184 return ret; 225 return ret;
@@ -186,7 +227,7 @@ static int __init simp_init_module(void)
186 227
187static void __exit simp_cleanup_module(void) 228static void __exit simp_cleanup_module(void)
188{ 229{
189 tcf_unregister_action(&act_simp_ops); 230 tcf_unregister_action(&act_simp_ops, &simp_net_ops);
190} 231}
191 232
192module_init(simp_init_module); 233module_init(simp_init_module);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6751b5f8c046..cfcdbdc00c9b 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -29,6 +29,8 @@
29 29
30#define SKBEDIT_TAB_MASK 15 30#define SKBEDIT_TAB_MASK 15
31 31
32static int skbedit_net_id;
33
32static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, 34static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
33 struct tcf_result *res) 35 struct tcf_result *res)
34{ 36{
@@ -61,6 +63,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
61 struct nlattr *est, struct tc_action *a, 63 struct nlattr *est, struct tc_action *a,
62 int ovr, int bind) 64 int ovr, int bind)
63{ 65{
66 struct tc_action_net *tn = net_generic(net, skbedit_net_id);
64 struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; 67 struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
65 struct tc_skbedit *parm; 68 struct tc_skbedit *parm;
66 struct tcf_skbedit *d; 69 struct tcf_skbedit *d;
@@ -98,9 +101,9 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
98 101
99 parm = nla_data(tb[TCA_SKBEDIT_PARMS]); 102 parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
100 103
101 if (!tcf_hash_check(parm->index, a, bind)) { 104 if (!tcf_hash_check(tn, parm->index, a, bind)) {
102 ret = tcf_hash_create(parm->index, est, a, sizeof(*d), 105 ret = tcf_hash_create(tn, parm->index, est, a,
103 bind, false); 106 sizeof(*d), bind, false);
104 if (ret) 107 if (ret)
105 return ret; 108 return ret;
106 109
@@ -130,7 +133,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
130 spin_unlock_bh(&d->tcf_lock); 133 spin_unlock_bh(&d->tcf_lock);
131 134
132 if (ret == ACT_P_CREATED) 135 if (ret == ACT_P_CREATED)
133 tcf_hash_insert(a); 136 tcf_hash_insert(tn, a);
134 return ret; 137 return ret;
135} 138}
136 139
@@ -173,6 +176,22 @@ nla_put_failure:
173 return -1; 176 return -1;
174} 177}
175 178
179static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
180 struct netlink_callback *cb, int type,
181 struct tc_action *a)
182{
183 struct tc_action_net *tn = net_generic(net, skbedit_net_id);
184
185 return tcf_generic_walker(tn, skb, cb, type, a);
186}
187
188static int tcf_skbedit_search(struct net *net, struct tc_action *a, u32 index)
189{
190 struct tc_action_net *tn = net_generic(net, skbedit_net_id);
191
192 return tcf_hash_search(tn, a, index);
193}
194
176static struct tc_action_ops act_skbedit_ops = { 195static struct tc_action_ops act_skbedit_ops = {
177 .kind = "skbedit", 196 .kind = "skbedit",
178 .type = TCA_ACT_SKBEDIT, 197 .type = TCA_ACT_SKBEDIT,
@@ -180,6 +199,29 @@ static struct tc_action_ops act_skbedit_ops = {
180 .act = tcf_skbedit, 199 .act = tcf_skbedit,
181 .dump = tcf_skbedit_dump, 200 .dump = tcf_skbedit_dump,
182 .init = tcf_skbedit_init, 201 .init = tcf_skbedit_init,
202 .walk = tcf_skbedit_walker,
203 .lookup = tcf_skbedit_search,
204};
205
206static __net_init int skbedit_init_net(struct net *net)
207{
208 struct tc_action_net *tn = net_generic(net, skbedit_net_id);
209
210 return tc_action_net_init(tn, &act_skbedit_ops, SKBEDIT_TAB_MASK);
211}
212
213static void __net_exit skbedit_exit_net(struct net *net)
214{
215 struct tc_action_net *tn = net_generic(net, skbedit_net_id);
216
217 tc_action_net_exit(tn);
218}
219
220static struct pernet_operations skbedit_net_ops = {
221 .init = skbedit_init_net,
222 .exit = skbedit_exit_net,
223 .id = &skbedit_net_id,
224 .size = sizeof(struct tc_action_net),
183}; 225};
184 226
185MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>"); 227MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
@@ -188,12 +230,12 @@ MODULE_LICENSE("GPL");
188 230
189static int __init skbedit_init_module(void) 231static int __init skbedit_init_module(void)
190{ 232{
191 return tcf_register_action(&act_skbedit_ops, SKBEDIT_TAB_MASK); 233 return tcf_register_action(&act_skbedit_ops, &skbedit_net_ops);
192} 234}
193 235
194static void __exit skbedit_cleanup_module(void) 236static void __exit skbedit_cleanup_module(void)
195{ 237{
196 tcf_unregister_action(&act_skbedit_ops); 238 tcf_unregister_action(&act_skbedit_ops, &skbedit_net_ops);
197} 239}
198 240
199module_init(skbedit_init_module); 241module_init(skbedit_init_module);
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 796785e0bf96..bab8ae0cefc0 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -21,6 +21,8 @@
21 21
22#define VLAN_TAB_MASK 15 22#define VLAN_TAB_MASK 15
23 23
24static int vlan_net_id;
25
24static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, 26static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
25 struct tcf_result *res) 27 struct tcf_result *res)
26{ 28{
@@ -68,6 +70,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
68 struct nlattr *est, struct tc_action *a, 70 struct nlattr *est, struct tc_action *a,
69 int ovr, int bind) 71 int ovr, int bind)
70{ 72{
73 struct tc_action_net *tn = net_generic(net, vlan_net_id);
71 struct nlattr *tb[TCA_VLAN_MAX + 1]; 74 struct nlattr *tb[TCA_VLAN_MAX + 1];
72 struct tc_vlan *parm; 75 struct tc_vlan *parm;
73 struct tcf_vlan *v; 76 struct tcf_vlan *v;
@@ -115,9 +118,9 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
115 } 118 }
116 action = parm->v_action; 119 action = parm->v_action;
117 120
118 if (!tcf_hash_check(parm->index, a, bind)) { 121 if (!tcf_hash_check(tn, parm->index, a, bind)) {
119 ret = tcf_hash_create(parm->index, est, a, sizeof(*v), 122 ret = tcf_hash_create(tn, parm->index, est, a,
120 bind, false); 123 sizeof(*v), bind, false);
121 if (ret) 124 if (ret)
122 return ret; 125 return ret;
123 126
@@ -143,7 +146,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
143 spin_unlock_bh(&v->tcf_lock); 146 spin_unlock_bh(&v->tcf_lock);
144 147
145 if (ret == ACT_P_CREATED) 148 if (ret == ACT_P_CREATED)
146 tcf_hash_insert(a); 149 tcf_hash_insert(tn, a);
147 return ret; 150 return ret;
148} 151}
149 152
@@ -181,6 +184,22 @@ nla_put_failure:
181 return -1; 184 return -1;
182} 185}
183 186
187static int tcf_vlan_walker(struct net *net, struct sk_buff *skb,
188 struct netlink_callback *cb, int type,
189 struct tc_action *a)
190{
191 struct tc_action_net *tn = net_generic(net, vlan_net_id);
192
193 return tcf_generic_walker(tn, skb, cb, type, a);
194}
195
196static int tcf_vlan_search(struct net *net, struct tc_action *a, u32 index)
197{
198 struct tc_action_net *tn = net_generic(net, vlan_net_id);
199
200 return tcf_hash_search(tn, a, index);
201}
202
184static struct tc_action_ops act_vlan_ops = { 203static struct tc_action_ops act_vlan_ops = {
185 .kind = "vlan", 204 .kind = "vlan",
186 .type = TCA_ACT_VLAN, 205 .type = TCA_ACT_VLAN,
@@ -188,16 +207,39 @@ static struct tc_action_ops act_vlan_ops = {
188 .act = tcf_vlan, 207 .act = tcf_vlan,
189 .dump = tcf_vlan_dump, 208 .dump = tcf_vlan_dump,
190 .init = tcf_vlan_init, 209 .init = tcf_vlan_init,
210 .walk = tcf_vlan_walker,
211 .lookup = tcf_vlan_search,
212};
213
214static __net_init int vlan_init_net(struct net *net)
215{
216 struct tc_action_net *tn = net_generic(net, vlan_net_id);
217
218 return tc_action_net_init(tn, &act_vlan_ops, VLAN_TAB_MASK);
219}
220
221static void __net_exit vlan_exit_net(struct net *net)
222{
223 struct tc_action_net *tn = net_generic(net, vlan_net_id);
224
225 tc_action_net_exit(tn);
226}
227
228static struct pernet_operations vlan_net_ops = {
229 .init = vlan_init_net,
230 .exit = vlan_exit_net,
231 .id = &vlan_net_id,
232 .size = sizeof(struct tc_action_net),
191}; 233};
192 234
193static int __init vlan_init_module(void) 235static int __init vlan_init_module(void)
194{ 236{
195 return tcf_register_action(&act_vlan_ops, VLAN_TAB_MASK); 237 return tcf_register_action(&act_vlan_ops, &vlan_net_ops);
196} 238}
197 239
198static void __exit vlan_cleanup_module(void) 240static void __exit vlan_cleanup_module(void)
199{ 241{
200 tcf_unregister_action(&act_vlan_ops); 242 tcf_unregister_action(&act_vlan_ops, &vlan_net_ops);
201} 243}
202 244
203module_init(vlan_init_module); 245module_init(vlan_init_module);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 8dc84300ee79..425fe6a0eda3 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -103,8 +103,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
103 } 103 }
104 104
105 if (prog->exts_integrated) { 105 if (prog->exts_integrated) {
106 res->class = prog->res.class; 106 res->class = 0;
107 res->classid = qdisc_skb_cb(skb)->tc_classid; 107 res->classid = TC_H_MAJ(prog->res.classid) |
108 qdisc_skb_cb(skb)->tc_classid;
108 109
109 ret = cls_bpf_exec_opcode(filter_res); 110 ret = cls_bpf_exec_opcode(filter_res);
110 if (ret == TC_ACT_UNSPEC) 111 if (ret == TC_ACT_UNSPEC)
@@ -114,10 +115,12 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
114 115
115 if (filter_res == 0) 116 if (filter_res == 0)
116 continue; 117 continue;
117 118 if (filter_res != -1) {
118 *res = prog->res; 119 res->class = 0;
119 if (filter_res != -1)
120 res->classid = filter_res; 120 res->classid = filter_res;
121 } else {
122 *res = prog->res;
123 }
121 124
122 ret = tcf_exts_exec(skb, &prog->exts, res); 125 ret = tcf_exts_exec(skb, &prog->exts, res);
123 if (ret < 0) 126 if (ret < 0)
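The cls_bpf hunk above composes the effective classid for exts_integrated programs from the filter's configured major handle and the minor value the program stored in qdisc_skb_cb()->tc_classid. A small illustration using the standard TC_H_* macros from linux/pkt_sched.h (the concrete values are made up):

/* A filter configured with classid 1: and a program returning minor 0x10
 * would yield classid 1:10, i.e. 0x00010010.
 */
static u32 example_integrated_classid(void)
{
        u32 configured = TC_H_MAKE(1 << 16, 0); /* the filter's classid, "1:" */
        u32 bpf_minor = 0x10;                   /* minor chosen by the program */

        return TC_H_MAJ(configured) | bpf_minor;
}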
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 95b021243233..2181ffc76638 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -165,6 +165,51 @@ static void fl_destroy_filter(struct rcu_head *head)
165 kfree(f); 165 kfree(f);
166} 166}
167 167
168static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie)
169{
170 struct net_device *dev = tp->q->dev_queue->dev;
171 struct tc_cls_flower_offload offload = {0};
172 struct tc_to_netdev tc;
173
174 if (!tc_should_offload(dev, 0))
175 return;
176
177 offload.command = TC_CLSFLOWER_DESTROY;
178 offload.cookie = cookie;
179
180 tc.type = TC_SETUP_CLSFLOWER;
181 tc.cls_flower = &offload;
182
183 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
184}
185
186static void fl_hw_replace_filter(struct tcf_proto *tp,
187 struct flow_dissector *dissector,
188 struct fl_flow_key *mask,
189 struct fl_flow_key *key,
190 struct tcf_exts *actions,
191 unsigned long cookie, u32 flags)
192{
193 struct net_device *dev = tp->q->dev_queue->dev;
194 struct tc_cls_flower_offload offload = {0};
195 struct tc_to_netdev tc;
196
197 if (!tc_should_offload(dev, flags))
198 return;
199
200 offload.command = TC_CLSFLOWER_REPLACE;
201 offload.cookie = cookie;
202 offload.dissector = dissector;
203 offload.mask = mask;
204 offload.key = key;
205 offload.exts = actions;
206
207 tc.type = TC_SETUP_CLSFLOWER;
208 tc.cls_flower = &offload;
209
210 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
211}
212
168static bool fl_destroy(struct tcf_proto *tp, bool force) 213static bool fl_destroy(struct tcf_proto *tp, bool force)
169{ 214{
170 struct cls_fl_head *head = rtnl_dereference(tp->root); 215 struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -174,6 +219,7 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
174 return false; 219 return false;
175 220
176 list_for_each_entry_safe(f, next, &head->filters, list) { 221 list_for_each_entry_safe(f, next, &head->filters, list) {
222 fl_hw_destroy_filter(tp, (unsigned long)f);
177 list_del_rcu(&f->list); 223 list_del_rcu(&f->list);
178 call_rcu(&f->rcu, fl_destroy_filter); 224 call_rcu(&f->rcu, fl_destroy_filter);
179 } 225 }
@@ -459,6 +505,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
459 struct cls_fl_filter *fnew; 505 struct cls_fl_filter *fnew;
460 struct nlattr *tb[TCA_FLOWER_MAX + 1]; 506 struct nlattr *tb[TCA_FLOWER_MAX + 1];
461 struct fl_flow_mask mask = {}; 507 struct fl_flow_mask mask = {};
508 u32 flags = 0;
462 int err; 509 int err;
463 510
464 if (!tca[TCA_OPTIONS]) 511 if (!tca[TCA_OPTIONS])
@@ -486,6 +533,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
486 } 533 }
487 fnew->handle = handle; 534 fnew->handle = handle;
488 535
536 if (tb[TCA_FLOWER_FLAGS])
537 flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
538
489 err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); 539 err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
490 if (err) 540 if (err)
491 goto errout; 541 goto errout;
@@ -498,9 +548,20 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
498 head->ht_params); 548 head->ht_params);
499 if (err) 549 if (err)
500 goto errout; 550 goto errout;
501 if (fold) 551
552 fl_hw_replace_filter(tp,
553 &head->dissector,
554 &mask.key,
555 &fnew->key,
556 &fnew->exts,
557 (unsigned long)fnew,
558 flags);
559
560 if (fold) {
502 rhashtable_remove_fast(&head->ht, &fold->ht_node, 561 rhashtable_remove_fast(&head->ht, &fold->ht_node,
503 head->ht_params); 562 head->ht_params);
563 fl_hw_destroy_filter(tp, (unsigned long)fold);
564 }
504 565
505 *arg = (unsigned long) fnew; 566 *arg = (unsigned long) fnew;
506 567
@@ -527,6 +588,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg)
527 rhashtable_remove_fast(&head->ht, &f->ht_node, 588 rhashtable_remove_fast(&head->ht, &f->ht_node,
528 head->ht_params); 589 head->ht_params);
529 list_del_rcu(&f->list); 590 list_del_rcu(&f->list);
591 fl_hw_destroy_filter(tp, (unsigned long)f);
530 tcf_unbind_filter(tp, &f->res); 592 tcf_unbind_filter(tp, &f->res);
531 call_rcu(&f->rcu, fl_destroy_filter); 593 call_rcu(&f->rcu, fl_destroy_filter);
532 return 0; 594 return 0;
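
The two helpers added above, fl_hw_replace_filter() and fl_hw_destroy_filter(), hand a struct tc_cls_flower_offload to the driver through ndo_setup_tc(dev, handle, protocol, &tc) with tc.type set to TC_SETUP_CLSFLOWER. A driver that wants to honour these requests would dispatch on the command field; the sketch below only illustrates the shape of such a handler. The foo_* names are invented for illustration and do not come from this diff.

/* Illustrative driver-side handler for the TC_SETUP_CLSFLOWER requests
 * issued above.  foo_setup_tc/foo_flower_replace/foo_flower_destroy are
 * made-up names; only the types, commands and fields used here appear
 * in the classifier code.
 */
static int foo_setup_tc(struct net_device *dev, u32 handle,
			__be16 protocol, struct tc_to_netdev *tc)
{
	if (tc->type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (tc->cls_flower->command) {
	case TC_CLSFLOWER_REPLACE:
		/* program dissector/mask/key/exts into hardware, keyed
		 * by the classifier-supplied cookie
		 */
		return foo_flower_replace(dev, tc->cls_flower);
	case TC_CLSFLOWER_DESTROY:
		/* tear down the hardware rule identified by the cookie */
		return foo_flower_destroy(dev, tc->cls_flower->cookie);
	default:
		return -EOPNOTSUPP;
	}
}
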
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4fbb67430ce4..563cdad76448 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -43,6 +43,7 @@
43#include <net/netlink.h> 43#include <net/netlink.h>
44#include <net/act_api.h> 44#include <net/act_api.h>
45#include <net/pkt_cls.h> 45#include <net/pkt_cls.h>
46#include <linux/netdevice.h>
46 47
47struct tc_u_knode { 48struct tc_u_knode {
48 struct tc_u_knode __rcu *next; 49 struct tc_u_knode __rcu *next;
@@ -58,6 +59,7 @@ struct tc_u_knode {
58#ifdef CONFIG_CLS_U32_PERF 59#ifdef CONFIG_CLS_U32_PERF
59 struct tc_u32_pcnt __percpu *pf; 60 struct tc_u32_pcnt __percpu *pf;
60#endif 61#endif
62 u32 flags;
61#ifdef CONFIG_CLS_U32_MARK 63#ifdef CONFIG_CLS_U32_MARK
62 u32 val; 64 u32 val;
63 u32 mask; 65 u32 mask;
@@ -424,6 +426,97 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
424 return 0; 426 return 0;
425} 427}
426 428
429static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
430{
431 struct net_device *dev = tp->q->dev_queue->dev;
432 struct tc_cls_u32_offload u32_offload = {0};
433 struct tc_to_netdev offload;
434
435 offload.type = TC_SETUP_CLSU32;
436 offload.cls_u32 = &u32_offload;
437
438 if (tc_should_offload(dev, 0)) {
439 offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
440 offload.cls_u32->knode.handle = handle;
441 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
442 tp->protocol, &offload);
443 }
444}
445
446static void u32_replace_hw_hnode(struct tcf_proto *tp,
447 struct tc_u_hnode *h,
448 u32 flags)
449{
450 struct net_device *dev = tp->q->dev_queue->dev;
451 struct tc_cls_u32_offload u32_offload = {0};
452 struct tc_to_netdev offload;
453
454 offload.type = TC_SETUP_CLSU32;
455 offload.cls_u32 = &u32_offload;
456
457 if (tc_should_offload(dev, flags)) {
458 offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
459 offload.cls_u32->hnode.divisor = h->divisor;
460 offload.cls_u32->hnode.handle = h->handle;
461 offload.cls_u32->hnode.prio = h->prio;
462
463 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
464 tp->protocol, &offload);
465 }
466}
467
468static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
469{
470 struct net_device *dev = tp->q->dev_queue->dev;
471 struct tc_cls_u32_offload u32_offload = {0};
472 struct tc_to_netdev offload;
473
474 offload.type = TC_SETUP_CLSU32;
475 offload.cls_u32 = &u32_offload;
476
477 if (tc_should_offload(dev, 0)) {
478 offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
479 offload.cls_u32->hnode.divisor = h->divisor;
480 offload.cls_u32->hnode.handle = h->handle;
481 offload.cls_u32->hnode.prio = h->prio;
482
483 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
484 tp->protocol, &offload);
485 }
486}
487
488static void u32_replace_hw_knode(struct tcf_proto *tp,
489 struct tc_u_knode *n,
490 u32 flags)
491{
492 struct net_device *dev = tp->q->dev_queue->dev;
493 struct tc_cls_u32_offload u32_offload = {0};
494 struct tc_to_netdev offload;
495
496 offload.type = TC_SETUP_CLSU32;
497 offload.cls_u32 = &u32_offload;
498
499 if (tc_should_offload(dev, flags)) {
500 offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
501 offload.cls_u32->knode.handle = n->handle;
502 offload.cls_u32->knode.fshift = n->fshift;
503#ifdef CONFIG_CLS_U32_MARK
504 offload.cls_u32->knode.val = n->val;
505 offload.cls_u32->knode.mask = n->mask;
506#else
507 offload.cls_u32->knode.val = 0;
508 offload.cls_u32->knode.mask = 0;
509#endif
510 offload.cls_u32->knode.sel = &n->sel;
511 offload.cls_u32->knode.exts = &n->exts;
512 if (n->ht_down)
513 offload.cls_u32->knode.link_handle = n->ht_down->handle;
514
515 dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
516 tp->protocol, &offload);
517 }
518}
519
427static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) 520static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
428{ 521{
429 struct tc_u_knode *n; 522 struct tc_u_knode *n;
@@ -434,6 +527,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
434 RCU_INIT_POINTER(ht->ht[h], 527 RCU_INIT_POINTER(ht->ht[h],
435 rtnl_dereference(n->next)); 528 rtnl_dereference(n->next));
436 tcf_unbind_filter(tp, &n->res); 529 tcf_unbind_filter(tp, &n->res);
530 u32_remove_hw_knode(tp, n->handle);
437 call_rcu(&n->rcu, u32_delete_key_freepf_rcu); 531 call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
438 } 532 }
439 } 533 }
@@ -454,6 +548,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
454 phn; 548 phn;
455 hn = &phn->next, phn = rtnl_dereference(*hn)) { 549 hn = &phn->next, phn = rtnl_dereference(*hn)) {
456 if (phn == ht) { 550 if (phn == ht) {
551 u32_clear_hw_hnode(tp, ht);
457 RCU_INIT_POINTER(*hn, ht->next); 552 RCU_INIT_POINTER(*hn, ht->next);
458 kfree_rcu(ht, rcu); 553 kfree_rcu(ht, rcu);
459 return 0; 554 return 0;
@@ -540,8 +635,10 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
540 if (ht == NULL) 635 if (ht == NULL)
541 return 0; 636 return 0;
542 637
543 if (TC_U32_KEY(ht->handle)) 638 if (TC_U32_KEY(ht->handle)) {
639 u32_remove_hw_knode(tp, ht->handle);
544 return u32_delete_key(tp, (struct tc_u_knode *)ht); 640 return u32_delete_key(tp, (struct tc_u_knode *)ht);
641 }
545 642
546 if (root_ht == ht) 643 if (root_ht == ht)
547 return -EINVAL; 644 return -EINVAL;
@@ -587,6 +684,7 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
587 [TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) }, 684 [TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) },
588 [TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ }, 685 [TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
589 [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) }, 686 [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) },
687 [TCA_U32_FLAGS] = { .type = NLA_U32 },
590}; 688};
591 689
592static int u32_set_parms(struct net *net, struct tcf_proto *tp, 690static int u32_set_parms(struct net *net, struct tcf_proto *tp,
@@ -694,6 +792,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
694#endif 792#endif
695 new->fshift = n->fshift; 793 new->fshift = n->fshift;
696 new->res = n->res; 794 new->res = n->res;
795 new->flags = n->flags;
697 RCU_INIT_POINTER(new->ht_down, n->ht_down); 796 RCU_INIT_POINTER(new->ht_down, n->ht_down);
698 797
699 /* bump reference count as long as we hold pointer to structure */ 798 /* bump reference count as long as we hold pointer to structure */
@@ -733,7 +832,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
733 struct tc_u32_sel *s; 832 struct tc_u32_sel *s;
734 struct nlattr *opt = tca[TCA_OPTIONS]; 833 struct nlattr *opt = tca[TCA_OPTIONS];
735 struct nlattr *tb[TCA_U32_MAX + 1]; 834 struct nlattr *tb[TCA_U32_MAX + 1];
736 u32 htid; 835 u32 htid, flags = 0;
737 int err; 836 int err;
738#ifdef CONFIG_CLS_U32_PERF 837#ifdef CONFIG_CLS_U32_PERF
739 size_t size; 838 size_t size;
@@ -746,6 +845,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
746 if (err < 0) 845 if (err < 0)
747 return err; 846 return err;
748 847
848 if (tb[TCA_U32_FLAGS])
849 flags = nla_get_u32(tb[TCA_U32_FLAGS]);
850
749 n = (struct tc_u_knode *)*arg; 851 n = (struct tc_u_knode *)*arg;
750 if (n) { 852 if (n) {
751 struct tc_u_knode *new; 853 struct tc_u_knode *new;
@@ -753,6 +855,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
753 if (TC_U32_KEY(n->handle) == 0) 855 if (TC_U32_KEY(n->handle) == 0)
754 return -EINVAL; 856 return -EINVAL;
755 857
858 if (n->flags != flags)
859 return -EINVAL;
860
756 new = u32_init_knode(tp, n); 861 new = u32_init_knode(tp, n);
757 if (!new) 862 if (!new)
758 return -ENOMEM; 863 return -ENOMEM;
@@ -769,6 +874,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
769 u32_replace_knode(tp, tp_c, new); 874 u32_replace_knode(tp, tp_c, new);
770 tcf_unbind_filter(tp, &n->res); 875 tcf_unbind_filter(tp, &n->res);
771 call_rcu(&n->rcu, u32_delete_key_rcu); 876 call_rcu(&n->rcu, u32_delete_key_rcu);
877 u32_replace_hw_knode(tp, new, flags);
772 return 0; 878 return 0;
773 } 879 }
774 880
@@ -795,6 +901,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
795 RCU_INIT_POINTER(ht->next, tp_c->hlist); 901 RCU_INIT_POINTER(ht->next, tp_c->hlist);
796 rcu_assign_pointer(tp_c->hlist, ht); 902 rcu_assign_pointer(tp_c->hlist, ht);
797 *arg = (unsigned long)ht; 903 *arg = (unsigned long)ht;
904
905 u32_replace_hw_hnode(tp, ht, flags);
798 return 0; 906 return 0;
799 } 907 }
800 908
@@ -845,6 +953,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
845 RCU_INIT_POINTER(n->ht_up, ht); 953 RCU_INIT_POINTER(n->ht_up, ht);
846 n->handle = handle; 954 n->handle = handle;
847 n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; 955 n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
956 n->flags = flags;
848 tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); 957 tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
849 n->tp = tp; 958 n->tp = tp;
850 959
@@ -877,7 +986,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
877 986
878 RCU_INIT_POINTER(n->next, pins); 987 RCU_INIT_POINTER(n->next, pins);
879 rcu_assign_pointer(*ins, n); 988 rcu_assign_pointer(*ins, n);
880 989 u32_replace_hw_knode(tp, n, flags);
881 *arg = (unsigned long)n; 990 *arg = (unsigned long)n;
882 return 0; 991 return 0;
883 } 992 }
@@ -982,6 +1091,9 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
982 nla_put_u32(skb, TCA_U32_LINK, ht_down->handle)) 1091 nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
983 goto nla_put_failure; 1092 goto nla_put_failure;
984 1093
1094 if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
1095 goto nla_put_failure;
1096
985#ifdef CONFIG_CLS_U32_MARK 1097#ifdef CONFIG_CLS_U32_MARK
986 if ((n->val || n->mask)) { 1098 if ((n->val || n->mask)) {
987 struct tc_u32_mark mark = {.val = n->val, 1099 struct tc_u32_mark mark = {.val = n->val,
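
Both the flower and u32 offload hooks above call into the hardware only when tc_should_offload(dev, flags) allows it, with flags taken from the new TCA_FLOWER_FLAGS / TCA_U32_FLAGS netlink attributes. The helper itself is not part of this diff; the sketch below shows the kind of gate it presumably implements (a device capability bit plus a user-supplied skip-hardware flag). NETIF_F_HW_TC and TCA_CLS_FLAGS_SKIP_HW are assumed names.

/* Assumed shape of the tc_should_offload() gate used by the u32 and
 * flower hunks: offload only when the device advertises hardware
 * classification and the user did not request skipping hardware.
 * NETIF_F_HW_TC and TCA_CLS_FLAGS_SKIP_HW are assumptions; the real
 * helper body is not shown in this diff.
 */
static inline bool tc_should_offload(struct net_device *dev, u32 flags)
{
	if (!(dev->features & NETIF_F_HW_TC))
		return false;
	if (flags & TCA_CLS_FLAGS_SKIP_HW)
		return false;
	return true;
}
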
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b5c2cf2aa6d4..3b180ff72f79 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -744,14 +744,15 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
744 return 0; 744 return 0;
745} 745}
746 746
747void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) 747void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
748 unsigned int len)
748{ 749{
749 const struct Qdisc_class_ops *cops; 750 const struct Qdisc_class_ops *cops;
750 unsigned long cl; 751 unsigned long cl;
751 u32 parentid; 752 u32 parentid;
752 int drops; 753 int drops;
753 754
754 if (n == 0) 755 if (n == 0 && len == 0)
755 return; 756 return;
756 drops = max_t(int, n, 0); 757 drops = max_t(int, n, 0);
757 rcu_read_lock(); 758 rcu_read_lock();
@@ -774,11 +775,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
774 cops->put(sch, cl); 775 cops->put(sch, cl);
775 } 776 }
776 sch->q.qlen -= n; 777 sch->q.qlen -= n;
778 sch->qstats.backlog -= len;
777 __qdisc_qstats_drop(sch, drops); 779 __qdisc_qstats_drop(sch, drops);
778 } 780 }
779 rcu_read_unlock(); 781 rcu_read_unlock();
780} 782}
781EXPORT_SYMBOL(qdisc_tree_decrease_qlen); 783EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
782 784
783static void notify_and_destroy(struct net *net, struct sk_buff *skb, 785static void notify_and_destroy(struct net *net, struct sk_buff *skb,
784 struct nlmsghdr *n, u32 clid, 786 struct nlmsghdr *n, u32 clid,
@@ -1841,7 +1843,7 @@ reclassify:
1841 return err; 1843 return err;
1842 } 1844 }
1843 1845
1844 return -1; 1846 return TC_ACT_UNSPEC; /* signal: continue lookup */
1845#ifdef CONFIG_NET_CLS_ACT 1847#ifdef CONFIG_NET_CLS_ACT
1846reset: 1848reset:
1847 if (unlikely(limit++ >= MAX_REC_LOOP)) { 1849 if (unlikely(limit++ >= MAX_REC_LOOP)) {
@@ -1852,6 +1854,7 @@ reset:
1852 } 1854 }
1853 1855
1854 tp = old_tp; 1856 tp = old_tp;
1857 protocol = tc_skb_protocol(skb);
1855 goto reclassify; 1858 goto reclassify;
1856#endif 1859#endif
1857} 1860}
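
The sch_api.c hunk above turns qdisc_tree_decrease_qlen(sch, n) into qdisc_tree_reduce_backlog(sch, n, len): callers now report the dropped byte count as well, so every parent's qstats.backlog stays in sync with its q.qlen. The qdisc conversions that follow all use the same pattern, shown in the short sketch below (compare drr_purge_queue(), hfsc_purge_queue() and qfq_purge_queue() later in this diff): snapshot both counters before the child queue is emptied, then report them in one call. example_purge_queue is an illustrative name only.

/* Caller pattern used throughout the conversions below: capture both
 * the packet count and the byte backlog before resetting the child,
 * then propagate both up the qdisc tree in a single call.
 */
static void example_purge_queue(struct Qdisc *child)
{
	unsigned int len = child->q.qlen;
	unsigned int backlog = child->qstats.backlog;

	qdisc_reset(child);
	qdisc_tree_reduce_backlog(child, len, backlog);
}
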
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index c538d9e4a8f6..baafddf229ce 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1624,13 +1624,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1624 new->reshape_fail = cbq_reshape_fail; 1624 new->reshape_fail = cbq_reshape_fail;
1625#endif 1625#endif
1626 } 1626 }
1627 sch_tree_lock(sch);
1628 *old = cl->q;
1629 cl->q = new;
1630 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1631 qdisc_reset(*old);
1632 sch_tree_unlock(sch);
1633 1627
1628 *old = qdisc_replace(sch, new, &cl->q);
1634 return 0; 1629 return 0;
1635} 1630}
1636 1631
@@ -1914,7 +1909,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1914{ 1909{
1915 struct cbq_sched_data *q = qdisc_priv(sch); 1910 struct cbq_sched_data *q = qdisc_priv(sch);
1916 struct cbq_class *cl = (struct cbq_class *)arg; 1911 struct cbq_class *cl = (struct cbq_class *)arg;
1917 unsigned int qlen; 1912 unsigned int qlen, backlog;
1918 1913
1919 if (cl->filters || cl->children || cl == &q->link) 1914 if (cl->filters || cl->children || cl == &q->link)
1920 return -EBUSY; 1915 return -EBUSY;
@@ -1922,8 +1917,9 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1922 sch_tree_lock(sch); 1917 sch_tree_lock(sch);
1923 1918
1924 qlen = cl->q->q.qlen; 1919 qlen = cl->q->q.qlen;
1920 backlog = cl->q->qstats.backlog;
1925 qdisc_reset(cl->q); 1921 qdisc_reset(cl->q);
1926 qdisc_tree_decrease_qlen(cl->q, qlen); 1922 qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
1927 1923
1928 if (cl->next_alive) 1924 if (cl->next_alive)
1929 cbq_deactivate_class(cl); 1925 cbq_deactivate_class(cl);
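
cbq_graft() above, like the dsmark, htb, multiq, netem, prio, red, sfb, tbf, drr, hfsc and qfq graft functions later in this diff, collapses the open-coded lock/swap/account/reset sequence into a single qdisc_replace() call. The helper's definition is not part of this diff; reconstructed from the code it replaces, it presumably looks roughly like the sketch below.

/* Presumed shape of qdisc_replace(), reconstructed from the open-coded
 * sequences removed by the graft conversions in this diff; the real
 * helper lives in a header elsewhere and may differ in detail.
 */
static inline struct Qdisc *qdisc_replace(struct Qdisc *sch,
					  struct Qdisc *new,
					  struct Qdisc **pold)
{
	struct Qdisc *old;

	sch_tree_lock(sch);
	old = *pold;
	*pold = new;
	if (old != NULL) {
		qdisc_tree_reduce_backlog(old, old->q.qlen,
					  old->qstats.backlog);
		qdisc_reset(old);
	}
	sch_tree_unlock(sch);

	return old;
}
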
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 5ffb8b8337c7..0a08c860eee4 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -128,8 +128,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
128 choke_zap_tail_holes(q); 128 choke_zap_tail_holes(q);
129 129
130 qdisc_qstats_backlog_dec(sch, skb); 130 qdisc_qstats_backlog_dec(sch, skb);
131 qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
131 qdisc_drop(skb, sch); 132 qdisc_drop(skb, sch);
132 qdisc_tree_decrease_qlen(sch, 1);
133 --sch->q.qlen; 133 --sch->q.qlen;
134} 134}
135 135
@@ -456,6 +456,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
456 old = q->tab; 456 old = q->tab;
457 if (old) { 457 if (old) {
458 unsigned int oqlen = sch->q.qlen, tail = 0; 458 unsigned int oqlen = sch->q.qlen, tail = 0;
459 unsigned dropped = 0;
459 460
460 while (q->head != q->tail) { 461 while (q->head != q->tail) {
461 struct sk_buff *skb = q->tab[q->head]; 462 struct sk_buff *skb = q->tab[q->head];
@@ -467,11 +468,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
467 ntab[tail++] = skb; 468 ntab[tail++] = skb;
468 continue; 469 continue;
469 } 470 }
471 dropped += qdisc_pkt_len(skb);
470 qdisc_qstats_backlog_dec(sch, skb); 472 qdisc_qstats_backlog_dec(sch, skb);
471 --sch->q.qlen; 473 --sch->q.qlen;
472 qdisc_drop(skb, sch); 474 qdisc_drop(skb, sch);
473 } 475 }
474 qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen); 476 qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
475 q->head = 0; 477 q->head = 0;
476 q->tail = tail; 478 q->tail = tail;
477 } 479 }
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 535007d5f0b5..9b7e2980ee5c 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -79,12 +79,13 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
79 79
80 skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue); 80 skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue);
81 81
82 /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, 82 /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
83 * or HTB crashes. Defer it for next round. 83 * or HTB crashes. Defer it for next round.
84 */ 84 */
85 if (q->stats.drop_count && sch->q.qlen) { 85 if (q->stats.drop_count && sch->q.qlen) {
86 qdisc_tree_decrease_qlen(sch, q->stats.drop_count); 86 qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
87 q->stats.drop_count = 0; 87 q->stats.drop_count = 0;
88 q->stats.drop_len = 0;
88 } 89 }
89 if (skb) 90 if (skb)
90 qdisc_bstats_update(sch, skb); 91 qdisc_bstats_update(sch, skb);
@@ -116,7 +117,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
116{ 117{
117 struct codel_sched_data *q = qdisc_priv(sch); 118 struct codel_sched_data *q = qdisc_priv(sch);
118 struct nlattr *tb[TCA_CODEL_MAX + 1]; 119 struct nlattr *tb[TCA_CODEL_MAX + 1];
119 unsigned int qlen; 120 unsigned int qlen, dropped = 0;
120 int err; 121 int err;
121 122
122 if (!opt) 123 if (!opt)
@@ -156,10 +157,11 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
156 while (sch->q.qlen > sch->limit) { 157 while (sch->q.qlen > sch->limit) {
157 struct sk_buff *skb = __skb_dequeue(&sch->q); 158 struct sk_buff *skb = __skb_dequeue(&sch->q);
158 159
160 dropped += qdisc_pkt_len(skb);
159 qdisc_qstats_backlog_dec(sch, skb); 161 qdisc_qstats_backlog_dec(sch, skb);
160 qdisc_drop(skb, sch); 162 qdisc_drop(skb, sch);
161 } 163 }
162 qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); 164 qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
163 165
164 sch_tree_unlock(sch); 166 sch_tree_unlock(sch);
165 return 0; 167 return 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index f26bdea875c1..a63e879e8975 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -53,9 +53,10 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
53static void drr_purge_queue(struct drr_class *cl) 53static void drr_purge_queue(struct drr_class *cl)
54{ 54{
55 unsigned int len = cl->qdisc->q.qlen; 55 unsigned int len = cl->qdisc->q.qlen;
56 unsigned int backlog = cl->qdisc->qstats.backlog;
56 57
57 qdisc_reset(cl->qdisc); 58 qdisc_reset(cl->qdisc);
58 qdisc_tree_decrease_qlen(cl->qdisc, len); 59 qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
59} 60}
60 61
61static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { 62static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
@@ -226,11 +227,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
226 new = &noop_qdisc; 227 new = &noop_qdisc;
227 } 228 }
228 229
229 sch_tree_lock(sch); 230 *old = qdisc_replace(sch, new, &cl->qdisc);
230 drr_purge_queue(cl);
231 *old = cl->qdisc;
232 cl->qdisc = new;
233 sch_tree_unlock(sch);
234 return 0; 231 return 0;
235} 232}
236 233
@@ -403,6 +400,8 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
403 if (len <= cl->deficit) { 400 if (len <= cl->deficit) {
404 cl->deficit -= len; 401 cl->deficit -= len;
405 skb = qdisc_dequeue_peeked(cl->qdisc); 402 skb = qdisc_dequeue_peeked(cl->qdisc);
403 if (unlikely(skb == NULL))
404 goto out;
406 if (cl->qdisc->q.qlen == 0) 405 if (cl->qdisc->q.qlen == 0)
407 list_del(&cl->alist); 406 list_del(&cl->alist);
408 407
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index f357f34d02d2..34b4ddaca27c 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -73,13 +73,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
73 new = &noop_qdisc; 73 new = &noop_qdisc;
74 } 74 }
75 75
76 sch_tree_lock(sch); 76 *old = qdisc_replace(sch, new, &p->q);
77 *old = p->q;
78 p->q = new;
79 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
80 qdisc_reset(*old);
81 sch_tree_unlock(sch);
82
83 return 0; 77 return 0;
84} 78}
85 79
@@ -264,6 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
264 return err; 258 return err;
265 } 259 }
266 260
261 qdisc_qstats_backlog_inc(sch, skb);
267 sch->q.qlen++; 262 sch->q.qlen++;
268 263
269 return NET_XMIT_SUCCESS; 264 return NET_XMIT_SUCCESS;
@@ -281,11 +276,12 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
281 276
282 pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); 277 pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
283 278
284 skb = p->q->ops->dequeue(p->q); 279 skb = qdisc_dequeue_peeked(p->q);
285 if (skb == NULL) 280 if (skb == NULL)
286 return NULL; 281 return NULL;
287 282
288 qdisc_bstats_update(sch, skb); 283 qdisc_bstats_update(sch, skb);
284 qdisc_qstats_backlog_dec(sch, skb);
289 sch->q.qlen--; 285 sch->q.qlen--;
290 286
291 index = skb->tc_index & (p->indices - 1); 287 index = skb->tc_index & (p->indices - 1);
@@ -401,6 +397,7 @@ static void dsmark_reset(struct Qdisc *sch)
401 397
402 pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); 398 pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
403 qdisc_reset(p->q); 399 qdisc_reset(p->q);
400 sch->qstats.backlog = 0;
404 sch->q.qlen = 0; 401 sch->q.qlen = 0;
405} 402}
406 403
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 109b2322778f..3c6a47d66a04 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -662,6 +662,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
662 struct fq_sched_data *q = qdisc_priv(sch); 662 struct fq_sched_data *q = qdisc_priv(sch);
663 struct nlattr *tb[TCA_FQ_MAX + 1]; 663 struct nlattr *tb[TCA_FQ_MAX + 1];
664 int err, drop_count = 0; 664 int err, drop_count = 0;
665 unsigned drop_len = 0;
665 u32 fq_log; 666 u32 fq_log;
666 667
667 if (!opt) 668 if (!opt)
@@ -736,10 +737,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
736 737
737 if (!skb) 738 if (!skb)
738 break; 739 break;
740 drop_len += qdisc_pkt_len(skb);
739 kfree_skb(skb); 741 kfree_skb(skb);
740 drop_count++; 742 drop_count++;
741 } 743 }
742 qdisc_tree_decrease_qlen(sch, drop_count); 744 qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
743 745
744 sch_tree_unlock(sch); 746 sch_tree_unlock(sch);
745 return err; 747 return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 4c834e93dafb..d3fc8f9dd3d4 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -175,7 +175,7 @@ static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
175static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) 175static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
176{ 176{
177 struct fq_codel_sched_data *q = qdisc_priv(sch); 177 struct fq_codel_sched_data *q = qdisc_priv(sch);
178 unsigned int idx; 178 unsigned int idx, prev_backlog;
179 struct fq_codel_flow *flow; 179 struct fq_codel_flow *flow;
180 int uninitialized_var(ret); 180 int uninitialized_var(ret);
181 181
@@ -203,6 +203,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
203 if (++sch->q.qlen <= sch->limit) 203 if (++sch->q.qlen <= sch->limit)
204 return NET_XMIT_SUCCESS; 204 return NET_XMIT_SUCCESS;
205 205
206 prev_backlog = sch->qstats.backlog;
206 q->drop_overlimit++; 207 q->drop_overlimit++;
207 /* Return Congestion Notification only if we dropped a packet 208 /* Return Congestion Notification only if we dropped a packet
208 * from this flow. 209 * from this flow.
@@ -211,7 +212,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
211 return NET_XMIT_CN; 212 return NET_XMIT_CN;
212 213
213 /* As we dropped a packet, better let upper stack know this */ 214 /* As we dropped a packet, better let upper stack know this */
214 qdisc_tree_decrease_qlen(sch, 1); 215 qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
215 return NET_XMIT_SUCCESS; 216 return NET_XMIT_SUCCESS;
216} 217}
217 218
@@ -241,6 +242,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
241 struct fq_codel_flow *flow; 242 struct fq_codel_flow *flow;
242 struct list_head *head; 243 struct list_head *head;
243 u32 prev_drop_count, prev_ecn_mark; 244 u32 prev_drop_count, prev_ecn_mark;
245 unsigned int prev_backlog;
244 246
245begin: 247begin:
246 head = &q->new_flows; 248 head = &q->new_flows;
@@ -259,6 +261,7 @@ begin:
259 261
260 prev_drop_count = q->cstats.drop_count; 262 prev_drop_count = q->cstats.drop_count;
261 prev_ecn_mark = q->cstats.ecn_mark; 263 prev_ecn_mark = q->cstats.ecn_mark;
264 prev_backlog = sch->qstats.backlog;
262 265
263 skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats, 266 skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats,
264 dequeue); 267 dequeue);
@@ -276,12 +279,14 @@ begin:
276 } 279 }
277 qdisc_bstats_update(sch, skb); 280 qdisc_bstats_update(sch, skb);
278 flow->deficit -= qdisc_pkt_len(skb); 281 flow->deficit -= qdisc_pkt_len(skb);
279 /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0, 282 /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
280 * or HTB crashes. Defer it for next round. 283 * or HTB crashes. Defer it for next round.
281 */ 284 */
282 if (q->cstats.drop_count && sch->q.qlen) { 285 if (q->cstats.drop_count && sch->q.qlen) {
283 qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); 286 qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
287 q->cstats.drop_len);
284 q->cstats.drop_count = 0; 288 q->cstats.drop_count = 0;
289 q->cstats.drop_len = 0;
285 } 290 }
286 return skb; 291 return skb;
287} 292}
@@ -372,11 +377,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
372 while (sch->q.qlen > sch->limit) { 377 while (sch->q.qlen > sch->limit) {
373 struct sk_buff *skb = fq_codel_dequeue(sch); 378 struct sk_buff *skb = fq_codel_dequeue(sch);
374 379
380 q->cstats.drop_len += qdisc_pkt_len(skb);
375 kfree_skb(skb); 381 kfree_skb(skb);
376 q->cstats.drop_count++; 382 q->cstats.drop_count++;
377 } 383 }
378 qdisc_tree_decrease_qlen(sch, q->cstats.drop_count); 384 qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
379 q->cstats.drop_count = 0; 385 q->cstats.drop_count = 0;
386 q->cstats.drop_len = 0;
380 387
381 sch_tree_unlock(sch); 388 sch_tree_unlock(sch);
382 return 0; 389 return 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 16bc83b2842a..f18c35024207 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -567,6 +567,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
567 .dump = pfifo_fast_dump, 567 .dump = pfifo_fast_dump,
568 .owner = THIS_MODULE, 568 .owner = THIS_MODULE,
569}; 569};
570EXPORT_SYMBOL(pfifo_fast_ops);
570 571
571static struct lock_class_key qdisc_tx_busylock; 572static struct lock_class_key qdisc_tx_busylock;
572 573
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b7ebe2c87586..d783d7cc3348 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -895,9 +895,10 @@ static void
895hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) 895hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
896{ 896{
897 unsigned int len = cl->qdisc->q.qlen; 897 unsigned int len = cl->qdisc->q.qlen;
898 unsigned int backlog = cl->qdisc->qstats.backlog;
898 899
899 qdisc_reset(cl->qdisc); 900 qdisc_reset(cl->qdisc);
900 qdisc_tree_decrease_qlen(cl->qdisc, len); 901 qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
901} 902}
902 903
903static void 904static void
@@ -1215,11 +1216,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1215 new = &noop_qdisc; 1216 new = &noop_qdisc;
1216 } 1217 }
1217 1218
1218 sch_tree_lock(sch); 1219 *old = qdisc_replace(sch, new, &cl->qdisc);
1219 hfsc_purge_queue(sch, cl);
1220 *old = cl->qdisc;
1221 cl->qdisc = new;
1222 sch_tree_unlock(sch);
1223 return 0; 1220 return 0;
1224} 1221}
1225 1222
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 86b04e31e60b..13d6f83ec491 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -382,6 +382,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
382 struct hhf_sched_data *q = qdisc_priv(sch); 382 struct hhf_sched_data *q = qdisc_priv(sch);
383 enum wdrr_bucket_idx idx; 383 enum wdrr_bucket_idx idx;
384 struct wdrr_bucket *bucket; 384 struct wdrr_bucket *bucket;
385 unsigned int prev_backlog;
385 386
386 idx = hhf_classify(skb, sch); 387 idx = hhf_classify(skb, sch);
387 388
@@ -409,6 +410,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
409 if (++sch->q.qlen <= sch->limit) 410 if (++sch->q.qlen <= sch->limit)
410 return NET_XMIT_SUCCESS; 411 return NET_XMIT_SUCCESS;
411 412
413 prev_backlog = sch->qstats.backlog;
412 q->drop_overlimit++; 414 q->drop_overlimit++;
413 /* Return Congestion Notification only if we dropped a packet from this 415 /* Return Congestion Notification only if we dropped a packet from this
414 * bucket. 416 * bucket.
@@ -417,7 +419,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
417 return NET_XMIT_CN; 419 return NET_XMIT_CN;
418 420
419 /* As we dropped a packet, better let upper stack know this. */ 421 /* As we dropped a packet, better let upper stack know this. */
420 qdisc_tree_decrease_qlen(sch, 1); 422 qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
421 return NET_XMIT_SUCCESS; 423 return NET_XMIT_SUCCESS;
422} 424}
423 425
@@ -527,7 +529,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
527{ 529{
528 struct hhf_sched_data *q = qdisc_priv(sch); 530 struct hhf_sched_data *q = qdisc_priv(sch);
529 struct nlattr *tb[TCA_HHF_MAX + 1]; 531 struct nlattr *tb[TCA_HHF_MAX + 1];
530 unsigned int qlen; 532 unsigned int qlen, prev_backlog;
531 int err; 533 int err;
532 u64 non_hh_quantum; 534 u64 non_hh_quantum;
533 u32 new_quantum = q->quantum; 535 u32 new_quantum = q->quantum;
@@ -577,12 +579,14 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
577 } 579 }
578 580
579 qlen = sch->q.qlen; 581 qlen = sch->q.qlen;
582 prev_backlog = sch->qstats.backlog;
580 while (sch->q.qlen > sch->limit) { 583 while (sch->q.qlen > sch->limit) {
581 struct sk_buff *skb = hhf_dequeue(sch); 584 struct sk_buff *skb = hhf_dequeue(sch);
582 585
583 kfree_skb(skb); 586 kfree_skb(skb);
584 } 587 }
585 qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); 588 qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
589 prev_backlog - sch->qstats.backlog);
586 590
587 sch_tree_unlock(sch); 591 sch_tree_unlock(sch);
588 return 0; 592 return 0;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 15ccd7f8fb2a..87b02ed3d5f2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -600,6 +600,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
600 htb_activate(q, cl); 600 htb_activate(q, cl);
601 } 601 }
602 602
603 qdisc_qstats_backlog_inc(sch, skb);
603 sch->q.qlen++; 604 sch->q.qlen++;
604 return NET_XMIT_SUCCESS; 605 return NET_XMIT_SUCCESS;
605} 606}
@@ -889,6 +890,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
889ok: 890ok:
890 qdisc_bstats_update(sch, skb); 891 qdisc_bstats_update(sch, skb);
891 qdisc_unthrottled(sch); 892 qdisc_unthrottled(sch);
893 qdisc_qstats_backlog_dec(sch, skb);
892 sch->q.qlen--; 894 sch->q.qlen--;
893 return skb; 895 return skb;
894 } 896 }
@@ -955,6 +957,7 @@ static unsigned int htb_drop(struct Qdisc *sch)
955 unsigned int len; 957 unsigned int len;
956 if (cl->un.leaf.q->ops->drop && 958 if (cl->un.leaf.q->ops->drop &&
957 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { 959 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
960 sch->qstats.backlog -= len;
958 sch->q.qlen--; 961 sch->q.qlen--;
959 if (!cl->un.leaf.q->q.qlen) 962 if (!cl->un.leaf.q->q.qlen)
960 htb_deactivate(q, cl); 963 htb_deactivate(q, cl);
@@ -984,12 +987,12 @@ static void htb_reset(struct Qdisc *sch)
984 } 987 }
985 cl->prio_activity = 0; 988 cl->prio_activity = 0;
986 cl->cmode = HTB_CAN_SEND; 989 cl->cmode = HTB_CAN_SEND;
987
988 } 990 }
989 } 991 }
990 qdisc_watchdog_cancel(&q->watchdog); 992 qdisc_watchdog_cancel(&q->watchdog);
991 __skb_queue_purge(&q->direct_queue); 993 __skb_queue_purge(&q->direct_queue);
992 sch->q.qlen = 0; 994 sch->q.qlen = 0;
995 sch->qstats.backlog = 0;
993 memset(q->hlevel, 0, sizeof(q->hlevel)); 996 memset(q->hlevel, 0, sizeof(q->hlevel));
994 memset(q->row_mask, 0, sizeof(q->row_mask)); 997 memset(q->row_mask, 0, sizeof(q->row_mask));
995 for (i = 0; i < TC_HTB_NUMPRIO; i++) 998 for (i = 0; i < TC_HTB_NUMPRIO; i++)
@@ -1163,14 +1166,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1163 cl->common.classid)) == NULL) 1166 cl->common.classid)) == NULL)
1164 return -ENOBUFS; 1167 return -ENOBUFS;
1165 1168
1166 sch_tree_lock(sch); 1169 *old = qdisc_replace(sch, new, &cl->un.leaf.q);
1167 *old = cl->un.leaf.q;
1168 cl->un.leaf.q = new;
1169 if (*old != NULL) {
1170 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1171 qdisc_reset(*old);
1172 }
1173 sch_tree_unlock(sch);
1174 return 0; 1170 return 0;
1175} 1171}
1176 1172
@@ -1272,7 +1268,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1272{ 1268{
1273 struct htb_sched *q = qdisc_priv(sch); 1269 struct htb_sched *q = qdisc_priv(sch);
1274 struct htb_class *cl = (struct htb_class *)arg; 1270 struct htb_class *cl = (struct htb_class *)arg;
1275 unsigned int qlen;
1276 struct Qdisc *new_q = NULL; 1271 struct Qdisc *new_q = NULL;
1277 int last_child = 0; 1272 int last_child = 0;
1278 1273
@@ -1292,9 +1287,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1292 sch_tree_lock(sch); 1287 sch_tree_lock(sch);
1293 1288
1294 if (!cl->level) { 1289 if (!cl->level) {
1295 qlen = cl->un.leaf.q->q.qlen; 1290 unsigned int qlen = cl->un.leaf.q->q.qlen;
1291 unsigned int backlog = cl->un.leaf.q->qstats.backlog;
1292
1296 qdisc_reset(cl->un.leaf.q); 1293 qdisc_reset(cl->un.leaf.q);
1297 qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen); 1294 qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog);
1298 } 1295 }
1299 1296
1300 /* delete from hash and active; remainder in destroy_class */ 1297 /* delete from hash and active; remainder in destroy_class */
@@ -1428,10 +1425,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1428 sch_tree_lock(sch); 1425 sch_tree_lock(sch);
1429 if (parent && !parent->level) { 1426 if (parent && !parent->level) {
1430 unsigned int qlen = parent->un.leaf.q->q.qlen; 1427 unsigned int qlen = parent->un.leaf.q->q.qlen;
1428 unsigned int backlog = parent->un.leaf.q->qstats.backlog;
1431 1429
1432 /* turn parent into inner node */ 1430 /* turn parent into inner node */
1433 qdisc_reset(parent->un.leaf.q); 1431 qdisc_reset(parent->un.leaf.q);
1434 qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen); 1432 qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
1435 qdisc_destroy(parent->un.leaf.q); 1433 qdisc_destroy(parent->un.leaf.q);
1436 if (parent->prio_activity) 1434 if (parent->prio_activity)
1437 htb_deactivate(q, parent); 1435 htb_deactivate(q, parent);
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 3e82f047caaf..56a77b878eb3 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -57,7 +57,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
57 57
58 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 58 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
59 dev_queue = netdev_get_tx_queue(dev, ntx); 59 dev_queue = netdev_get_tx_queue(dev, ntx);
60 qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops, 60 qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx),
61 TC_H_MAKE(TC_H_MAJ(sch->handle), 61 TC_H_MAKE(TC_H_MAJ(sch->handle),
62 TC_H_MIN(ntx + 1))); 62 TC_H_MIN(ntx + 1)));
63 if (qdisc == NULL) 63 if (qdisc == NULL)
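
The sch_generic.c hunk earlier exports pfifo_fast_ops, and the mq (above) and mqprio (below) init paths switch from default_qdisc_ops to get_default_qdisc_ops(dev, ntx) when creating per-queue children. The helper itself is not shown in this diff; presumably it keeps the configurable default for real TX queues and falls back to plain pfifo_fast for the queues beyond real_num_tx_queues, roughly as sketched below.

/* Assumed shape of get_default_qdisc_ops(): real TX queues keep the
 * configurable default qdisc, while inactive queues beyond
 * real_num_tx_queues fall back to the now-exported pfifo_fast_ops.
 * Not part of this diff; details may differ.
 */
static inline const struct Qdisc_ops *
get_default_qdisc_ops(const struct net_device *dev, int ntx)
{
	return ntx < dev->real_num_tx_queues ?
			default_qdisc_ops : &pfifo_fast_ops;
}
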
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index ad70ecf57ce7..b8002ce3d010 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -28,6 +28,7 @@ static void mqprio_destroy(struct Qdisc *sch)
28{ 28{
29 struct net_device *dev = qdisc_dev(sch); 29 struct net_device *dev = qdisc_dev(sch);
30 struct mqprio_sched *priv = qdisc_priv(sch); 30 struct mqprio_sched *priv = qdisc_priv(sch);
31 struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO};
31 unsigned int ntx; 32 unsigned int ntx;
32 33
33 if (priv->qdiscs) { 34 if (priv->qdiscs) {
@@ -39,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch)
39 } 40 }
40 41
41 if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) 42 if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
42 dev->netdev_ops->ndo_setup_tc(dev, 0); 43 dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
43 else 44 else
44 netdev_set_num_tc(dev, 0); 45 netdev_set_num_tc(dev, 0);
45} 46}
@@ -124,7 +125,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
124 125
125 for (i = 0; i < dev->num_tx_queues; i++) { 126 for (i = 0; i < dev->num_tx_queues; i++) {
126 dev_queue = netdev_get_tx_queue(dev, i); 127 dev_queue = netdev_get_tx_queue(dev, i);
127 qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops, 128 qdisc = qdisc_create_dflt(dev_queue,
129 get_default_qdisc_ops(dev, i),
128 TC_H_MAKE(TC_H_MAJ(sch->handle), 130 TC_H_MAKE(TC_H_MAJ(sch->handle),
129 TC_H_MIN(i + 1))); 131 TC_H_MIN(i + 1)));
130 if (qdisc == NULL) { 132 if (qdisc == NULL) {
@@ -140,8 +142,11 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
140 * supplied and verified mapping 142 * supplied and verified mapping
141 */ 143 */
142 if (qopt->hw) { 144 if (qopt->hw) {
145 struct tc_to_netdev tc = {.type = TC_SETUP_MQPRIO,
146 { .tc = qopt->num_tc }};
147
143 priv->hw_owned = 1; 148 priv->hw_owned = 1;
144 err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); 149 err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
145 if (err) 150 if (err)
146 goto err; 151 goto err;
147 } else { 152 } else {
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 4e904ca0af9d..bcdd54bb101c 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -218,7 +218,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
218 if (q->queues[i] != &noop_qdisc) { 218 if (q->queues[i] != &noop_qdisc) {
219 struct Qdisc *child = q->queues[i]; 219 struct Qdisc *child = q->queues[i];
220 q->queues[i] = &noop_qdisc; 220 q->queues[i] = &noop_qdisc;
221 qdisc_tree_decrease_qlen(child, child->q.qlen); 221 qdisc_tree_reduce_backlog(child, child->q.qlen,
222 child->qstats.backlog);
222 qdisc_destroy(child); 223 qdisc_destroy(child);
223 } 224 }
224 } 225 }
@@ -238,8 +239,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
238 q->queues[i] = child; 239 q->queues[i] = child;
239 240
240 if (old != &noop_qdisc) { 241 if (old != &noop_qdisc) {
241 qdisc_tree_decrease_qlen(old, 242 qdisc_tree_reduce_backlog(old,
242 old->q.qlen); 243 old->q.qlen,
244 old->qstats.backlog);
243 qdisc_destroy(old); 245 qdisc_destroy(old);
244 } 246 }
245 sch_tree_unlock(sch); 247 sch_tree_unlock(sch);
@@ -303,13 +305,7 @@ static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
303 if (new == NULL) 305 if (new == NULL)
304 new = &noop_qdisc; 306 new = &noop_qdisc;
305 307
306 sch_tree_lock(sch); 308 *old = qdisc_replace(sch, new, &q->queues[band]);
307 *old = q->queues[band];
308 q->queues[band] = new;
309 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
310 qdisc_reset(*old);
311 sch_tree_unlock(sch);
312
313 return 0; 309 return 0;
314} 310}
315 311
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5abd1d9de989..9640bb39a5d2 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -598,7 +598,8 @@ deliver:
598 if (unlikely(err != NET_XMIT_SUCCESS)) { 598 if (unlikely(err != NET_XMIT_SUCCESS)) {
599 if (net_xmit_drop_count(err)) { 599 if (net_xmit_drop_count(err)) {
600 qdisc_qstats_drop(sch); 600 qdisc_qstats_drop(sch);
601 qdisc_tree_decrease_qlen(sch, 1); 601 qdisc_tree_reduce_backlog(sch, 1,
602 qdisc_pkt_len(skb));
602 } 603 }
603 } 604 }
604 goto tfifo_dequeue; 605 goto tfifo_dequeue;
@@ -1037,15 +1038,7 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1037{ 1038{
1038 struct netem_sched_data *q = qdisc_priv(sch); 1039 struct netem_sched_data *q = qdisc_priv(sch);
1039 1040
1040 sch_tree_lock(sch); 1041 *old = qdisc_replace(sch, new, &q->qdisc);
1041 *old = q->qdisc;
1042 q->qdisc = new;
1043 if (*old) {
1044 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1045 qdisc_reset(*old);
1046 }
1047 sch_tree_unlock(sch);
1048
1049 return 0; 1042 return 0;
1050} 1043}
1051 1044
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index b783a446d884..71ae3b9629f9 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -183,7 +183,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
183{ 183{
184 struct pie_sched_data *q = qdisc_priv(sch); 184 struct pie_sched_data *q = qdisc_priv(sch);
185 struct nlattr *tb[TCA_PIE_MAX + 1]; 185 struct nlattr *tb[TCA_PIE_MAX + 1];
186 unsigned int qlen; 186 unsigned int qlen, dropped = 0;
187 int err; 187 int err;
188 188
189 if (!opt) 189 if (!opt)
@@ -232,10 +232,11 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
232 while (sch->q.qlen > sch->limit) { 232 while (sch->q.qlen > sch->limit) {
233 struct sk_buff *skb = __skb_dequeue(&sch->q); 233 struct sk_buff *skb = __skb_dequeue(&sch->q);
234 234
235 dropped += qdisc_pkt_len(skb);
235 qdisc_qstats_backlog_dec(sch, skb); 236 qdisc_qstats_backlog_dec(sch, skb);
236 qdisc_drop(skb, sch); 237 qdisc_drop(skb, sch);
237 } 238 }
238 qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); 239 qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
239 240
240 sch_tree_unlock(sch); 241 sch_tree_unlock(sch);
241 return 0; 242 return 0;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index ba6487f2741f..fee1b15506b2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -191,7 +191,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
191 struct Qdisc *child = q->queues[i]; 191 struct Qdisc *child = q->queues[i];
192 q->queues[i] = &noop_qdisc; 192 q->queues[i] = &noop_qdisc;
193 if (child != &noop_qdisc) { 193 if (child != &noop_qdisc) {
194 qdisc_tree_decrease_qlen(child, child->q.qlen); 194 qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog);
195 qdisc_destroy(child); 195 qdisc_destroy(child);
196 } 196 }
197 } 197 }
@@ -210,8 +210,9 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
210 q->queues[i] = child; 210 q->queues[i] = child;
211 211
212 if (old != &noop_qdisc) { 212 if (old != &noop_qdisc) {
213 qdisc_tree_decrease_qlen(old, 213 qdisc_tree_reduce_backlog(old,
214 old->q.qlen); 214 old->q.qlen,
215 old->qstats.backlog);
215 qdisc_destroy(old); 216 qdisc_destroy(old);
216 } 217 }
217 sch_tree_unlock(sch); 218 sch_tree_unlock(sch);
@@ -268,13 +269,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
268 if (new == NULL) 269 if (new == NULL)
269 new = &noop_qdisc; 270 new = &noop_qdisc;
270 271
271 sch_tree_lock(sch); 272 *old = qdisc_replace(sch, new, &q->queues[band]);
272 *old = q->queues[band];
273 q->queues[band] = new;
274 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
275 qdisc_reset(*old);
276 sch_tree_unlock(sch);
277
278 return 0; 273 return 0;
279} 274}
280 275
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 3dc3a6e56052..8d2d8d953432 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -220,9 +220,10 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
220static void qfq_purge_queue(struct qfq_class *cl) 220static void qfq_purge_queue(struct qfq_class *cl)
221{ 221{
222 unsigned int len = cl->qdisc->q.qlen; 222 unsigned int len = cl->qdisc->q.qlen;
223 unsigned int backlog = cl->qdisc->qstats.backlog;
223 224
224 qdisc_reset(cl->qdisc); 225 qdisc_reset(cl->qdisc);
225 qdisc_tree_decrease_qlen(cl->qdisc, len); 226 qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
226} 227}
227 228
228static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { 229static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
@@ -617,11 +618,7 @@ static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
617 new = &noop_qdisc; 618 new = &noop_qdisc;
618 } 619 }
619 620
620 sch_tree_lock(sch); 621 *old = qdisc_replace(sch, new, &cl->qdisc);
621 qfq_purge_queue(cl);
622 *old = cl->qdisc;
623 cl->qdisc = new;
624 sch_tree_unlock(sch);
625 return 0; 622 return 0;
626} 623}
627 624
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 6c0534cc7758..8c0508c0e287 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -210,7 +210,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
210 q->flags = ctl->flags; 210 q->flags = ctl->flags;
211 q->limit = ctl->limit; 211 q->limit = ctl->limit;
212 if (child) { 212 if (child) {
213 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); 213 qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
214 q->qdisc->qstats.backlog);
214 qdisc_destroy(q->qdisc); 215 qdisc_destroy(q->qdisc);
215 q->qdisc = child; 216 q->qdisc = child;
216 } 217 }
@@ -313,12 +314,7 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
313 if (new == NULL) 314 if (new == NULL)
314 new = &noop_qdisc; 315 new = &noop_qdisc;
315 316
316 sch_tree_lock(sch); 317 *old = qdisc_replace(sch, new, &q->qdisc);
317 *old = q->qdisc;
318 q->qdisc = new;
319 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
320 qdisc_reset(*old);
321 sch_tree_unlock(sch);
322 return 0; 318 return 0;
323} 319}
324 320
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5bbb6332ec57..c69611640fa5 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -510,7 +510,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
510 510
511 sch_tree_lock(sch); 511 sch_tree_lock(sch);
512 512
513 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); 513 qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
514 q->qdisc->qstats.backlog);
514 qdisc_destroy(q->qdisc); 515 qdisc_destroy(q->qdisc);
515 q->qdisc = child; 516 q->qdisc = child;
516 517
@@ -606,12 +607,7 @@ static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
606 if (new == NULL) 607 if (new == NULL)
607 new = &noop_qdisc; 608 new = &noop_qdisc;
608 609
609 sch_tree_lock(sch); 610 *old = qdisc_replace(sch, new, &q->qdisc);
610 *old = q->qdisc;
611 q->qdisc = new;
612 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
613 qdisc_reset(*old);
614 sch_tree_unlock(sch);
615 return 0; 611 return 0;
616} 612}
617 613
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3abab534eb5c..498f0a2cb47f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -346,7 +346,7 @@ static int
346sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) 346sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
347{ 347{
348 struct sfq_sched_data *q = qdisc_priv(sch); 348 struct sfq_sched_data *q = qdisc_priv(sch);
349 unsigned int hash; 349 unsigned int hash, dropped;
350 sfq_index x, qlen; 350 sfq_index x, qlen;
351 struct sfq_slot *slot; 351 struct sfq_slot *slot;
352 int uninitialized_var(ret); 352 int uninitialized_var(ret);
@@ -461,7 +461,7 @@ enqueue:
461 return NET_XMIT_SUCCESS; 461 return NET_XMIT_SUCCESS;
462 462
463 qlen = slot->qlen; 463 qlen = slot->qlen;
464 sfq_drop(sch); 464 dropped = sfq_drop(sch);
465 /* Return Congestion Notification only if we dropped a packet 465 /* Return Congestion Notification only if we dropped a packet
466 * from this flow. 466 * from this flow.
467 */ 467 */
@@ -469,7 +469,7 @@ enqueue:
469 return NET_XMIT_CN; 469 return NET_XMIT_CN;
470 470
471 /* As we dropped a packet, better let upper stack know this */ 471 /* As we dropped a packet, better let upper stack know this */
472 qdisc_tree_decrease_qlen(sch, 1); 472 qdisc_tree_reduce_backlog(sch, 1, dropped);
473 return NET_XMIT_SUCCESS; 473 return NET_XMIT_SUCCESS;
474} 474}
475 475
@@ -537,6 +537,7 @@ static void sfq_rehash(struct Qdisc *sch)
537 struct sfq_slot *slot; 537 struct sfq_slot *slot;
538 struct sk_buff_head list; 538 struct sk_buff_head list;
539 int dropped = 0; 539 int dropped = 0;
540 unsigned int drop_len = 0;
540 541
541 __skb_queue_head_init(&list); 542 __skb_queue_head_init(&list);
542 543
@@ -565,6 +566,7 @@ static void sfq_rehash(struct Qdisc *sch)
565 if (x >= SFQ_MAX_FLOWS) { 566 if (x >= SFQ_MAX_FLOWS) {
566drop: 567drop:
567 qdisc_qstats_backlog_dec(sch, skb); 568 qdisc_qstats_backlog_dec(sch, skb);
569 drop_len += qdisc_pkt_len(skb);
568 kfree_skb(skb); 570 kfree_skb(skb);
569 dropped++; 571 dropped++;
570 continue; 572 continue;
@@ -594,7 +596,7 @@ drop:
594 } 596 }
595 } 597 }
596 sch->q.qlen -= dropped; 598 sch->q.qlen -= dropped;
597 qdisc_tree_decrease_qlen(sch, dropped); 599 qdisc_tree_reduce_backlog(sch, dropped, drop_len);
598} 600}
599 601
600static void sfq_perturbation(unsigned long arg) 602static void sfq_perturbation(unsigned long arg)
@@ -618,7 +620,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
618 struct sfq_sched_data *q = qdisc_priv(sch); 620 struct sfq_sched_data *q = qdisc_priv(sch);
619 struct tc_sfq_qopt *ctl = nla_data(opt); 621 struct tc_sfq_qopt *ctl = nla_data(opt);
620 struct tc_sfq_qopt_v1 *ctl_v1 = NULL; 622 struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
621 unsigned int qlen; 623 unsigned int qlen, dropped = 0;
622 struct red_parms *p = NULL; 624 struct red_parms *p = NULL;
623 625
624 if (opt->nla_len < nla_attr_size(sizeof(*ctl))) 626 if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
@@ -667,8 +669,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
667 669
668 qlen = sch->q.qlen; 670 qlen = sch->q.qlen;
669 while (sch->q.qlen > q->limit) 671 while (sch->q.qlen > q->limit)
670 sfq_drop(sch); 672 dropped += sfq_drop(sch);
671 qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen); 673 qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
672 674
673 del_timer(&q->perturb_timer); 675 del_timer(&q->perturb_timer);
674 if (q->perturb_period) { 676 if (q->perturb_period) {
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a4afde14e865..c2fbde742f37 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -160,6 +160,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
160 struct tbf_sched_data *q = qdisc_priv(sch); 160 struct tbf_sched_data *q = qdisc_priv(sch);
161 struct sk_buff *segs, *nskb; 161 struct sk_buff *segs, *nskb;
162 netdev_features_t features = netif_skb_features(skb); 162 netdev_features_t features = netif_skb_features(skb);
163 unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
163 int ret, nb; 164 int ret, nb;
164 165
165 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 166 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
@@ -172,6 +173,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
172 nskb = segs->next; 173 nskb = segs->next;
173 segs->next = NULL; 174 segs->next = NULL;
174 qdisc_skb_cb(segs)->pkt_len = segs->len; 175 qdisc_skb_cb(segs)->pkt_len = segs->len;
176 len += segs->len;
175 ret = qdisc_enqueue(segs, q->qdisc); 177 ret = qdisc_enqueue(segs, q->qdisc);
176 if (ret != NET_XMIT_SUCCESS) { 178 if (ret != NET_XMIT_SUCCESS) {
177 if (net_xmit_drop_count(ret)) 179 if (net_xmit_drop_count(ret))
@@ -183,7 +185,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
183 } 185 }
184 sch->q.qlen += nb; 186 sch->q.qlen += nb;
185 if (nb > 1) 187 if (nb > 1)
186 qdisc_tree_decrease_qlen(sch, 1 - nb); 188 qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
187 consume_skb(skb); 189 consume_skb(skb);
188 return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; 190 return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
189} 191}
@@ -399,7 +401,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
399 401
400 sch_tree_lock(sch); 402 sch_tree_lock(sch);
401 if (child) { 403 if (child) {
402 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); 404 qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
405 q->qdisc->qstats.backlog);
403 qdisc_destroy(q->qdisc); 406 qdisc_destroy(q->qdisc);
404 q->qdisc = child; 407 q->qdisc = child;
405 } 408 }
@@ -502,13 +505,7 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
502 if (new == NULL) 505 if (new == NULL)
503 new = &noop_qdisc; 506 new = &noop_qdisc;
504 507
505 sch_tree_lock(sch); 508 *old = qdisc_replace(sch, new, &q->qdisc);
506 *old = q->qdisc;
507 q->qdisc = new;
508 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
509 qdisc_reset(*old);
510 sch_tree_unlock(sch);
511
512 return 0; 509 return 0;
513} 510}
514 511
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 2bf8ec92dde4..e1849f3714ad 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1263,7 +1263,7 @@ static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr,
1263 if (score_curr > score_best) 1263 if (score_curr > score_best)
1264 return curr; 1264 return curr;
1265 else if (score_curr == score_best) 1265 else if (score_curr == score_best)
1266 return sctp_trans_elect_tie(curr, best); 1266 return sctp_trans_elect_tie(best, curr);
1267 else 1267 else
1268 return best; 1268 return best;
1269} 1269}
@@ -1406,7 +1406,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
1406 list_for_each_entry(t, &asoc->peer.transport_addr_list, 1406 list_for_each_entry(t, &asoc->peer.transport_addr_list,
1407 transports) { 1407 transports) {
1408 if (t->pmtu_pending && t->dst) { 1408 if (t->pmtu_pending && t->dst) {
1409 sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst)); 1409 sctp_transport_update_pmtu(sk, t,
1410 WORD_TRUNC(dst_mtu(t->dst)));
1410 t->pmtu_pending = 0; 1411 t->pmtu_pending = 0;
1411 } 1412 }
1412 if (!pmtu || (t->pathmtu < pmtu)) 1413 if (!pmtu || (t->pathmtu < pmtu))
@@ -1493,7 +1494,7 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
1493 1494
1494 asoc->peer.sack_needed = 0; 1495 asoc->peer.sack_needed = 0;
1495 1496
1496 sctp_outq_tail(&asoc->outqueue, sack); 1497 sctp_outq_tail(&asoc->outqueue, sack, GFP_ATOMIC);
1497 1498
1498 /* Stop the SACK timer. */ 1499 /* Stop the SACK timer. */
1499 timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK]; 1500 timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK];
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 1543e39f47c3..912eb1685a5d 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -27,9 +27,9 @@
27 * Vlad Yasevich <vladislav.yasevich@hp.com> 27 * Vlad Yasevich <vladislav.yasevich@hp.com>
28 */ 28 */
29 29
30#include <crypto/hash.h>
30#include <linux/slab.h> 31#include <linux/slab.h>
31#include <linux/types.h> 32#include <linux/types.h>
32#include <linux/crypto.h>
33#include <linux/scatterlist.h> 33#include <linux/scatterlist.h>
34#include <net/sctp/sctp.h> 34#include <net/sctp/sctp.h>
35#include <net/sctp/auth.h> 35#include <net/sctp/auth.h>
@@ -448,7 +448,7 @@ struct sctp_shared_key *sctp_auth_get_shkey(
448 */ 448 */
449int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) 449int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
450{ 450{
451 struct crypto_hash *tfm = NULL; 451 struct crypto_shash *tfm = NULL;
452 __u16 id; 452 __u16 id;
453 453
454 /* If AUTH extension is disabled, we are done */ 454 /* If AUTH extension is disabled, we are done */
@@ -462,9 +462,8 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
462 return 0; 462 return 0;
463 463
464	/* Allocate the array of pointers to transforms */ 464	/* Allocate the array of pointers to transforms */
465 ep->auth_hmacs = kzalloc( 465 ep->auth_hmacs = kzalloc(sizeof(struct crypto_shash *) *
466 sizeof(struct crypto_hash *) * SCTP_AUTH_NUM_HMACS, 466 SCTP_AUTH_NUM_HMACS, gfp);
467 gfp);
468 if (!ep->auth_hmacs) 467 if (!ep->auth_hmacs)
469 return -ENOMEM; 468 return -ENOMEM;
470 469
@@ -483,8 +482,7 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
483 continue; 482 continue;
484 483
485 /* Allocate the ID */ 484 /* Allocate the ID */
486 tfm = crypto_alloc_hash(sctp_hmac_list[id].hmac_name, 0, 485 tfm = crypto_alloc_shash(sctp_hmac_list[id].hmac_name, 0, 0);
487 CRYPTO_ALG_ASYNC);
488 if (IS_ERR(tfm)) 486 if (IS_ERR(tfm))
489 goto out_err; 487 goto out_err;
490 488
@@ -500,7 +498,7 @@ out_err:
500} 498}
501 499
502/* Destroy the hmac tfm array */ 500/* Destroy the hmac tfm array */
503void sctp_auth_destroy_hmacs(struct crypto_hash *auth_hmacs[]) 501void sctp_auth_destroy_hmacs(struct crypto_shash *auth_hmacs[])
504{ 502{
505 int i; 503 int i;
506 504
@@ -508,8 +506,7 @@ void sctp_auth_destroy_hmacs(struct crypto_hash *auth_hmacs[])
508 return; 506 return;
509 507
510 for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) { 508 for (i = 0; i < SCTP_AUTH_NUM_HMACS; i++) {
511 if (auth_hmacs[i]) 509 crypto_free_shash(auth_hmacs[i]);
512 crypto_free_hash(auth_hmacs[i]);
513 } 510 }
514 kfree(auth_hmacs); 511 kfree(auth_hmacs);
515} 512}
@@ -709,8 +706,7 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
709 struct sctp_auth_chunk *auth, 706 struct sctp_auth_chunk *auth,
710 gfp_t gfp) 707 gfp_t gfp)
711{ 708{
712 struct scatterlist sg; 709 struct crypto_shash *tfm;
713 struct hash_desc desc;
714 struct sctp_auth_bytes *asoc_key; 710 struct sctp_auth_bytes *asoc_key;
715 __u16 key_id, hmac_id; 711 __u16 key_id, hmac_id;
716 __u8 *digest; 712 __u8 *digest;
@@ -742,16 +738,22 @@ void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
742 738
743 /* set up scatter list */ 739 /* set up scatter list */
744 end = skb_tail_pointer(skb); 740 end = skb_tail_pointer(skb);
745 sg_init_one(&sg, auth, end - (unsigned char *)auth);
746 741
747 desc.tfm = asoc->ep->auth_hmacs[hmac_id]; 742 tfm = asoc->ep->auth_hmacs[hmac_id];
748 desc.flags = 0;
749 743
750 digest = auth->auth_hdr.hmac; 744 digest = auth->auth_hdr.hmac;
751 if (crypto_hash_setkey(desc.tfm, &asoc_key->data[0], asoc_key->len)) 745 if (crypto_shash_setkey(tfm, &asoc_key->data[0], asoc_key->len))
752 goto free; 746 goto free;
753 747
754 crypto_hash_digest(&desc, &sg, sg.length, digest); 748 {
749 SHASH_DESC_ON_STACK(desc, tfm);
750
751 desc->tfm = tfm;
752 desc->flags = 0;
753 crypto_shash_digest(desc, (u8 *)auth,
754 end - (unsigned char *)auth, digest);
755 shash_desc_zero(desc);
756 }
755 757
756free: 758free:
757 if (free_key) 759 if (free_key)
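The auth.c conversion above moves SCTP from the legacy crypto_hash interface (scatterlists plus struct hash_desc) to the synchronous crypto_shash API, which digests a flat buffer through a descriptor allocated on the stack. A minimal sketch of the resulting pattern follows; example_hmac_sha1 is an illustrative name, not a kernel function:

	#include <crypto/hash.h>

	static int example_hmac_sha1(const u8 *key, unsigned int keylen,
				     const u8 *data, unsigned int len, u8 *digest)
	{
		struct crypto_shash *tfm;
		int err;

		tfm = crypto_alloc_shash("hmac(sha1)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_shash_setkey(tfm, key, keylen);
		if (!err) {
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;
			/* one-shot digest over a flat buffer, no scatterlist */
			err = crypto_shash_digest(desc, data, len, digest);
			shash_desc_zero(desc);
		}

		crypto_free_shash(tfm);
		return err;
	}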
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 871cdf9567e6..401c60750b20 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -111,7 +111,8 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest,
111 dest->port = src->port; 111 dest->port = src->port;
112 112
113 list_for_each_entry(addr, &src->address_list, list) { 113 list_for_each_entry(addr, &src->address_list, list) {
114 error = sctp_add_bind_addr(dest, &addr->a, 1, gfp); 114 error = sctp_add_bind_addr(dest, &addr->a, sizeof(addr->a),
115 1, gfp);
115 if (error < 0) 116 if (error < 0)
116 break; 117 break;
117 } 118 }
@@ -150,7 +151,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)
150 151
151/* Add an address to the bind address list in the SCTP_bind_addr structure. */ 152/* Add an address to the bind address list in the SCTP_bind_addr structure. */
152int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, 153int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
153 __u8 addr_state, gfp_t gfp) 154 int new_size, __u8 addr_state, gfp_t gfp)
154{ 155{
155 struct sctp_sockaddr_entry *addr; 156 struct sctp_sockaddr_entry *addr;
156 157
@@ -159,7 +160,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
159 if (!addr) 160 if (!addr)
160 return -ENOMEM; 161 return -ENOMEM;
161 162
162 memcpy(&addr->a, new, sizeof(*new)); 163 memcpy(&addr->a, new, min_t(size_t, sizeof(*new), new_size));
163 164
164 /* Fix up the port if it has not yet been set. 165 /* Fix up the port if it has not yet been set.
165 * Both v4 and v6 have the port at the same offset. 166 * Both v4 and v6 have the port at the same offset.
@@ -291,7 +292,8 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
291 } 292 }
292 293
293 af->from_addr_param(&addr, rawaddr, htons(port), 0); 294 af->from_addr_param(&addr, rawaddr, htons(port), 0);
294 retval = sctp_add_bind_addr(bp, &addr, SCTP_ADDR_SRC, gfp); 295 retval = sctp_add_bind_addr(bp, &addr, sizeof(addr),
296 SCTP_ADDR_SRC, gfp);
295 if (retval) { 297 if (retval) {
296 /* Can't finish building the list, clean up. */ 298 /* Can't finish building the list, clean up. */
297 sctp_bind_addr_clean(bp); 299 sctp_bind_addr_clean(bp);
@@ -453,8 +455,8 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
453 (((AF_INET6 == addr->sa.sa_family) && 455 (((AF_INET6 == addr->sa.sa_family) &&
454 (flags & SCTP_ADDR6_ALLOWED) && 456 (flags & SCTP_ADDR6_ALLOWED) &&
455 (flags & SCTP_ADDR6_PEERSUPP)))) 457 (flags & SCTP_ADDR6_PEERSUPP))))
456 error = sctp_add_bind_addr(dest, addr, SCTP_ADDR_SRC, 458 error = sctp_add_bind_addr(dest, addr, sizeof(*addr),
457 gfp); 459 SCTP_ADDR_SRC, gfp);
458 } 460 }
459 461
460 return error; 462 return error;
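sctp_add_bind_addr() now takes the size of the address the caller actually holds, so the copy into the union sctp_addr entry can no longer read past a shorter sockaddr (an IPv4 address handed to a union sized for IPv6, for instance). The guarded copy reduces to one bounded memcpy; in this sketch, entry stands in for the freshly allocated sctp_sockaddr_entry:

	/* never read more than the caller says is valid, nor more than fits */
	memcpy(&entry->a, new, min_t(size_t, sizeof(entry->a), new_size));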
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index a3380917f197..958ef5f33f4b 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -70,19 +70,6 @@ static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp)
70 return msg; 70 return msg;
71} 71}
72 72
73void sctp_datamsg_free(struct sctp_datamsg *msg)
74{
75 struct sctp_chunk *chunk;
76
77 /* This doesn't have to be a _safe vairant because
78 * sctp_chunk_free() only drops the refs.
79 */
80 list_for_each_entry(chunk, &msg->chunks, frag_list)
81 sctp_chunk_free(chunk);
82
83 sctp_datamsg_put(msg);
84}
85
86/* Final destruction of datamsg memory. */ 73/* Final destruction of datamsg memory. */
87static void sctp_datamsg_destroy(struct sctp_datamsg *msg) 74static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
88{ 75{
@@ -273,7 +260,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
273 frag |= SCTP_DATA_SACK_IMM; 260 frag |= SCTP_DATA_SACK_IMM;
274 } 261 }
275 262
276 chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 0); 263 chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag,
264 0, GFP_KERNEL);
277 265
278 if (!chunk) { 266 if (!chunk) {
279 err = -ENOMEM; 267 err = -ENOMEM;
@@ -309,7 +297,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
309 (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY)) 297 (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY))
310 frag |= SCTP_DATA_SACK_IMM; 298 frag |= SCTP_DATA_SACK_IMM;
311 299
312 chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag, 0); 300 chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag,
301 0, GFP_KERNEL);
313 302
314 if (!chunk) { 303 if (!chunk) {
315 err = -ENOMEM; 304 err = -ENOMEM;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 2522a6175291..9d494e35e7f9 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -42,7 +42,6 @@
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/in.h> 43#include <linux/in.h>
44#include <linux/random.h> /* get_random_bytes() */ 44#include <linux/random.h> /* get_random_bytes() */
45#include <linux/crypto.h>
46#include <net/sock.h> 45#include <net/sock.h>
47#include <net/ipv6.h> 46#include <net/ipv6.h>
48#include <net/sctp/sctp.h> 47#include <net/sctp/sctp.h>
diff --git a/net/sctp/input.c b/net/sctp/input.c
index bf61dfb8e09e..00b8445364e3 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -221,7 +221,7 @@ int sctp_rcv(struct sk_buff *skb)
221 goto discard_release; 221 goto discard_release;
222 222
223 /* Create an SCTP packet structure. */ 223 /* Create an SCTP packet structure. */
224 chunk = sctp_chunkify(skb, asoc, sk); 224 chunk = sctp_chunkify(skb, asoc, sk, GFP_ATOMIC);
225 if (!chunk) 225 if (!chunk)
226 goto discard_release; 226 goto discard_release;
227 SCTP_INPUT_CB(skb)->chunk = chunk; 227 SCTP_INPUT_CB(skb)->chunk = chunk;
@@ -606,7 +606,8 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
606 606
607 /* PMTU discovery (RFC1191) */ 607 /* PMTU discovery (RFC1191) */
608 if (ICMP_FRAG_NEEDED == code) { 608 if (ICMP_FRAG_NEEDED == code) {
609 sctp_icmp_frag_needed(sk, asoc, transport, info); 609 sctp_icmp_frag_needed(sk, asoc, transport,
610 WORD_TRUNC(info));
610 goto out_unlock; 611 goto out_unlock;
611 } else { 612 } else {
612 if (ICMP_PROT_UNREACH == code) { 613 if (ICMP_PROT_UNREACH == code) {
@@ -935,15 +936,20 @@ static struct sctp_association *__sctp_lookup_association(
935 struct sctp_transport **pt) 936 struct sctp_transport **pt)
936{ 937{
937 struct sctp_transport *t; 938 struct sctp_transport *t;
939 struct sctp_association *asoc = NULL;
938 940
939 t = sctp_addrs_lookup_transport(net, local, peer); 941 t = sctp_addrs_lookup_transport(net, local, peer);
940 if (!t || t->dead) 942 if (!t || !sctp_transport_hold(t))
941 return NULL; 943 goto out;
942 944
943 sctp_association_hold(t->asoc); 945 asoc = t->asoc;
946 sctp_association_hold(asoc);
944 *pt = t; 947 *pt = t;
945 948
946 return t->asoc; 949 sctp_transport_put(t);
950
951out:
952 return asoc;
947} 953}
948 954
949/* Look up an association. protected by RCU read lock */ 955/* Look up an association. protected by RCU read lock */
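The lookup above no longer relies on a t->dead flag; a transport found in the hash is only used if a reference can still be taken, and the diff implies sctp_transport_hold() returns nonzero only when that succeeds. A sketch of the resulting try-hold pattern, under that assumption:

	struct sctp_transport *t;
	struct sctp_association *asoc = NULL;

	t = sctp_addrs_lookup_transport(net, local, peer);
	if (t && sctp_transport_hold(t)) {	/* fails once refcnt hit zero */
		asoc = t->asoc;
		sctp_association_hold(asoc);	/* pin the association */
		sctp_transport_put(t);		/* drop the temporary hold */
	}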
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ec529121f38a..ce46f1c7f133 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -526,6 +526,8 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
526 } 526 }
527 return 0; 527 return 0;
528 } 528 }
529 if (addr1->v6.sin6_port != addr2->v6.sin6_port)
530 return 0;
529 if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr)) 531 if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
530 return 0; 532 return 0;
531 /* If this is a linklocal address, compare the scope_id. */ 533 /* If this is a linklocal address, compare the scope_id. */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9d610eddd19e..736c004abfbc 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -153,7 +153,7 @@ void sctp_packet_free(struct sctp_packet *packet)
153 */ 153 */
154sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, 154sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
155 struct sctp_chunk *chunk, 155 struct sctp_chunk *chunk,
156 int one_packet) 156 int one_packet, gfp_t gfp)
157{ 157{
158 sctp_xmit_t retval; 158 sctp_xmit_t retval;
159 int error = 0; 159 int error = 0;
@@ -163,7 +163,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
163 switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { 163 switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) {
164 case SCTP_XMIT_PMTU_FULL: 164 case SCTP_XMIT_PMTU_FULL:
165 if (!packet->has_cookie_echo) { 165 if (!packet->has_cookie_echo) {
166 error = sctp_packet_transmit(packet); 166 error = sctp_packet_transmit(packet, gfp);
167 if (error < 0) 167 if (error < 0)
168 chunk->skb->sk->sk_err = -error; 168 chunk->skb->sk->sk_err = -error;
169 169
@@ -376,7 +376,7 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
376 * 376 *
377 * The return value is a normal kernel error return value. 377 * The return value is a normal kernel error return value.
378 */ 378 */
379int sctp_packet_transmit(struct sctp_packet *packet) 379int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
380{ 380{
381 struct sctp_transport *tp = packet->transport; 381 struct sctp_transport *tp = packet->transport;
382 struct sctp_association *asoc = tp->asoc; 382 struct sctp_association *asoc = tp->asoc;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index c0380cfb16ae..8d3d3625130e 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -68,7 +68,7 @@ static void sctp_mark_missing(struct sctp_outq *q,
68 68
69static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); 69static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
70 70
71static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout); 71static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
72 72
73/* Add data to the front of the queue. */ 73/* Add data to the front of the queue. */
74static inline void sctp_outq_head_data(struct sctp_outq *q, 74static inline void sctp_outq_head_data(struct sctp_outq *q,
@@ -285,7 +285,7 @@ void sctp_outq_free(struct sctp_outq *q)
285} 285}
286 286
287/* Put a new chunk in an sctp_outq. */ 287/* Put a new chunk in an sctp_outq. */
288int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) 288int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
289{ 289{
290 struct net *net = sock_net(q->asoc->base.sk); 290 struct net *net = sock_net(q->asoc->base.sk);
291 int error = 0; 291 int error = 0;
@@ -341,7 +341,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
341 return error; 341 return error;
342 342
343 if (!q->cork) 343 if (!q->cork)
344 error = sctp_outq_flush(q, 0); 344 error = sctp_outq_flush(q, 0, gfp);
345 345
346 return error; 346 return error;
347} 347}
@@ -510,7 +510,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
510 * will be flushed at the end. 510 * will be flushed at the end.
511 */ 511 */
512 if (reason != SCTP_RTXR_FAST_RTX) 512 if (reason != SCTP_RTXR_FAST_RTX)
513 error = sctp_outq_flush(q, /* rtx_timeout */ 1); 513 error = sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC);
514 514
515 if (error) 515 if (error)
516 q->asoc->base.sk->sk_err = -error; 516 q->asoc->base.sk->sk_err = -error;
@@ -601,12 +601,12 @@ redo:
601 * control chunks are already freed so there 601 * control chunks are already freed so there
602 * is nothing we can do. 602 * is nothing we can do.
603 */ 603 */
604 sctp_packet_transmit(pkt); 604 sctp_packet_transmit(pkt, GFP_ATOMIC);
605 goto redo; 605 goto redo;
606 } 606 }
607 607
608 /* Send this packet. */ 608 /* Send this packet. */
609 error = sctp_packet_transmit(pkt); 609 error = sctp_packet_transmit(pkt, GFP_ATOMIC);
610 610
611 /* If we are retransmitting, we should only 611 /* If we are retransmitting, we should only
612 * send a single packet. 612 * send a single packet.
@@ -622,7 +622,7 @@ redo:
622 622
623 case SCTP_XMIT_RWND_FULL: 623 case SCTP_XMIT_RWND_FULL:
624 /* Send this packet. */ 624 /* Send this packet. */
625 error = sctp_packet_transmit(pkt); 625 error = sctp_packet_transmit(pkt, GFP_ATOMIC);
626 626
627 /* Stop sending DATA as there is no more room 627 /* Stop sending DATA as there is no more room
628 * at the receiver. 628 * at the receiver.
@@ -632,7 +632,7 @@ redo:
632 632
633 case SCTP_XMIT_DELAY: 633 case SCTP_XMIT_DELAY:
634 /* Send this packet. */ 634 /* Send this packet. */
635 error = sctp_packet_transmit(pkt); 635 error = sctp_packet_transmit(pkt, GFP_ATOMIC);
636 636
637 /* Stop sending DATA because of nagle delay. */ 637 /* Stop sending DATA because of nagle delay. */
638 done = 1; 638 done = 1;
@@ -685,12 +685,12 @@ redo:
685} 685}
686 686
687/* Cork the outqueue so queued chunks are really queued. */ 687/* Cork the outqueue so queued chunks are really queued. */
688int sctp_outq_uncork(struct sctp_outq *q) 688int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
689{ 689{
690 if (q->cork) 690 if (q->cork)
691 q->cork = 0; 691 q->cork = 0;
692 692
693 return sctp_outq_flush(q, 0); 693 return sctp_outq_flush(q, 0, gfp);
694} 694}
695 695
696 696
@@ -703,7 +703,7 @@ int sctp_outq_uncork(struct sctp_outq *q)
703 * locking concerns must be made. Today we use the sock lock to protect 703 * locking concerns must be made. Today we use the sock lock to protect
704 * this function. 704 * this function.
705 */ 705 */
706static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) 706static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
707{ 707{
708 struct sctp_packet *packet; 708 struct sctp_packet *packet;
709 struct sctp_packet singleton; 709 struct sctp_packet singleton;
@@ -825,7 +825,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
825 sctp_packet_init(&singleton, transport, sport, dport); 825 sctp_packet_init(&singleton, transport, sport, dport);
826 sctp_packet_config(&singleton, vtag, 0); 826 sctp_packet_config(&singleton, vtag, 0);
827 sctp_packet_append_chunk(&singleton, chunk); 827 sctp_packet_append_chunk(&singleton, chunk);
828 error = sctp_packet_transmit(&singleton); 828 error = sctp_packet_transmit(&singleton, gfp);
829 if (error < 0) 829 if (error < 0)
830 return error; 830 return error;
831 break; 831 break;
@@ -856,7 +856,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
856 case SCTP_CID_ASCONF: 856 case SCTP_CID_ASCONF:
857 case SCTP_CID_FWD_TSN: 857 case SCTP_CID_FWD_TSN:
858 status = sctp_packet_transmit_chunk(packet, chunk, 858 status = sctp_packet_transmit_chunk(packet, chunk,
859 one_packet); 859 one_packet, gfp);
860 if (status != SCTP_XMIT_OK) { 860 if (status != SCTP_XMIT_OK) {
861 /* put the chunk back */ 861 /* put the chunk back */
862 list_add(&chunk->list, &q->control_chunk_list); 862 list_add(&chunk->list, &q->control_chunk_list);
@@ -978,8 +978,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
978 (new_transport->state == SCTP_UNCONFIRMED) || 978 (new_transport->state == SCTP_UNCONFIRMED) ||
979 (new_transport->state == SCTP_PF))) 979 (new_transport->state == SCTP_PF)))
980 new_transport = asoc->peer.active_path; 980 new_transport = asoc->peer.active_path;
981 if (new_transport->state == SCTP_UNCONFIRMED) 981 if (new_transport->state == SCTP_UNCONFIRMED) {
982		WARN_ONCE(1, "Attempt to send packet on unconfirmed path.");
983 sctp_chunk_fail(chunk, 0);
984 sctp_chunk_free(chunk);
982 continue; 985 continue;
986 }
983 987
984 /* Change packets if necessary. */ 988 /* Change packets if necessary. */
985 if (new_transport != transport) { 989 if (new_transport != transport) {
@@ -1011,7 +1015,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
1011 atomic_read(&chunk->skb->users) : -1); 1015 atomic_read(&chunk->skb->users) : -1);
1012 1016
1013 /* Add the chunk to the packet. */ 1017 /* Add the chunk to the packet. */
1014 status = sctp_packet_transmit_chunk(packet, chunk, 0); 1018 status = sctp_packet_transmit_chunk(packet, chunk, 0, gfp);
1015 1019
1016 switch (status) { 1020 switch (status) {
1017 case SCTP_XMIT_PMTU_FULL: 1021 case SCTP_XMIT_PMTU_FULL:
@@ -1088,7 +1092,7 @@ sctp_flush_out:
1088 send_ready); 1092 send_ready);
1089 packet = &t->packet; 1093 packet = &t->packet;
1090 if (!sctp_packet_empty(packet)) 1094 if (!sctp_packet_empty(packet))
1091 error = sctp_packet_transmit(packet); 1095 error = sctp_packet_transmit(packet, gfp);
1092 1096
1093 /* Clear the burst limited state, if any */ 1097 /* Clear the burst limited state, if any */
1094 sctp_transport_burst_reset(t); 1098 sctp_transport_burst_reset(t);
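Threading a gfp_t through sctp_outq_tail(), sctp_outq_flush() and sctp_packet_transmit() lets each caller name its own allocation context instead of hard-coding GFP_ATOMIC everywhere: receive and timer paths stay atomic, while sendmsg-driven paths may sleep. Roughly, the two flavours of call site look like this:

	/* from softirq or timer context: allocations must not sleep */
	err = sctp_outq_tail(&asoc->outqueue, chunk, GFP_ATOMIC);

	/* from process context (e.g. sendmsg): reclaim is allowed */
	err = sctp_outq_tail(&asoc->outqueue, chunk, GFP_KERNEL);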
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 5e68b94ee640..6cc2152e0740 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -65,7 +65,7 @@ static struct {
65 struct kfifo fifo; 65 struct kfifo fifo;
66 spinlock_t lock; 66 spinlock_t lock;
67 wait_queue_head_t wait; 67 wait_queue_head_t wait;
68 struct timespec tstart; 68 struct timespec64 tstart;
69} sctpw; 69} sctpw;
70 70
71static __printf(1, 2) void printl(const char *fmt, ...) 71static __printf(1, 2) void printl(const char *fmt, ...)
@@ -85,7 +85,7 @@ static __printf(1, 2) void printl(const char *fmt, ...)
85static int sctpprobe_open(struct inode *inode, struct file *file) 85static int sctpprobe_open(struct inode *inode, struct file *file)
86{ 86{
87 kfifo_reset(&sctpw.fifo); 87 kfifo_reset(&sctpw.fifo);
88 getnstimeofday(&sctpw.tstart); 88 ktime_get_ts64(&sctpw.tstart);
89 89
90 return 0; 90 return 0;
91} 91}
@@ -138,7 +138,7 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
138 struct sk_buff *skb = chunk->skb; 138 struct sk_buff *skb = chunk->skb;
139 struct sctp_transport *sp; 139 struct sctp_transport *sp;
140 static __u32 lcwnd = 0; 140 static __u32 lcwnd = 0;
141 struct timespec now; 141 struct timespec64 now;
142 142
143 sp = asoc->peer.primary_path; 143 sp = asoc->peer.primary_path;
144 144
@@ -149,8 +149,8 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
149 (full || sp->cwnd != lcwnd)) { 149 (full || sp->cwnd != lcwnd)) {
150 lcwnd = sp->cwnd; 150 lcwnd = sp->cwnd;
151 151
152 getnstimeofday(&now); 152 ktime_get_ts64(&now);
153 now = timespec_sub(now, sctpw.tstart); 153 now = timespec64_sub(now, sctpw.tstart);
154 154
155 printl("%lu.%06lu ", (unsigned long) now.tv_sec, 155 printl("%lu.%06lu ", (unsigned long) now.tv_sec,
156 (unsigned long) now.tv_nsec / NSEC_PER_USEC); 156 (unsigned long) now.tv_nsec / NSEC_PER_USEC);
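The probe module switches struct timespec and getnstimeofday() to their 64-bit, y2038-safe counterparts. A minimal sketch of the replacement pattern, using only the helpers visible in the hunk above:

	#include <linux/timekeeping.h>
	#include <linux/time64.h>

	struct timespec64 start, now, delta;

	ktime_get_ts64(&start);
	/* ... measured work ... */
	ktime_get_ts64(&now);
	delta = timespec64_sub(now, start);
	pr_info("elapsed %lld.%06lu\n", (long long)delta.tv_sec,
		(unsigned long)delta.tv_nsec / NSEC_PER_USEC);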
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 684c5b31563b..5cfac8d5d3b3 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -161,12 +161,9 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
161 struct sctp_af *af; 161 struct sctp_af *af;
162 162
163 primary = &assoc->peer.primary_addr; 163 primary = &assoc->peer.primary_addr;
164 rcu_read_lock();
165 list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list, 164 list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list,
166 transports) { 165 transports) {
167 addr = &transport->ipaddr; 166 addr = &transport->ipaddr;
168 if (transport->dead)
169 continue;
170 167
171 af = sctp_get_af_specific(addr->sa.sa_family); 168 af = sctp_get_af_specific(addr->sa.sa_family);
172 if (af->cmp_addr(addr, primary)) { 169 if (af->cmp_addr(addr, primary)) {
@@ -174,7 +171,6 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa
174 } 171 }
175 af->seq_dump_addr(seq, addr); 172 af->seq_dump_addr(seq, addr);
176 } 173 }
177 rcu_read_unlock();
178} 174}
179 175
180static void *sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) 176static void *sctp_eps_seq_start(struct seq_file *seq, loff_t *pos)
@@ -380,6 +376,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
380 } 376 }
381 377
382 transport = (struct sctp_transport *)v; 378 transport = (struct sctp_transport *)v;
379 if (!sctp_transport_hold(transport))
380 return 0;
383 assoc = transport->asoc; 381 assoc = transport->asoc;
384 epb = &assoc->base; 382 epb = &assoc->base;
385 sk = epb->sk; 383 sk = epb->sk;
@@ -412,6 +410,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
412 sk->sk_rcvbuf); 410 sk->sk_rcvbuf);
413 seq_printf(seq, "\n"); 411 seq_printf(seq, "\n");
414 412
413 sctp_transport_put(transport);
414
415 return 0; 415 return 0;
416} 416}
417 417
@@ -480,7 +480,7 @@ static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v)
480static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) 480static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
481{ 481{
482 struct sctp_association *assoc; 482 struct sctp_association *assoc;
483 struct sctp_transport *tsp; 483 struct sctp_transport *transport, *tsp;
484 484
485 if (v == SEQ_START_TOKEN) { 485 if (v == SEQ_START_TOKEN) {
486 seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX " 486 seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX "
@@ -488,13 +488,13 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
488 return 0; 488 return 0;
489 } 489 }
490 490
491 tsp = (struct sctp_transport *)v; 491 transport = (struct sctp_transport *)v;
492 assoc = tsp->asoc; 492 if (!sctp_transport_hold(transport))
493 return 0;
494 assoc = transport->asoc;
493 495
494 list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list, 496 list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
495 transports) { 497 transports) {
496 if (tsp->dead)
497 continue;
498 /* 498 /*
499 * The remote address (ADDR) 499 * The remote address (ADDR)
500 */ 500 */
@@ -544,6 +544,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
544 seq_printf(seq, "\n"); 544 seq_printf(seq, "\n");
545 } 545 }
546 546
547 sctp_transport_put(transport);
548
547 return 0; 549 return 0;
548} 550}
549 551
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index ab0d538a74ed..d3d50daa248b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -60,6 +60,8 @@
60#include <net/inet_common.h> 60#include <net/inet_common.h>
61#include <net/inet_ecn.h> 61#include <net/inet_ecn.h>
62 62
63#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
64
63/* Global data structures. */ 65/* Global data structures. */
64struct sctp_globals sctp_globals __read_mostly; 66struct sctp_globals sctp_globals __read_mostly;
65 67
@@ -214,6 +216,7 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
214 (copy_flags & SCTP_ADDR6_ALLOWED) && 216 (copy_flags & SCTP_ADDR6_ALLOWED) &&
215 (copy_flags & SCTP_ADDR6_PEERSUPP)))) { 217 (copy_flags & SCTP_ADDR6_PEERSUPP)))) {
216 error = sctp_add_bind_addr(bp, &addr->a, 218 error = sctp_add_bind_addr(bp, &addr->a,
219 sizeof(addr->a),
217 SCTP_ADDR_SRC, GFP_ATOMIC); 220 SCTP_ADDR_SRC, GFP_ATOMIC);
218 if (error) 221 if (error)
219 goto end_copy; 222 goto end_copy;
@@ -1355,6 +1358,8 @@ static __init int sctp_init(void)
1355 unsigned long limit; 1358 unsigned long limit;
1356 int max_share; 1359 int max_share;
1357 int order; 1360 int order;
1361 int num_entries;
1362 int max_entry_order;
1358 1363
1359 sock_skb_cb_check_size(sizeof(struct sctp_ulpevent)); 1364 sock_skb_cb_check_size(sizeof(struct sctp_ulpevent));
1360 1365
@@ -1407,14 +1412,24 @@ static __init int sctp_init(void)
1407 1412
1408 /* Size and allocate the association hash table. 1413 /* Size and allocate the association hash table.
1409 * The methodology is similar to that of the tcp hash tables. 1414 * The methodology is similar to that of the tcp hash tables.
1415 * Though not identical. Start by getting a goal size
1410 */ 1416 */
1411 if (totalram_pages >= (128 * 1024)) 1417 if (totalram_pages >= (128 * 1024))
1412 goal = totalram_pages >> (22 - PAGE_SHIFT); 1418 goal = totalram_pages >> (22 - PAGE_SHIFT);
1413 else 1419 else
1414 goal = totalram_pages >> (24 - PAGE_SHIFT); 1420 goal = totalram_pages >> (24 - PAGE_SHIFT);
1415 1421
1416 for (order = 0; (1UL << order) < goal; order++) 1422 /* Then compute the page order for said goal */
1417 ; 1423 order = get_order(goal);
1424
1425 /* Now compute the required page order for the maximum sized table we
1426 * want to create
1427 */
1428 max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES *
1429 sizeof(struct sctp_bind_hashbucket));
1430
1431 /* Limit the page order by that maximum hash table size */
1432 order = min(order, max_entry_order);
1418 1433
1419 /* Allocate and initialize the endpoint hash table. */ 1434 /* Allocate and initialize the endpoint hash table. */
1420 sctp_ep_hashsize = 64; 1435 sctp_ep_hashsize = 64;
@@ -1430,20 +1445,35 @@ static __init int sctp_init(void)
1430 INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain); 1445 INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain);
1431 } 1446 }
1432 1447
1433 /* Allocate and initialize the SCTP port hash table. */ 1448 /* Allocate and initialize the SCTP port hash table.
1449	 * Note that order is initialized to start at the max sized
1450 * table we want to support. If we can't get that many pages
1451 * reduce the order and try again
1452 */
1434 do { 1453 do {
1435 sctp_port_hashsize = (1UL << order) * PAGE_SIZE /
1436 sizeof(struct sctp_bind_hashbucket);
1437 if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
1438 continue;
1439 sctp_port_hashtable = (struct sctp_bind_hashbucket *) 1454 sctp_port_hashtable = (struct sctp_bind_hashbucket *)
1440 __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order); 1455 __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order);
1441 } while (!sctp_port_hashtable && --order > 0); 1456 } while (!sctp_port_hashtable && --order > 0);
1457
1442 if (!sctp_port_hashtable) { 1458 if (!sctp_port_hashtable) {
1443 pr_err("Failed bind hash alloc\n"); 1459 pr_err("Failed bind hash alloc\n");
1444 status = -ENOMEM; 1460 status = -ENOMEM;
1445 goto err_bhash_alloc; 1461 goto err_bhash_alloc;
1446 } 1462 }
1463
1464 /* Now compute the number of entries that will fit in the
1465 * port hash space we allocated
1466 */
1467 num_entries = (1UL << order) * PAGE_SIZE /
1468 sizeof(struct sctp_bind_hashbucket);
1469
1470 /* And finish by rounding it down to the nearest power of two
1471	 * this wastes some memory of course, but it's needed because
1472	 * the hash function operates based on the assumption that
1473	 * the number of entries is a power of two
1474 */
1475 sctp_port_hashsize = rounddown_pow_of_two(num_entries);
1476
1447 for (i = 0; i < sctp_port_hashsize; i++) { 1477 for (i = 0; i < sctp_port_hashsize; i++) {
1448 spin_lock_init(&sctp_port_hashtable[i].lock); 1478 spin_lock_init(&sctp_port_hashtable[i].lock);
1449 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); 1479 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
@@ -1452,7 +1482,8 @@ static __init int sctp_init(void)
1452 if (sctp_transport_hashtable_init()) 1482 if (sctp_transport_hashtable_init())
1453 goto err_thash_alloc; 1483 goto err_thash_alloc;
1454 1484
1455 pr_info("Hash tables configured (bind %d)\n", sctp_port_hashsize); 1485 pr_info("Hash tables configured (bind %d/%d)\n", sctp_port_hashsize,
1486 num_entries);
1456 1487
1457 sctp_sysctl_register(); 1488 sctp_sysctl_register();
1458 1489
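The new sizing logic converts the byte goal to a page order with get_order(), caps it at the order needed for MAX_SCTP_PORT_HASH_ENTRIES buckets, allocates whole pages, and finally rounds the number of buckets that fit down to a power of two so the hash's masking assumption holds. A worked example with assumed numbers (4 KiB pages, 32-byte buckets), not measured ones:

	/* goal = 96 KiB  ->  get_order(goal) = 5  ->  128 KiB of pages */
	order = min(get_order(goal), max_entry_order);

	/* 128 KiB / 32 bytes per bucket = 4096 buckets */
	num_entries = (1UL << order) * PAGE_SIZE /
		      sizeof(struct sctp_bind_hashbucket);

	/* 4096 is already a power of two, so nothing is lost here */
	sctp_port_hashsize = rounddown_pow_of_two(num_entries);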
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5d6a03fad378..7f0bf798205b 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -45,6 +45,7 @@
45 45
46#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 46#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
47 47
48#include <crypto/hash.h>
48#include <linux/types.h> 49#include <linux/types.h>
49#include <linux/kernel.h> 50#include <linux/kernel.h>
50#include <linux/ip.h> 51#include <linux/ip.h>
@@ -52,7 +53,6 @@
52#include <linux/net.h> 53#include <linux/net.h>
53#include <linux/inet.h> 54#include <linux/inet.h>
54#include <linux/scatterlist.h> 55#include <linux/scatterlist.h>
55#include <linux/crypto.h>
56#include <linux/slab.h> 56#include <linux/slab.h>
57#include <net/sock.h> 57#include <net/sock.h>
58 58
@@ -62,11 +62,13 @@
62#include <net/sctp/sm.h> 62#include <net/sctp/sm.h>
63 63
64static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc, 64static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
65 __u8 type, __u8 flags, int paylen); 65 __u8 type, __u8 flags, int paylen,
66 gfp_t gfp);
66static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc, 67static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
67 __u8 flags, int paylen); 68 __u8 flags, int paylen, gfp_t gfp);
68static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, 69static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
69 __u8 type, __u8 flags, int paylen); 70 __u8 type, __u8 flags, int paylen,
71 gfp_t gfp);
70static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, 72static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
71 const struct sctp_association *asoc, 73 const struct sctp_association *asoc,
72 const struct sctp_chunk *init_chunk, 74 const struct sctp_chunk *init_chunk,
@@ -318,7 +320,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
318 * PLEASE DO NOT FIXME [This version does not support Host Name.] 320 * PLEASE DO NOT FIXME [This version does not support Host Name.]
319 */ 321 */
320 322
321 retval = sctp_make_control(asoc, SCTP_CID_INIT, 0, chunksize); 323 retval = sctp_make_control(asoc, SCTP_CID_INIT, 0, chunksize, gfp);
322 if (!retval) 324 if (!retval)
323 goto nodata; 325 goto nodata;
324 326
@@ -465,7 +467,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
465 num_ext); 467 num_ext);
466 468
467 /* Now allocate and fill out the chunk. */ 469 /* Now allocate and fill out the chunk. */
468 retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize); 470 retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
469 if (!retval) 471 if (!retval)
470 goto nomem_chunk; 472 goto nomem_chunk;
471 473
@@ -570,7 +572,8 @@ struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
570 cookie_len = asoc->peer.cookie_len; 572 cookie_len = asoc->peer.cookie_len;
571 573
572 /* Build a cookie echo chunk. */ 574 /* Build a cookie echo chunk. */
573 retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ECHO, 0, cookie_len); 575 retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ECHO, 0,
576 cookie_len, GFP_ATOMIC);
574 if (!retval) 577 if (!retval)
575 goto nodata; 578 goto nodata;
576 retval->subh.cookie_hdr = 579 retval->subh.cookie_hdr =
@@ -615,7 +618,7 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
615{ 618{
616 struct sctp_chunk *retval; 619 struct sctp_chunk *retval;
617 620
618 retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ACK, 0, 0); 621 retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ACK, 0, 0, GFP_ATOMIC);
619 622
620 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 623 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
621 * 624 *
@@ -664,7 +667,7 @@ struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
664 667
665 cwr.lowest_tsn = htonl(lowest_tsn); 668 cwr.lowest_tsn = htonl(lowest_tsn);
666 retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0, 669 retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0,
667 sizeof(sctp_cwrhdr_t)); 670 sizeof(sctp_cwrhdr_t), GFP_ATOMIC);
668 671
669 if (!retval) 672 if (!retval)
670 goto nodata; 673 goto nodata;
@@ -698,7 +701,7 @@ struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
698 701
699 ecne.lowest_tsn = htonl(lowest_tsn); 702 ecne.lowest_tsn = htonl(lowest_tsn);
700 retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0, 703 retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0,
701 sizeof(sctp_ecnehdr_t)); 704 sizeof(sctp_ecnehdr_t), GFP_ATOMIC);
702 if (!retval) 705 if (!retval)
703 goto nodata; 706 goto nodata;
704 retval->subh.ecne_hdr = 707 retval->subh.ecne_hdr =
@@ -713,7 +716,8 @@ nodata:
713 */ 716 */
714struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, 717struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
715 const struct sctp_sndrcvinfo *sinfo, 718 const struct sctp_sndrcvinfo *sinfo,
716 int data_len, __u8 flags, __u16 ssn) 719 int data_len, __u8 flags, __u16 ssn,
720 gfp_t gfp)
717{ 721{
718 struct sctp_chunk *retval; 722 struct sctp_chunk *retval;
719 struct sctp_datahdr dp; 723 struct sctp_datahdr dp;
@@ -734,7 +738,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
734 dp.ssn = htons(ssn); 738 dp.ssn = htons(ssn);
735 739
736 chunk_len = sizeof(dp) + data_len; 740 chunk_len = sizeof(dp) + data_len;
737 retval = sctp_make_data(asoc, flags, chunk_len); 741 retval = sctp_make_data(asoc, flags, chunk_len, gfp);
738 if (!retval) 742 if (!retval)
739 goto nodata; 743 goto nodata;
740 744
@@ -781,7 +785,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
781 + sizeof(__u32) * num_dup_tsns; 785 + sizeof(__u32) * num_dup_tsns;
782 786
783 /* Create the chunk. */ 787 /* Create the chunk. */
784 retval = sctp_make_control(asoc, SCTP_CID_SACK, 0, len); 788 retval = sctp_make_control(asoc, SCTP_CID_SACK, 0, len, GFP_ATOMIC);
785 if (!retval) 789 if (!retval)
786 goto nodata; 790 goto nodata;
787 791
@@ -861,7 +865,7 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
861 shut.cum_tsn_ack = htonl(ctsn); 865 shut.cum_tsn_ack = htonl(ctsn);
862 866
863 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0, 867 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
864 sizeof(sctp_shutdownhdr_t)); 868 sizeof(sctp_shutdownhdr_t), GFP_ATOMIC);
865 if (!retval) 869 if (!retval)
866 goto nodata; 870 goto nodata;
867 871
@@ -879,7 +883,8 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
879{ 883{
880 struct sctp_chunk *retval; 884 struct sctp_chunk *retval;
881 885
882 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0); 886 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0,
887 GFP_ATOMIC);
883 888
884 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 889 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
885 * 890 *
@@ -908,7 +913,8 @@ struct sctp_chunk *sctp_make_shutdown_complete(
908 */ 913 */
909 flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T; 914 flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T;
910 915
911 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags, 0); 916 retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags,
917 0, GFP_ATOMIC);
912 918
913 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 919 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
914 * 920 *
@@ -947,7 +953,8 @@ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
947 flags = SCTP_CHUNK_FLAG_T; 953 flags = SCTP_CHUNK_FLAG_T;
948 } 954 }
949 955
950 retval = sctp_make_control(asoc, SCTP_CID_ABORT, flags, hint); 956 retval = sctp_make_control(asoc, SCTP_CID_ABORT, flags, hint,
957 GFP_ATOMIC);
951 958
952 /* RFC 2960 6.4 Multi-homed SCTP Endpoints 959 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
953 * 960 *
@@ -1139,7 +1146,8 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
1139 struct sctp_chunk *retval; 1146 struct sctp_chunk *retval;
1140 sctp_sender_hb_info_t hbinfo; 1147 sctp_sender_hb_info_t hbinfo;
1141 1148
1142 retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo)); 1149 retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0,
1150 sizeof(hbinfo), GFP_ATOMIC);
1143 1151
1144 if (!retval) 1152 if (!retval)
1145 goto nodata; 1153 goto nodata;
@@ -1167,7 +1175,8 @@ struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
1167{ 1175{
1168 struct sctp_chunk *retval; 1176 struct sctp_chunk *retval;
1169 1177
1170 retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen); 1178 retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen,
1179 GFP_ATOMIC);
1171 if (!retval) 1180 if (!retval)
1172 goto nodata; 1181 goto nodata;
1173 1182
@@ -1200,7 +1209,7 @@ static struct sctp_chunk *sctp_make_op_error_space(
1200 struct sctp_chunk *retval; 1209 struct sctp_chunk *retval;
1201 1210
1202 retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0, 1211 retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0,
1203 sizeof(sctp_errhdr_t) + size); 1212 sizeof(sctp_errhdr_t) + size, GFP_ATOMIC);
1204 if (!retval) 1213 if (!retval)
1205 goto nodata; 1214 goto nodata;
1206 1215
@@ -1271,7 +1280,8 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
1271 return NULL; 1280 return NULL;
1272 1281
1273 retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0, 1282 retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0,
1274 hmac_desc->hmac_len + sizeof(sctp_authhdr_t)); 1283 hmac_desc->hmac_len + sizeof(sctp_authhdr_t),
1284 GFP_ATOMIC);
1275 if (!retval) 1285 if (!retval)
1276 return NULL; 1286 return NULL;
1277 1287
@@ -1309,11 +1319,11 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
1309 */ 1319 */
1310struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, 1320struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
1311 const struct sctp_association *asoc, 1321 const struct sctp_association *asoc,
1312 struct sock *sk) 1322 struct sock *sk, gfp_t gfp)
1313{ 1323{
1314 struct sctp_chunk *retval; 1324 struct sctp_chunk *retval;
1315 1325
1316 retval = kmem_cache_zalloc(sctp_chunk_cachep, GFP_ATOMIC); 1326 retval = kmem_cache_zalloc(sctp_chunk_cachep, gfp);
1317 1327
1318 if (!retval) 1328 if (!retval)
1319 goto nodata; 1329 goto nodata;
@@ -1361,7 +1371,8 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk)
1361 * arguments, reserving enough space for a 'paylen' byte payload. 1371 * arguments, reserving enough space for a 'paylen' byte payload.
1362 */ 1372 */
1363static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, 1373static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
1364 __u8 type, __u8 flags, int paylen) 1374 __u8 type, __u8 flags, int paylen,
1375 gfp_t gfp)
1365{ 1376{
1366 struct sctp_chunk *retval; 1377 struct sctp_chunk *retval;
1367 sctp_chunkhdr_t *chunk_hdr; 1378 sctp_chunkhdr_t *chunk_hdr;
@@ -1369,8 +1380,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
1369 struct sock *sk; 1380 struct sock *sk;
1370 1381
1371 /* No need to allocate LL here, as this is only a chunk. */ 1382 /* No need to allocate LL here, as this is only a chunk. */
1372 skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), 1383 skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp);
1373 GFP_ATOMIC);
1374 if (!skb) 1384 if (!skb)
1375 goto nodata; 1385 goto nodata;
1376 1386
@@ -1381,7 +1391,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
1381 chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t)); 1391 chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t));
1382 1392
1383 sk = asoc ? asoc->base.sk : NULL; 1393 sk = asoc ? asoc->base.sk : NULL;
1384 retval = sctp_chunkify(skb, asoc, sk); 1394 retval = sctp_chunkify(skb, asoc, sk, gfp);
1385 if (!retval) { 1395 if (!retval) {
1386 kfree_skb(skb); 1396 kfree_skb(skb);
1387 goto nodata; 1397 goto nodata;
@@ -1400,16 +1410,18 @@ nodata:
1400} 1410}
1401 1411
1402static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc, 1412static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
1403 __u8 flags, int paylen) 1413 __u8 flags, int paylen, gfp_t gfp)
1404{ 1414{
1405 return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen); 1415 return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen, gfp);
1406} 1416}
1407 1417
1408static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc, 1418static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
1409 __u8 type, __u8 flags, int paylen) 1419 __u8 type, __u8 flags, int paylen,
1420 gfp_t gfp)
1410{ 1421{
1411 struct sctp_chunk *chunk = _sctp_make_chunk(asoc, type, flags, paylen); 1422 struct sctp_chunk *chunk;
1412 1423
1424 chunk = _sctp_make_chunk(asoc, type, flags, paylen, gfp);
1413 if (chunk) 1425 if (chunk)
1414 sctp_control_set_owner_w(chunk); 1426 sctp_control_set_owner_w(chunk);
1415 1427
@@ -1606,7 +1618,6 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
1606{ 1618{
1607 sctp_cookie_param_t *retval; 1619 sctp_cookie_param_t *retval;
1608 struct sctp_signed_cookie *cookie; 1620 struct sctp_signed_cookie *cookie;
1609 struct scatterlist sg;
1610 int headersize, bodysize; 1621 int headersize, bodysize;
1611 1622
1612 /* Header size is static data prior to the actual cookie, including 1623 /* Header size is static data prior to the actual cookie, including
@@ -1663,16 +1674,19 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
1663 ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len); 1674 ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len);
1664 1675
1665 if (sctp_sk(ep->base.sk)->hmac) { 1676 if (sctp_sk(ep->base.sk)->hmac) {
1666 struct hash_desc desc; 1677 SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac);
1678 int err;
1667 1679
1668 /* Sign the message. */ 1680 /* Sign the message. */
1669 sg_init_one(&sg, &cookie->c, bodysize); 1681 desc->tfm = sctp_sk(ep->base.sk)->hmac;
1670 desc.tfm = sctp_sk(ep->base.sk)->hmac; 1682 desc->flags = 0;
1671 desc.flags = 0; 1683
1672 1684 err = crypto_shash_setkey(desc->tfm, ep->secret_key,
1673 if (crypto_hash_setkey(desc.tfm, ep->secret_key, 1685 sizeof(ep->secret_key)) ?:
1674 sizeof(ep->secret_key)) || 1686 crypto_shash_digest(desc, (u8 *)&cookie->c, bodysize,
1675 crypto_hash_digest(&desc, &sg, bodysize, cookie->signature)) 1687 cookie->signature);
1688 shash_desc_zero(desc);
1689 if (err)
1676 goto free_cookie; 1690 goto free_cookie;
1677 } 1691 }
1678 1692
@@ -1697,12 +1711,10 @@ struct sctp_association *sctp_unpack_cookie(
1697 struct sctp_cookie *bear_cookie; 1711 struct sctp_cookie *bear_cookie;
1698 int headersize, bodysize, fixed_size; 1712 int headersize, bodysize, fixed_size;
1699 __u8 *digest = ep->digest; 1713 __u8 *digest = ep->digest;
1700 struct scatterlist sg;
1701 unsigned int len; 1714 unsigned int len;
1702 sctp_scope_t scope; 1715 sctp_scope_t scope;
1703 struct sk_buff *skb = chunk->skb; 1716 struct sk_buff *skb = chunk->skb;
1704 ktime_t kt; 1717 ktime_t kt;
1705 struct hash_desc desc;
1706 1718
1707 /* Header size is static data prior to the actual cookie, including 1719 /* Header size is static data prior to the actual cookie, including
1708 * any padding. 1720 * any padding.
@@ -1733,16 +1745,23 @@ struct sctp_association *sctp_unpack_cookie(
1733 goto no_hmac; 1745 goto no_hmac;
1734 1746
1735 /* Check the signature. */ 1747 /* Check the signature. */
1736 sg_init_one(&sg, bear_cookie, bodysize); 1748 {
1737 desc.tfm = sctp_sk(ep->base.sk)->hmac; 1749 SHASH_DESC_ON_STACK(desc, sctp_sk(ep->base.sk)->hmac);
1738 desc.flags = 0; 1750 int err;
1739 1751
1740 memset(digest, 0x00, SCTP_SIGNATURE_SIZE); 1752 desc->tfm = sctp_sk(ep->base.sk)->hmac;
1741 if (crypto_hash_setkey(desc.tfm, ep->secret_key, 1753 desc->flags = 0;
1742 sizeof(ep->secret_key)) || 1754
1743 crypto_hash_digest(&desc, &sg, bodysize, digest)) { 1755 err = crypto_shash_setkey(desc->tfm, ep->secret_key,
1744 *error = -SCTP_IERROR_NOMEM; 1756 sizeof(ep->secret_key)) ?:
1745 goto fail; 1757 crypto_shash_digest(desc, (u8 *)bear_cookie, bodysize,
1758 digest);
1759 shash_desc_zero(desc);
1760
1761 if (err) {
1762 *error = -SCTP_IERROR_NOMEM;
1763 goto fail;
1764 }
1746 } 1765 }
1747 1766
1748 if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { 1767 if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) {
@@ -1830,7 +1849,8 @@ no_hmac:
1830 /* Also, add the destination address. */ 1849 /* Also, add the destination address. */
1831 if (list_empty(&retval->base.bind_addr.address_list)) { 1850 if (list_empty(&retval->base.bind_addr.address_list)) {
1832 sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1851 sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest,
1833 SCTP_ADDR_SRC, GFP_ATOMIC); 1852 sizeof(chunk->dest), SCTP_ADDR_SRC,
1853 GFP_ATOMIC);
1834 } 1854 }
1835 1855
1836 retval->next_tsn = retval->c.initial_tsn; 1856 retval->next_tsn = retval->c.initial_tsn;
@@ -2756,7 +2776,8 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
2756 length += addrlen; 2776 length += addrlen;
2757 2777
2758 /* Create the chunk. */ 2778 /* Create the chunk. */
2759 retval = sctp_make_control(asoc, SCTP_CID_ASCONF, 0, length); 2779 retval = sctp_make_control(asoc, SCTP_CID_ASCONF, 0, length,
2780 GFP_ATOMIC);
2760 if (!retval) 2781 if (!retval)
2761 return NULL; 2782 return NULL;
2762 2783
@@ -2940,7 +2961,8 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as
2940 int length = sizeof(asconf) + vparam_len; 2961 int length = sizeof(asconf) + vparam_len;
2941 2962
2942 /* Create the chunk. */ 2963 /* Create the chunk. */
2943 retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length); 2964 retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length,
2965 GFP_ATOMIC);
2944 if (!retval) 2966 if (!retval)
2945 return NULL; 2967 return NULL;
2946 2968
@@ -3500,7 +3522,7 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
3500 3522
3501 hint = (nstreams + 1) * sizeof(__u32); 3523 hint = (nstreams + 1) * sizeof(__u32);
3502 3524
3503 retval = sctp_make_control(asoc, SCTP_CID_FWD_TSN, 0, hint); 3525 retval = sctp_make_control(asoc, SCTP_CID_FWD_TSN, 0, hint, GFP_ATOMIC);
3504 3526
3505 if (!retval) 3527 if (!retval)
3506 return NULL; 3528 return NULL;
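The cookie signing and verification blocks above chain crypto_shash_setkey() and crypto_shash_digest() with the GNU "a ?: b" extension, so the digest only runs when setkey returned 0. Written out in plain C, the signing side is roughly equivalent to:

	err = crypto_shash_setkey(desc->tfm, ep->secret_key,
				  sizeof(ep->secret_key));
	if (!err)
		err = crypto_shash_digest(desc, (u8 *)&cookie->c, bodysize,
					  cookie->signature);
	shash_desc_zero(desc);
	if (err)
		goto free_cookie;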
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 2e21384697c2..7fe56d0acabf 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -215,10 +215,14 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force,
215 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, 215 sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART,
216 SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); 216 SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
217 } else { 217 } else {
218 __u32 old_a_rwnd = asoc->a_rwnd;
219
218 asoc->a_rwnd = asoc->rwnd; 220 asoc->a_rwnd = asoc->rwnd;
219 sack = sctp_make_sack(asoc); 221 sack = sctp_make_sack(asoc);
220 if (!sack) 222 if (!sack) {
223 asoc->a_rwnd = old_a_rwnd;
221 goto nomem; 224 goto nomem;
225 }
222 226
223 asoc->peer.sack_needed = 0; 227 asoc->peer.sack_needed = 0;
224 asoc->peer.sack_cnt = 0; 228 asoc->peer.sack_cnt = 0;
@@ -259,12 +263,6 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
259 goto out_unlock; 263 goto out_unlock;
260 } 264 }
261 265
262 /* Is this transport really dead and just waiting around for
263 * the timer to let go of the reference?
264 */
265 if (transport->dead)
266 goto out_unlock;
267
268 /* Run through the state machine. */ 266 /* Run through the state machine. */
269 error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, 267 error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
270 SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX), 268 SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX),
@@ -380,12 +378,6 @@ void sctp_generate_heartbeat_event(unsigned long data)
380 goto out_unlock; 378 goto out_unlock;
381 } 379 }
382 380
383 /* Is this structure just waiting around for us to actually
384 * get destroyed?
385 */
386 if (transport->dead)
387 goto out_unlock;
388
389 error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, 381 error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
390 SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT), 382 SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
391 asoc->state, asoc->ep, asoc, 383 asoc->state, asoc->ep, asoc,
@@ -1031,13 +1023,13 @@ static void sctp_cmd_t1_timer_update(struct sctp_association *asoc,
1031 * encouraged for small fragments. 1023 * encouraged for small fragments.
1032 */ 1024 */
1033static int sctp_cmd_send_msg(struct sctp_association *asoc, 1025static int sctp_cmd_send_msg(struct sctp_association *asoc,
1034 struct sctp_datamsg *msg) 1026 struct sctp_datamsg *msg, gfp_t gfp)
1035{ 1027{
1036 struct sctp_chunk *chunk; 1028 struct sctp_chunk *chunk;
1037 int error = 0; 1029 int error = 0;
1038 1030
1039 list_for_each_entry(chunk, &msg->chunks, frag_list) { 1031 list_for_each_entry(chunk, &msg->chunks, frag_list) {
1040 error = sctp_outq_tail(&asoc->outqueue, chunk); 1032 error = sctp_outq_tail(&asoc->outqueue, chunk, gfp);
1041 if (error) 1033 if (error)
1042 break; 1034 break;
1043 } 1035 }
@@ -1261,7 +1253,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1261 case SCTP_CMD_NEW_ASOC: 1253 case SCTP_CMD_NEW_ASOC:
1262 /* Register a new association. */ 1254 /* Register a new association. */
1263 if (local_cork) { 1255 if (local_cork) {
1264 sctp_outq_uncork(&asoc->outqueue); 1256 sctp_outq_uncork(&asoc->outqueue, gfp);
1265 local_cork = 0; 1257 local_cork = 0;
1266 } 1258 }
1267 1259
@@ -1281,7 +1273,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1281 1273
1282 case SCTP_CMD_DELETE_TCB: 1274 case SCTP_CMD_DELETE_TCB:
1283 if (local_cork) { 1275 if (local_cork) {
1284 sctp_outq_uncork(&asoc->outqueue); 1276 sctp_outq_uncork(&asoc->outqueue, gfp);
1285 local_cork = 0; 1277 local_cork = 0;
1286 } 1278 }
1287 /* Delete the current association. */ 1279 /* Delete the current association. */
@@ -1435,13 +1427,14 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1435 local_cork = 1; 1427 local_cork = 1;
1436 } 1428 }
1437 /* Send a chunk to our peer. */ 1429 /* Send a chunk to our peer. */
1438 error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk); 1430 error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk,
1431 gfp);
1439 break; 1432 break;
1440 1433
1441 case SCTP_CMD_SEND_PKT: 1434 case SCTP_CMD_SEND_PKT:
1442 /* Send a full packet to our peer. */ 1435 /* Send a full packet to our peer. */
1443 packet = cmd->obj.packet; 1436 packet = cmd->obj.packet;
1444 sctp_packet_transmit(packet); 1437 sctp_packet_transmit(packet, gfp);
1445 sctp_ootb_pkt_free(packet); 1438 sctp_ootb_pkt_free(packet);
1446 break; 1439 break;
1447 1440
@@ -1651,7 +1644,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1651 */ 1644 */
1652 chunk->pdiscard = 1; 1645 chunk->pdiscard = 1;
1653 if (asoc) { 1646 if (asoc) {
1654 sctp_outq_uncork(&asoc->outqueue); 1647 sctp_outq_uncork(&asoc->outqueue, gfp);
1655 local_cork = 0; 1648 local_cork = 0;
1656 } 1649 }
1657 break; 1650 break;
@@ -1689,7 +1682,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1689 case SCTP_CMD_FORCE_PRIM_RETRAN: 1682 case SCTP_CMD_FORCE_PRIM_RETRAN:
1690 t = asoc->peer.retran_path; 1683 t = asoc->peer.retran_path;
1691 asoc->peer.retran_path = asoc->peer.primary_path; 1684 asoc->peer.retran_path = asoc->peer.primary_path;
1692 error = sctp_outq_uncork(&asoc->outqueue); 1685 error = sctp_outq_uncork(&asoc->outqueue, gfp);
1693 local_cork = 0; 1686 local_cork = 0;
1694 asoc->peer.retran_path = t; 1687 asoc->peer.retran_path = t;
1695 break; 1688 break;
@@ -1716,7 +1709,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1716 sctp_outq_cork(&asoc->outqueue); 1709 sctp_outq_cork(&asoc->outqueue);
1717 local_cork = 1; 1710 local_cork = 1;
1718 } 1711 }
1719 error = sctp_cmd_send_msg(asoc, cmd->obj.msg); 1712 error = sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp);
1720 break; 1713 break;
1721 case SCTP_CMD_SEND_NEXT_ASCONF: 1714 case SCTP_CMD_SEND_NEXT_ASCONF:
1722 sctp_cmd_send_asconf(asoc); 1715 sctp_cmd_send_asconf(asoc);
@@ -1746,9 +1739,9 @@ out:
1746 */ 1739 */
1747 if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) { 1740 if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) {
1748 if (chunk->end_of_packet || chunk->singleton) 1741 if (chunk->end_of_packet || chunk->singleton)
1749 error = sctp_outq_uncork(&asoc->outqueue); 1742 error = sctp_outq_uncork(&asoc->outqueue, gfp);
1750 } else if (local_cork) 1743 } else if (local_cork)
1751 error = sctp_outq_uncork(&asoc->outqueue); 1744 error = sctp_outq_uncork(&asoc->outqueue, gfp);
1752 return error; 1745 return error;
1753nomem: 1746nomem:
1754 error = -ENOMEM; 1747 error = -ENOMEM;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9bb80ec4c08f..878d28eda1a6 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -52,6 +52,7 @@
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54 54
55#include <crypto/hash.h>
55#include <linux/types.h> 56#include <linux/types.h>
56#include <linux/kernel.h> 57#include <linux/kernel.h>
57#include <linux/wait.h> 58#include <linux/wait.h>
@@ -61,7 +62,6 @@
61#include <linux/fcntl.h> 62#include <linux/fcntl.h>
62#include <linux/poll.h> 63#include <linux/poll.h>
63#include <linux/init.h> 64#include <linux/init.h>
64#include <linux/crypto.h>
65#include <linux/slab.h> 65#include <linux/slab.h>
66#include <linux/file.h> 66#include <linux/file.h>
67#include <linux/compat.h> 67#include <linux/compat.h>
@@ -386,7 +386,8 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
386 /* Add the address to the bind address list. 386 /* Add the address to the bind address list.
387 * Use GFP_ATOMIC since BHs will be disabled. 387 * Use GFP_ATOMIC since BHs will be disabled.
388 */ 388 */
389 ret = sctp_add_bind_addr(bp, addr, SCTP_ADDR_SRC, GFP_ATOMIC); 389 ret = sctp_add_bind_addr(bp, addr, af->sockaddr_len,
390 SCTP_ADDR_SRC, GFP_ATOMIC);
390 391
391 /* Copy back into socket for getsockname() use. */ 392 /* Copy back into socket for getsockname() use. */
392 if (!ret) { 393 if (!ret) {
@@ -577,6 +578,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
577 af = sctp_get_af_specific(addr->v4.sin_family); 578 af = sctp_get_af_specific(addr->v4.sin_family);
578 memcpy(&saveaddr, addr, af->sockaddr_len); 579 memcpy(&saveaddr, addr, af->sockaddr_len);
579 retval = sctp_add_bind_addr(bp, &saveaddr, 580 retval = sctp_add_bind_addr(bp, &saveaddr,
581 sizeof(saveaddr),
580 SCTP_ADDR_NEW, GFP_ATOMIC); 582 SCTP_ADDR_NEW, GFP_ATOMIC);
581 addr_buf += af->sockaddr_len; 583 addr_buf += af->sockaddr_len;
582 } 584 }
@@ -1389,7 +1391,7 @@ static int sctp_getsockopt_connectx3(struct sock *sk, int len,
1389 int err = 0; 1391 int err = 0;
1390 1392
1391#ifdef CONFIG_COMPAT 1393#ifdef CONFIG_COMPAT
1392 if (is_compat_task()) { 1394 if (in_compat_syscall()) {
1393 struct compat_sctp_getaddrs_old param32; 1395 struct compat_sctp_getaddrs_old param32;
1394 1396
1395 if (len < sizeof(param32)) 1397 if (len < sizeof(param32))
@@ -4160,7 +4162,7 @@ static void sctp_destruct_sock(struct sock *sk)
4160 struct sctp_sock *sp = sctp_sk(sk); 4162 struct sctp_sock *sp = sctp_sk(sk);
4161 4163
4162 /* Free up the HMAC transform. */ 4164 /* Free up the HMAC transform. */
4163 crypto_free_hash(sp->hmac); 4165 crypto_free_shash(sp->hmac);
4164 4166
4165 inet_sock_destruct(sk); 4167 inet_sock_destruct(sk);
4166} 4168}
@@ -5538,6 +5540,7 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
5538 struct sctp_hmac_algo_param *hmacs; 5540 struct sctp_hmac_algo_param *hmacs;
5539 __u16 data_len = 0; 5541 __u16 data_len = 0;
5540 u32 num_idents; 5542 u32 num_idents;
5543 int i;
5541 5544
5542 if (!ep->auth_enable) 5545 if (!ep->auth_enable)
5543 return -EACCES; 5546 return -EACCES;
@@ -5555,8 +5558,12 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
5555 return -EFAULT; 5558 return -EFAULT;
5556 if (put_user(num_idents, &p->shmac_num_idents)) 5559 if (put_user(num_idents, &p->shmac_num_idents))
5557 return -EFAULT; 5560 return -EFAULT;
5558 if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len)) 5561 for (i = 0; i < num_idents; i++) {
5559 return -EFAULT; 5562 __u16 hmacid = ntohs(hmacs->hmac_ids[i]);
5563
5564 if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16)))
5565 return -EFAULT;
5566 }
5560 return 0; 5567 return 0;
5561} 5568}
5562 5569
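In the sctp_getsockopt_hmac_ident() hunk above, the single bulk copy_to_user() of hmacs->hmac_ids is replaced by a per-entry loop because the stored identifiers are 16-bit values in network byte order while the socket-option ABI expects host-order __u16; each entry therefore goes through ntohs() on its way out. The same idea in isolation, with illustrative names rather than the SCTP structures:

#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <asm/byteorder.h>

/* Sketch: copy an array of big-endian 16-bit identifiers to userspace,
 * converting each one to host byte order. */
static int copy_ids_to_user(__u16 __user *dst, const __be16 *ids, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		__u16 id = ntohs(ids[i]);

		if (copy_to_user(&dst[i], &id, sizeof(id)))
			return -EFAULT;
	}
	return 0;
}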
@@ -6101,9 +6108,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
6101 return retval; 6108 return retval;
6102} 6109}
6103 6110
6104static void sctp_hash(struct sock *sk) 6111static int sctp_hash(struct sock *sk)
6105{ 6112{
6106 /* STUB */ 6113 /* STUB */
6114 return 0;
6107} 6115}
6108 6116
6109static void sctp_unhash(struct sock *sk) 6117static void sctp_unhash(struct sock *sk)
@@ -6299,13 +6307,13 @@ static int sctp_listen_start(struct sock *sk, int backlog)
6299{ 6307{
6300 struct sctp_sock *sp = sctp_sk(sk); 6308 struct sctp_sock *sp = sctp_sk(sk);
6301 struct sctp_endpoint *ep = sp->ep; 6309 struct sctp_endpoint *ep = sp->ep;
6302 struct crypto_hash *tfm = NULL; 6310 struct crypto_shash *tfm = NULL;
6303 char alg[32]; 6311 char alg[32];
6304 6312
6305 /* Allocate HMAC for generating cookie. */ 6313 /* Allocate HMAC for generating cookie. */
6306 if (!sp->hmac && sp->sctp_hmac_alg) { 6314 if (!sp->hmac && sp->sctp_hmac_alg) {
6307 sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg); 6315 sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg);
6308 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC); 6316 tfm = crypto_alloc_shash(alg, 0, 0);
6309 if (IS_ERR(tfm)) { 6317 if (IS_ERR(tfm)) {
6310 net_info_ratelimited("failed to load transform for %s: %ld\n", 6318 net_info_ratelimited("failed to load transform for %s: %ld\n",
6311 sp->sctp_hmac_alg, PTR_ERR(tfm)); 6319 sp->sctp_hmac_alg, PTR_ERR(tfm));
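The socket.c changes above migrate the cookie HMAC from the legacy crypto_hash interface to crypto_shash: the transform is allocated with crypto_alloc_shash() (no CRYPTO_ALG_ASYNC mask is needed, shash is always synchronous) and released with crypto_free_shash(). For reference, a self-contained sketch of computing an HMAC through the shash API with an on-stack descriptor; the "hmac(sha1)" algorithm name and the 4.6-era flags field are assumptions of the sketch, not taken from the patch:

#include <crypto/hash.h>
#include <linux/err.h>

/* Sketch only: HMAC-SHA1 over 'data', result written to 'out' (20 bytes). */
static int hmac_sha1_digest(const u8 *key, unsigned int keylen,
			    const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("hmac(sha1)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_setkey(tfm, key, keylen);
	if (!err) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;	/* field still present in this kernel generation */
		err = crypto_shash_digest(desc, data, len, out);
	}
	crypto_free_shash(tfm);
	return err;
}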
@@ -6636,6 +6644,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
6636 6644
6637 if (cmsgs->srinfo->sinfo_flags & 6645 if (cmsgs->srinfo->sinfo_flags &
6638 ~(SCTP_UNORDERED | SCTP_ADDR_OVER | 6646 ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
6647 SCTP_SACK_IMMEDIATELY |
6639 SCTP_ABORT | SCTP_EOF)) 6648 SCTP_ABORT | SCTP_EOF))
6640 return -EINVAL; 6649 return -EINVAL;
6641 break; 6650 break;
@@ -6659,6 +6668,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
6659 6668
6660 if (cmsgs->sinfo->snd_flags & 6669 if (cmsgs->sinfo->snd_flags &
6661 ~(SCTP_UNORDERED | SCTP_ADDR_OVER | 6670 ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
6671 SCTP_SACK_IMMEDIATELY |
6662 SCTP_ABORT | SCTP_EOF)) 6672 SCTP_ABORT | SCTP_EOF))
6663 return -EINVAL; 6673 return -EINVAL;
6664 break; 6674 break;
@@ -7246,14 +7256,12 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
7246 /* Hook this new socket in to the bind_hash list. */ 7256 /* Hook this new socket in to the bind_hash list. */
7247 head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk), 7257 head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk),
7248 inet_sk(oldsk)->inet_num)]; 7258 inet_sk(oldsk)->inet_num)];
7249 local_bh_disable(); 7259 spin_lock_bh(&head->lock);
7250 spin_lock(&head->lock);
7251 pp = sctp_sk(oldsk)->bind_hash; 7260 pp = sctp_sk(oldsk)->bind_hash;
7252 sk_add_bind_node(newsk, &pp->owner); 7261 sk_add_bind_node(newsk, &pp->owner);
7253 sctp_sk(newsk)->bind_hash = pp; 7262 sctp_sk(newsk)->bind_hash = pp;
7254 inet_sk(newsk)->inet_num = inet_sk(oldsk)->inet_num; 7263 inet_sk(newsk)->inet_num = inet_sk(oldsk)->inet_num;
7255 spin_unlock(&head->lock); 7264 spin_unlock_bh(&head->lock);
7256 local_bh_enable();
7257 7265
7258 /* Copy the bind_addr list from the original endpoint to the new 7266 /* Copy the bind_addr list from the original endpoint to the new
7259 * endpoint so that we can handle restarts properly 7267 * endpoint so that we can handle restarts properly
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index aab9e3f29755..9b6b48c7524e 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
72 */ 72 */
73 peer->rto = msecs_to_jiffies(net->sctp.rto_initial); 73 peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
74 74
75 peer->last_time_heard = ktime_get(); 75 peer->last_time_heard = ktime_set(0, 0);
76 peer->last_time_ecne_reduced = jiffies; 76 peer->last_time_ecne_reduced = jiffies;
77 77
78 peer->param_flags = SPP_HB_DISABLE | 78 peer->param_flags = SPP_HB_DISABLE |
@@ -132,8 +132,6 @@ fail:
132 */ 132 */
133void sctp_transport_free(struct sctp_transport *transport) 133void sctp_transport_free(struct sctp_transport *transport)
134{ 134{
135 transport->dead = 1;
136
137 /* Try to delete the heartbeat timer. */ 135 /* Try to delete the heartbeat timer. */
138 if (del_timer(&transport->hb_timer)) 136 if (del_timer(&transport->hb_timer))
139 sctp_transport_put(transport); 137 sctp_transport_put(transport);
@@ -169,7 +167,7 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head)
169 */ 167 */
170static void sctp_transport_destroy(struct sctp_transport *transport) 168static void sctp_transport_destroy(struct sctp_transport *transport)
171{ 169{
172 if (unlikely(!transport->dead)) { 170 if (unlikely(atomic_read(&transport->refcnt))) {
173 WARN(1, "Attempt to destroy undead transport %p!\n", transport); 171 WARN(1, "Attempt to destroy undead transport %p!\n", transport);
174 return; 172 return;
175 } 173 }
@@ -228,7 +226,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
228 } 226 }
229 227
230 if (transport->dst) { 228 if (transport->dst) {
231 transport->pathmtu = dst_mtu(transport->dst); 229 transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
232 } else 230 } else
233 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; 231 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
234} 232}
@@ -282,7 +280,7 @@ void sctp_transport_route(struct sctp_transport *transport,
282 return; 280 return;
283 } 281 }
284 if (transport->dst) { 282 if (transport->dst) {
285 transport->pathmtu = dst_mtu(transport->dst); 283 transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
286 284
287 /* Initialize sk->sk_rcv_saddr, if the transport is the 285 /* Initialize sk->sk_rcv_saddr, if the transport is the
288 * association's active path for getsockname(). 286 * association's active path for getsockname().
@@ -296,9 +294,9 @@ void sctp_transport_route(struct sctp_transport *transport,
296} 294}
297 295
298/* Hold a reference to a transport. */ 296/* Hold a reference to a transport. */
299void sctp_transport_hold(struct sctp_transport *transport) 297int sctp_transport_hold(struct sctp_transport *transport)
300{ 298{
301 atomic_inc(&transport->refcnt); 299 return atomic_add_unless(&transport->refcnt, 1, 0);
302} 300}
303 301
304/* Release a reference to a transport and clean up 302/* Release a reference to a transport and clean up
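The transport.c hunks above drop the transport->dead flag and instead make sctp_transport_hold() report whether a reference was actually taken: atomic_add_unless(&refcnt, 1, 0) only increments the count if it has not already reached zero, so a lookup racing with the final put can notice that the object is on its way out. A generic sketch of this "get unless already freed" pattern, using a hypothetical object type:

#include <linux/atomic.h>
#include <linux/slab.h>

struct obj {
	atomic_t refcnt;
	/* ... payload ... */
};

/* Returns non-zero if a reference was taken, 0 if the refcount had already
 * dropped to zero and the object must not be used. */
static int obj_hold(struct obj *o)
{
	return atomic_add_unless(&o->refcnt, 1, 0);
}

static void obj_put(struct obj *o)
{
	if (atomic_dec_and_test(&o->refcnt))
		kfree(o);	/* or call_rcu(...) when lookups are RCU-protected */
}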
diff --git a/net/socket.c b/net/socket.c
index c044d1e8508c..5f77a8e93830 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -533,7 +533,7 @@ static const struct inode_operations sockfs_inode_ops = {
533 * NULL is returned. 533 * NULL is returned.
534 */ 534 */
535 535
536static struct socket *sock_alloc(void) 536struct socket *sock_alloc(void)
537{ 537{
538 struct inode *inode; 538 struct inode *inode;
539 struct socket *sock; 539 struct socket *sock;
@@ -554,6 +554,7 @@ static struct socket *sock_alloc(void)
554 this_cpu_add(sockets_in_use, 1); 554 this_cpu_add(sockets_in_use, 1);
555 return sock; 555 return sock;
556} 556}
557EXPORT_SYMBOL(sock_alloc);
557 558
558/** 559/**
559 * sock_release - close a socket 560 * sock_release - close a socket
@@ -1106,12 +1107,8 @@ int __sock_create(struct net *net, int family, int type, int protocol,
1106 deadlock in module load. 1107 deadlock in module load.
1107 */ 1108 */
1108 if (family == PF_INET && type == SOCK_PACKET) { 1109 if (family == PF_INET && type == SOCK_PACKET) {
1109 static int warned; 1110 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1110 if (!warned) { 1111 current->comm);
1111 warned = 1;
1112 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1113 current->comm);
1114 }
1115 family = PF_PACKET; 1112 family = PF_PACKET;
1116 } 1113 }
1117 1114
@@ -1874,7 +1871,8 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
1874 1871
1875static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, 1872static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
1876 struct msghdr *msg_sys, unsigned int flags, 1873 struct msghdr *msg_sys, unsigned int flags,
1877 struct used_address *used_address) 1874 struct used_address *used_address,
1875 unsigned int allowed_msghdr_flags)
1878{ 1876{
1879 struct compat_msghdr __user *msg_compat = 1877 struct compat_msghdr __user *msg_compat =
1880 (struct compat_msghdr __user *)msg; 1878 (struct compat_msghdr __user *)msg;
@@ -1900,6 +1898,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
1900 1898
1901 if (msg_sys->msg_controllen > INT_MAX) 1899 if (msg_sys->msg_controllen > INT_MAX)
1902 goto out_freeiov; 1900 goto out_freeiov;
1901 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
1903 ctl_len = msg_sys->msg_controllen; 1902 ctl_len = msg_sys->msg_controllen;
1904 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1903 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1905 err = 1904 err =
@@ -1978,7 +1977,7 @@ long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
1978 if (!sock) 1977 if (!sock)
1979 goto out; 1978 goto out;
1980 1979
1981 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL); 1980 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
1982 1981
1983 fput_light(sock->file, fput_needed); 1982 fput_light(sock->file, fput_needed);
1984out: 1983out:
@@ -2005,6 +2004,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2005 struct compat_mmsghdr __user *compat_entry; 2004 struct compat_mmsghdr __user *compat_entry;
2006 struct msghdr msg_sys; 2005 struct msghdr msg_sys;
2007 struct used_address used_address; 2006 struct used_address used_address;
2007 unsigned int oflags = flags;
2008 2008
2009 if (vlen > UIO_MAXIOV) 2009 if (vlen > UIO_MAXIOV)
2010 vlen = UIO_MAXIOV; 2010 vlen = UIO_MAXIOV;
@@ -2019,11 +2019,15 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2019 entry = mmsg; 2019 entry = mmsg;
2020 compat_entry = (struct compat_mmsghdr __user *)mmsg; 2020 compat_entry = (struct compat_mmsghdr __user *)mmsg;
2021 err = 0; 2021 err = 0;
2022 flags |= MSG_BATCH;
2022 2023
2023 while (datagrams < vlen) { 2024 while (datagrams < vlen) {
2025 if (datagrams == vlen - 1)
2026 flags = oflags;
2027
2024 if (MSG_CMSG_COMPAT & flags) { 2028 if (MSG_CMSG_COMPAT & flags) {
2025 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry, 2029 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
2026 &msg_sys, flags, &used_address); 2030 &msg_sys, flags, &used_address, MSG_EOR);
2027 if (err < 0) 2031 if (err < 0)
2028 break; 2032 break;
2029 err = __put_user(err, &compat_entry->msg_len); 2033 err = __put_user(err, &compat_entry->msg_len);
@@ -2031,7 +2035,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2031 } else { 2035 } else {
2032 err = ___sys_sendmsg(sock, 2036 err = ___sys_sendmsg(sock,
2033 (struct user_msghdr __user *)entry, 2037 (struct user_msghdr __user *)entry,
2034 &msg_sys, flags, &used_address); 2038 &msg_sys, flags, &used_address, MSG_EOR);
2035 if (err < 0) 2039 if (err < 0)
2036 break; 2040 break;
2037 err = put_user(err, &entry->msg_len); 2041 err = put_user(err, &entry->msg_len);
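In __sys_sendmmsg() above, MSG_BATCH is ORed into the flags for every message except the last (the saved oflags are restored on the final iteration), and ___sys_sendmsg() grows an allowed_msghdr_flags argument so sendmmsg can let a per-message MSG_EOR from the msghdr through. The batching control flow, reduced to its core with hypothetical types and helpers:

#include <linux/socket.h>
#include <linux/types.h>

struct item {				/* hypothetical per-message payload */
	void *buf;
	size_t len;
};

int send_one(const struct item *it, unsigned int flags);	/* hypothetical sender */

/* Sketch: mark every element of a batch except the last with MSG_BATCH so
 * the protocol may defer work (e.g. flushing) until the final message. */
static int send_batch(struct item *items, unsigned int n, unsigned int flags)
{
	unsigned int oflags = flags;	/* caller's original flags */
	unsigned int i;
	int err = 0;

	flags |= MSG_BATCH;
	for (i = 0; i < n && !err; i++) {
		if (i == n - 1)
			flags = oflags;	/* last message: drop the hint */
		err = send_one(&items[i], flags);
	}
	return err;
}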
@@ -2240,31 +2244,31 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2240 cond_resched(); 2244 cond_resched();
2241 } 2245 }
2242 2246
2243out_put:
2244 fput_light(sock->file, fput_needed);
2245
2246 if (err == 0) 2247 if (err == 0)
2247 return datagrams; 2248 goto out_put;
2249
2250 if (datagrams == 0) {
2251 datagrams = err;
2252 goto out_put;
2253 }
2248 2254
2249 if (datagrams != 0) { 2255 /*
2256 * We may return less entries than requested (vlen) if the
2257 * sock is non block and there aren't enough datagrams...
2258 */
2259 if (err != -EAGAIN) {
2250 /* 2260 /*
2251 * We may return less entries than requested (vlen) if the 2261 * ... or if recvmsg returns an error after we
2252 * sock is non block and there aren't enough datagrams... 2262 * received some datagrams, where we record the
2263 * error to return on the next call or if the
2264 * app asks about it using getsockopt(SO_ERROR).
2253 */ 2265 */
2254 if (err != -EAGAIN) { 2266 sock->sk->sk_err = -err;
2255 /*
2256 * ... or if recvmsg returns an error after we
2257 * received some datagrams, where we record the
2258 * error to return on the next call or if the
2259 * app asks about it using getsockopt(SO_ERROR).
2260 */
2261 sock->sk->sk_err = -err;
2262 }
2263
2264 return datagrams;
2265 } 2267 }
2268out_put:
2269 fput_light(sock->file, fput_needed);
2266 2270
2267 return err; 2271 return datagrams;
2268} 2272}
2269 2273
2270SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, 2274SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
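The __sys_recvmmsg() hunk above folds the two early returns into a single out_put exit so the file reference is dropped on every path, while keeping the existing policy: if any datagrams were received, return their count and stash a non-EAGAIN error in sk_err so it surfaces on the next call (or via getsockopt(SO_ERROR)); only when nothing was received is the error returned directly. Pieced together from the new-side lines, the restructured tail reads roughly:

	if (err == 0)
		goto out_put;			/* full success: return the count */

	if (datagrams == 0) {
		datagrams = err;		/* nothing received: return the error */
		goto out_put;
	}

	/*
	 * We may return less entries than requested (vlen) if the
	 * sock is non block and there aren't enough datagrams...
	 */
	if (err != -EAGAIN) {
		/*
		 * ... or if recvmsg returns an error after we received some
		 * datagrams, where we record the error to return on the next
		 * call or if the app asks about it using getsockopt(SO_ERROR).
		 */
		sock->sk->sk_err = -err;
	}
out_put:
	fput_light(sock->file, fput_needed);

	return datagrams;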
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index b512fbd9d79a..ea7ffa12e0f9 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -12,7 +12,8 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
12 svc.o svcsock.o svcauth.o svcauth_unix.o \ 12 svc.o svcsock.o svcauth.o svcauth_unix.o \
13 addr.o rpcb_clnt.o timer.o xdr.o \ 13 addr.o rpcb_clnt.o timer.o xdr.o \
14 sunrpc_syms.o cache.o rpc_pipe.o \ 14 sunrpc_syms.o cache.o rpc_pipe.o \
15 svc_xprt.o 15 svc_xprt.o \
16 xprtmultipath.o
16sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o 17sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o
17sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o 18sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o
18sunrpc-$(CONFIG_PROC_FS) += stats.o 19sunrpc-$(CONFIG_PROC_FS) += stats.o
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 799e65b944b9..8c6bc795f060 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -740,7 +740,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
740 default: 740 default:
741 printk(KERN_CRIT "%s: bad return from " 741 printk(KERN_CRIT "%s: bad return from "
742 "gss_fill_context: %zd\n", __func__, err); 742 "gss_fill_context: %zd\n", __func__, err);
743 BUG(); 743 gss_msg->msg.errno = -EIO;
744 } 744 }
745 goto err_release_msg; 745 goto err_release_msg;
746 } 746 }
@@ -1181,12 +1181,12 @@ static struct rpc_auth *
1181gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) 1181gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
1182{ 1182{
1183 struct gss_auth *gss_auth; 1183 struct gss_auth *gss_auth;
1184 struct rpc_xprt *xprt = rcu_access_pointer(clnt->cl_xprt); 1184 struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch);
1185 1185
1186 while (clnt != clnt->cl_parent) { 1186 while (clnt != clnt->cl_parent) {
1187 struct rpc_clnt *parent = clnt->cl_parent; 1187 struct rpc_clnt *parent = clnt->cl_parent;
1188 /* Find the original parent for this transport */ 1188 /* Find the original parent for this transport */
1189 if (rcu_access_pointer(parent->cl_xprt) != xprt) 1189 if (rcu_access_pointer(parent->cl_xpi.xpi_xpswitch) != xps)
1190 break; 1190 break;
1191 clnt = parent; 1191 clnt = parent;
1192 } 1192 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index fee3c15a4b52..d94a8e1e9f05 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -34,11 +34,12 @@
34 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 34 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
35 */ 35 */
36 36
37#include <crypto/hash.h>
38#include <crypto/skcipher.h>
37#include <linux/err.h> 39#include <linux/err.h>
38#include <linux/types.h> 40#include <linux/types.h>
39#include <linux/mm.h> 41#include <linux/mm.h>
40#include <linux/scatterlist.h> 42#include <linux/scatterlist.h>
41#include <linux/crypto.h>
42#include <linux/highmem.h> 43#include <linux/highmem.h>
43#include <linux/pagemap.h> 44#include <linux/pagemap.h>
44#include <linux/random.h> 45#include <linux/random.h>
@@ -51,7 +52,7 @@
51 52
52u32 53u32
53krb5_encrypt( 54krb5_encrypt(
54 struct crypto_blkcipher *tfm, 55 struct crypto_skcipher *tfm,
55 void * iv, 56 void * iv,
56 void * in, 57 void * in,
57 void * out, 58 void * out,
@@ -60,24 +61,28 @@ krb5_encrypt(
60 u32 ret = -EINVAL; 61 u32 ret = -EINVAL;
61 struct scatterlist sg[1]; 62 struct scatterlist sg[1];
62 u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0}; 63 u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
63 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; 64 SKCIPHER_REQUEST_ON_STACK(req, tfm);
64 65
65 if (length % crypto_blkcipher_blocksize(tfm) != 0) 66 if (length % crypto_skcipher_blocksize(tfm) != 0)
66 goto out; 67 goto out;
67 68
68 if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { 69 if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
69 dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n", 70 dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n",
70 crypto_blkcipher_ivsize(tfm)); 71 crypto_skcipher_ivsize(tfm));
71 goto out; 72 goto out;
72 } 73 }
73 74
74 if (iv) 75 if (iv)
75 memcpy(local_iv, iv, crypto_blkcipher_ivsize(tfm)); 76 memcpy(local_iv, iv, crypto_skcipher_ivsize(tfm));
76 77
77 memcpy(out, in, length); 78 memcpy(out, in, length);
78 sg_init_one(sg, out, length); 79 sg_init_one(sg, out, length);
79 80
80 ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length); 81 skcipher_request_set_callback(req, 0, NULL, NULL);
82 skcipher_request_set_crypt(req, sg, sg, length, local_iv);
83
84 ret = crypto_skcipher_encrypt(req);
85 skcipher_request_zero(req);
81out: 86out:
82 dprintk("RPC: krb5_encrypt returns %d\n", ret); 87 dprintk("RPC: krb5_encrypt returns %d\n", ret);
83 return ret; 88 return ret;
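krb5_encrypt() and krb5_decrypt() above are converted from the blkcipher interface to skcipher: the on-stack blkcipher_desc becomes SKCIPHER_REQUEST_ON_STACK(), the source/destination scatterlists, length and IV are attached with skcipher_request_set_crypt(), and the request is wiped with skcipher_request_zero() when done. A stripped-down sketch of one synchronous in-place encryption with this API (transform allocation and key setup assumed to happen elsewhere):

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>

/* Sketch: encrypt 'len' bytes in place with an already-keyed tfm.
 * 'len' must be a multiple of the cipher block size and 'iv' must be at
 * least crypto_skcipher_ivsize(tfm) bytes. */
static int encrypt_in_place(struct crypto_skcipher *tfm, u8 *buf,
			    unsigned int len, u8 *iv)
{
	struct scatterlist sg;
	int ret;
	SKCIPHER_REQUEST_ON_STACK(req, tfm);

	sg_init_one(&sg, buf, len);

	skcipher_request_set_tfm(req, tfm);
	skcipher_request_set_callback(req, 0, NULL, NULL);	/* synchronous use */
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	ret = crypto_skcipher_encrypt(req);
	skcipher_request_zero(req);	/* scrub request state from the stack */
	return ret;
}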
@@ -85,7 +90,7 @@ out:
85 90
86u32 91u32
87krb5_decrypt( 92krb5_decrypt(
88 struct crypto_blkcipher *tfm, 93 struct crypto_skcipher *tfm,
89 void * iv, 94 void * iv,
90 void * in, 95 void * in,
91 void * out, 96 void * out,
@@ -94,23 +99,27 @@ krb5_decrypt(
94 u32 ret = -EINVAL; 99 u32 ret = -EINVAL;
95 struct scatterlist sg[1]; 100 struct scatterlist sg[1];
96 u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0}; 101 u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
97 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; 102 SKCIPHER_REQUEST_ON_STACK(req, tfm);
98 103
99 if (length % crypto_blkcipher_blocksize(tfm) != 0) 104 if (length % crypto_skcipher_blocksize(tfm) != 0)
100 goto out; 105 goto out;
101 106
102 if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) { 107 if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
103 dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n", 108 dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n",
104 crypto_blkcipher_ivsize(tfm)); 109 crypto_skcipher_ivsize(tfm));
105 goto out; 110 goto out;
106 } 111 }
107 if (iv) 112 if (iv)
108 memcpy(local_iv,iv, crypto_blkcipher_ivsize(tfm)); 113 memcpy(local_iv,iv, crypto_skcipher_ivsize(tfm));
109 114
110 memcpy(out, in, length); 115 memcpy(out, in, length);
111 sg_init_one(sg, out, length); 116 sg_init_one(sg, out, length);
112 117
113 ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length); 118 skcipher_request_set_callback(req, 0, NULL, NULL);
119 skcipher_request_set_crypt(req, sg, sg, length, local_iv);
120
121 ret = crypto_skcipher_decrypt(req);
122 skcipher_request_zero(req);
114out: 123out:
115 dprintk("RPC: gss_k5decrypt returns %d\n",ret); 124 dprintk("RPC: gss_k5decrypt returns %d\n",ret);
116 return ret; 125 return ret;
@@ -119,9 +128,11 @@ out:
119static int 128static int
120checksummer(struct scatterlist *sg, void *data) 129checksummer(struct scatterlist *sg, void *data)
121{ 130{
122 struct hash_desc *desc = data; 131 struct ahash_request *req = data;
132
133 ahash_request_set_crypt(req, sg, NULL, sg->length);
123 134
124 return crypto_hash_update(desc, sg, sg->length); 135 return crypto_ahash_update(req);
125} 136}
126 137
127static int 138static int
@@ -152,13 +163,13 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
152 struct xdr_buf *body, int body_offset, u8 *cksumkey, 163 struct xdr_buf *body, int body_offset, u8 *cksumkey,
153 unsigned int usage, struct xdr_netobj *cksumout) 164 unsigned int usage, struct xdr_netobj *cksumout)
154{ 165{
155 struct hash_desc desc;
156 struct scatterlist sg[1]; 166 struct scatterlist sg[1];
157 int err; 167 int err;
158 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; 168 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
159 u8 rc4salt[4]; 169 u8 rc4salt[4];
160 struct crypto_hash *md5; 170 struct crypto_ahash *md5;
161 struct crypto_hash *hmac_md5; 171 struct crypto_ahash *hmac_md5;
172 struct ahash_request *req;
162 173
163 if (cksumkey == NULL) 174 if (cksumkey == NULL)
164 return GSS_S_FAILURE; 175 return GSS_S_FAILURE;
@@ -174,61 +185,79 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
174 return GSS_S_FAILURE; 185 return GSS_S_FAILURE;
175 } 186 }
176 187
177 md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); 188 md5 = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
178 if (IS_ERR(md5)) 189 if (IS_ERR(md5))
179 return GSS_S_FAILURE; 190 return GSS_S_FAILURE;
180 191
181 hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, 192 hmac_md5 = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0,
182 CRYPTO_ALG_ASYNC); 193 CRYPTO_ALG_ASYNC);
183 if (IS_ERR(hmac_md5)) { 194 if (IS_ERR(hmac_md5)) {
184 crypto_free_hash(md5); 195 crypto_free_ahash(md5);
196 return GSS_S_FAILURE;
197 }
198
199 req = ahash_request_alloc(md5, GFP_KERNEL);
200 if (!req) {
201 crypto_free_ahash(hmac_md5);
202 crypto_free_ahash(md5);
185 return GSS_S_FAILURE; 203 return GSS_S_FAILURE;
186 } 204 }
187 205
188 desc.tfm = md5; 206 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
189 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
190 207
191 err = crypto_hash_init(&desc); 208 err = crypto_ahash_init(req);
192 if (err) 209 if (err)
193 goto out; 210 goto out;
194 sg_init_one(sg, rc4salt, 4); 211 sg_init_one(sg, rc4salt, 4);
195 err = crypto_hash_update(&desc, sg, 4); 212 ahash_request_set_crypt(req, sg, NULL, 4);
213 err = crypto_ahash_update(req);
196 if (err) 214 if (err)
197 goto out; 215 goto out;
198 216
199 sg_init_one(sg, header, hdrlen); 217 sg_init_one(sg, header, hdrlen);
200 err = crypto_hash_update(&desc, sg, hdrlen); 218 ahash_request_set_crypt(req, sg, NULL, hdrlen);
219 err = crypto_ahash_update(req);
201 if (err) 220 if (err)
202 goto out; 221 goto out;
203 err = xdr_process_buf(body, body_offset, body->len - body_offset, 222 err = xdr_process_buf(body, body_offset, body->len - body_offset,
204 checksummer, &desc); 223 checksummer, req);
205 if (err) 224 if (err)
206 goto out; 225 goto out;
207 err = crypto_hash_final(&desc, checksumdata); 226 ahash_request_set_crypt(req, NULL, checksumdata, 0);
227 err = crypto_ahash_final(req);
208 if (err) 228 if (err)
209 goto out; 229 goto out;
210 230
211 desc.tfm = hmac_md5; 231 ahash_request_free(req);
212 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; 232 req = ahash_request_alloc(hmac_md5, GFP_KERNEL);
233 if (!req) {
234 crypto_free_ahash(hmac_md5);
235 crypto_free_ahash(md5);
236 return GSS_S_FAILURE;
237 }
238
239 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
213 240
214 err = crypto_hash_init(&desc); 241 err = crypto_ahash_init(req);
215 if (err) 242 if (err)
216 goto out; 243 goto out;
217 err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength); 244 err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
218 if (err) 245 if (err)
219 goto out; 246 goto out;
220 247
221 sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5)); 248 sg_init_one(sg, checksumdata, crypto_ahash_digestsize(md5));
222 err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5), 249 ahash_request_set_crypt(req, sg, checksumdata,
223 checksumdata); 250 crypto_ahash_digestsize(md5));
251 err = crypto_ahash_digest(req);
224 if (err) 252 if (err)
225 goto out; 253 goto out;
226 254
227 memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength); 255 memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
228 cksumout->len = kctx->gk5e->cksumlength; 256 cksumout->len = kctx->gk5e->cksumlength;
229out: 257out:
230 crypto_free_hash(md5); 258 ahash_request_free(req);
231 crypto_free_hash(hmac_md5); 259 crypto_free_ahash(md5);
260 crypto_free_ahash(hmac_md5);
232 return err ? GSS_S_FAILURE : 0; 261 return err ? GSS_S_FAILURE : 0;
233} 262}
234 263
@@ -242,7 +271,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
242 struct xdr_buf *body, int body_offset, u8 *cksumkey, 271 struct xdr_buf *body, int body_offset, u8 *cksumkey,
243 unsigned int usage, struct xdr_netobj *cksumout) 272 unsigned int usage, struct xdr_netobj *cksumout)
244{ 273{
245 struct hash_desc desc; 274 struct crypto_ahash *tfm;
275 struct ahash_request *req;
246 struct scatterlist sg[1]; 276 struct scatterlist sg[1];
247 int err; 277 int err;
248 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; 278 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
@@ -259,32 +289,41 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
259 return GSS_S_FAILURE; 289 return GSS_S_FAILURE;
260 } 290 }
261 291
262 desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); 292 tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
263 if (IS_ERR(desc.tfm)) 293 if (IS_ERR(tfm))
264 return GSS_S_FAILURE; 294 return GSS_S_FAILURE;
265 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
266 295
267 checksumlen = crypto_hash_digestsize(desc.tfm); 296 req = ahash_request_alloc(tfm, GFP_KERNEL);
297 if (!req) {
298 crypto_free_ahash(tfm);
299 return GSS_S_FAILURE;
300 }
301
302 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
303
304 checksumlen = crypto_ahash_digestsize(tfm);
268 305
269 if (cksumkey != NULL) { 306 if (cksumkey != NULL) {
270 err = crypto_hash_setkey(desc.tfm, cksumkey, 307 err = crypto_ahash_setkey(tfm, cksumkey,
271 kctx->gk5e->keylength); 308 kctx->gk5e->keylength);
272 if (err) 309 if (err)
273 goto out; 310 goto out;
274 } 311 }
275 312
276 err = crypto_hash_init(&desc); 313 err = crypto_ahash_init(req);
277 if (err) 314 if (err)
278 goto out; 315 goto out;
279 sg_init_one(sg, header, hdrlen); 316 sg_init_one(sg, header, hdrlen);
280 err = crypto_hash_update(&desc, sg, hdrlen); 317 ahash_request_set_crypt(req, sg, NULL, hdrlen);
318 err = crypto_ahash_update(req);
281 if (err) 319 if (err)
282 goto out; 320 goto out;
283 err = xdr_process_buf(body, body_offset, body->len - body_offset, 321 err = xdr_process_buf(body, body_offset, body->len - body_offset,
284 checksummer, &desc); 322 checksummer, req);
285 if (err) 323 if (err)
286 goto out; 324 goto out;
287 err = crypto_hash_final(&desc, checksumdata); 325 ahash_request_set_crypt(req, NULL, checksumdata, 0);
326 err = crypto_ahash_final(req);
288 if (err) 327 if (err)
289 goto out; 328 goto out;
290 329
@@ -307,7 +346,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
307 } 346 }
308 cksumout->len = kctx->gk5e->cksumlength; 347 cksumout->len = kctx->gk5e->cksumlength;
309out: 348out:
310 crypto_free_hash(desc.tfm); 349 ahash_request_free(req);
350 crypto_free_ahash(tfm);
311 return err ? GSS_S_FAILURE : 0; 351 return err ? GSS_S_FAILURE : 0;
312} 352}
313 353
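make_checksum() and the other checksum helpers above move from hash_desc/crypto_hash to the ahash interface: a request is allocated with ahash_request_alloc(), each data fragment is described by a scatterlist handed to ahash_request_set_crypt() before crypto_ahash_update(), and crypto_ahash_final() writes the digest through the request's result pointer. A compact sketch of the same init/update/final sequence over a single buffer (in the real code the xdr_process_buf() callback simply repeats the update step per fragment); the helper name and single-buffer shape are illustrative only:

#include <crypto/hash.h>
#include <linux/scatterlist.h>
#include <linux/err.h>
#include <linux/gfp.h>

/* Sketch: digest 'len' bytes of 'data' into 'out' using the ahash API. */
static int ahash_digest_buf(const char *alg, const u8 *data,
			    unsigned int len, u8 *out)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	struct scatterlist sg;
	int err;

	tfm = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC);	/* sync impl only */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return -ENOMEM;
	}
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);

	sg_init_one(&sg, data, len);

	err = crypto_ahash_init(req);
	if (!err) {
		ahash_request_set_crypt(req, &sg, NULL, len);
		err = crypto_ahash_update(req);
	}
	if (!err) {
		ahash_request_set_crypt(req, NULL, out, 0);
		err = crypto_ahash_final(req);
	}

	ahash_request_free(req);
	crypto_free_ahash(tfm);
	return err;
}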
@@ -323,7 +363,8 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
323 struct xdr_buf *body, int body_offset, u8 *cksumkey, 363 struct xdr_buf *body, int body_offset, u8 *cksumkey,
324 unsigned int usage, struct xdr_netobj *cksumout) 364 unsigned int usage, struct xdr_netobj *cksumout)
325{ 365{
326 struct hash_desc desc; 366 struct crypto_ahash *tfm;
367 struct ahash_request *req;
327 struct scatterlist sg[1]; 368 struct scatterlist sg[1];
328 int err; 369 int err;
329 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; 370 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
@@ -340,31 +381,39 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
340 return GSS_S_FAILURE; 381 return GSS_S_FAILURE;
341 } 382 }
342 383
343 desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, 384 tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
344 CRYPTO_ALG_ASYNC); 385 if (IS_ERR(tfm))
345 if (IS_ERR(desc.tfm))
346 return GSS_S_FAILURE; 386 return GSS_S_FAILURE;
347 checksumlen = crypto_hash_digestsize(desc.tfm); 387 checksumlen = crypto_ahash_digestsize(tfm);
348 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; 388
389 req = ahash_request_alloc(tfm, GFP_KERNEL);
390 if (!req) {
391 crypto_free_ahash(tfm);
392 return GSS_S_FAILURE;
393 }
349 394
350 err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength); 395 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
396
397 err = crypto_ahash_setkey(tfm, cksumkey, kctx->gk5e->keylength);
351 if (err) 398 if (err)
352 goto out; 399 goto out;
353 400
354 err = crypto_hash_init(&desc); 401 err = crypto_ahash_init(req);
355 if (err) 402 if (err)
356 goto out; 403 goto out;
357 err = xdr_process_buf(body, body_offset, body->len - body_offset, 404 err = xdr_process_buf(body, body_offset, body->len - body_offset,
358 checksummer, &desc); 405 checksummer, req);
359 if (err) 406 if (err)
360 goto out; 407 goto out;
361 if (header != NULL) { 408 if (header != NULL) {
362 sg_init_one(sg, header, hdrlen); 409 sg_init_one(sg, header, hdrlen);
363 err = crypto_hash_update(&desc, sg, hdrlen); 410 ahash_request_set_crypt(req, sg, NULL, hdrlen);
411 err = crypto_ahash_update(req);
364 if (err) 412 if (err)
365 goto out; 413 goto out;
366 } 414 }
367 err = crypto_hash_final(&desc, checksumdata); 415 ahash_request_set_crypt(req, NULL, checksumdata, 0);
416 err = crypto_ahash_final(req);
368 if (err) 417 if (err)
369 goto out; 418 goto out;
370 419
@@ -381,13 +430,14 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
381 break; 430 break;
382 } 431 }
383out: 432out:
384 crypto_free_hash(desc.tfm); 433 ahash_request_free(req);
434 crypto_free_ahash(tfm);
385 return err ? GSS_S_FAILURE : 0; 435 return err ? GSS_S_FAILURE : 0;
386} 436}
387 437
388struct encryptor_desc { 438struct encryptor_desc {
389 u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; 439 u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
390 struct blkcipher_desc desc; 440 struct skcipher_request *req;
391 int pos; 441 int pos;
392 struct xdr_buf *outbuf; 442 struct xdr_buf *outbuf;
393 struct page **pages; 443 struct page **pages;
@@ -402,6 +452,7 @@ encryptor(struct scatterlist *sg, void *data)
402{ 452{
403 struct encryptor_desc *desc = data; 453 struct encryptor_desc *desc = data;
404 struct xdr_buf *outbuf = desc->outbuf; 454 struct xdr_buf *outbuf = desc->outbuf;
455 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
405 struct page *in_page; 456 struct page *in_page;
406 int thislen = desc->fraglen + sg->length; 457 int thislen = desc->fraglen + sg->length;
407 int fraglen, ret; 458 int fraglen, ret;
@@ -427,7 +478,7 @@ encryptor(struct scatterlist *sg, void *data)
427 desc->fraglen += sg->length; 478 desc->fraglen += sg->length;
428 desc->pos += sg->length; 479 desc->pos += sg->length;
429 480
430 fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1); 481 fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
431 thislen -= fraglen; 482 thislen -= fraglen;
432 483
433 if (thislen == 0) 484 if (thislen == 0)
@@ -436,8 +487,10 @@ encryptor(struct scatterlist *sg, void *data)
436 sg_mark_end(&desc->infrags[desc->fragno - 1]); 487 sg_mark_end(&desc->infrags[desc->fragno - 1]);
437 sg_mark_end(&desc->outfrags[desc->fragno - 1]); 488 sg_mark_end(&desc->outfrags[desc->fragno - 1]);
438 489
439 ret = crypto_blkcipher_encrypt_iv(&desc->desc, desc->outfrags, 490 skcipher_request_set_crypt(desc->req, desc->infrags, desc->outfrags,
440 desc->infrags, thislen); 491 thislen, desc->iv);
492
493 ret = crypto_skcipher_encrypt(desc->req);
441 if (ret) 494 if (ret)
442 return ret; 495 return ret;
443 496
@@ -459,18 +512,20 @@ encryptor(struct scatterlist *sg, void *data)
459} 512}
460 513
461int 514int
462gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, 515gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
463 int offset, struct page **pages) 516 int offset, struct page **pages)
464{ 517{
465 int ret; 518 int ret;
466 struct encryptor_desc desc; 519 struct encryptor_desc desc;
520 SKCIPHER_REQUEST_ON_STACK(req, tfm);
521
522 BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
467 523
468 BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); 524 skcipher_request_set_tfm(req, tfm);
525 skcipher_request_set_callback(req, 0, NULL, NULL);
469 526
470 memset(desc.iv, 0, sizeof(desc.iv)); 527 memset(desc.iv, 0, sizeof(desc.iv));
471 desc.desc.tfm = tfm; 528 desc.req = req;
472 desc.desc.info = desc.iv;
473 desc.desc.flags = 0;
474 desc.pos = offset; 529 desc.pos = offset;
475 desc.outbuf = buf; 530 desc.outbuf = buf;
476 desc.pages = pages; 531 desc.pages = pages;
@@ -481,12 +536,13 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
481 sg_init_table(desc.outfrags, 4); 536 sg_init_table(desc.outfrags, 4);
482 537
483 ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc); 538 ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc);
539 skcipher_request_zero(req);
484 return ret; 540 return ret;
485} 541}
486 542
487struct decryptor_desc { 543struct decryptor_desc {
488 u8 iv[GSS_KRB5_MAX_BLOCKSIZE]; 544 u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
489 struct blkcipher_desc desc; 545 struct skcipher_request *req;
490 struct scatterlist frags[4]; 546 struct scatterlist frags[4];
491 int fragno; 547 int fragno;
492 int fraglen; 548 int fraglen;
@@ -497,6 +553,7 @@ decryptor(struct scatterlist *sg, void *data)
497{ 553{
498 struct decryptor_desc *desc = data; 554 struct decryptor_desc *desc = data;
499 int thislen = desc->fraglen + sg->length; 555 int thislen = desc->fraglen + sg->length;
556 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
500 int fraglen, ret; 557 int fraglen, ret;
501 558
502 /* Worst case is 4 fragments: head, end of page 1, start 559 /* Worst case is 4 fragments: head, end of page 1, start
@@ -507,7 +564,7 @@ decryptor(struct scatterlist *sg, void *data)
507 desc->fragno++; 564 desc->fragno++;
508 desc->fraglen += sg->length; 565 desc->fraglen += sg->length;
509 566
510 fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1); 567 fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
511 thislen -= fraglen; 568 thislen -= fraglen;
512 569
513 if (thislen == 0) 570 if (thislen == 0)
@@ -515,8 +572,10 @@ decryptor(struct scatterlist *sg, void *data)
515 572
516 sg_mark_end(&desc->frags[desc->fragno - 1]); 573 sg_mark_end(&desc->frags[desc->fragno - 1]);
517 574
518 ret = crypto_blkcipher_decrypt_iv(&desc->desc, desc->frags, 575 skcipher_request_set_crypt(desc->req, desc->frags, desc->frags,
519 desc->frags, thislen); 576 thislen, desc->iv);
577
578 ret = crypto_skcipher_decrypt(desc->req);
520 if (ret) 579 if (ret)
521 return ret; 580 return ret;
522 581
@@ -535,24 +594,29 @@ decryptor(struct scatterlist *sg, void *data)
535} 594}
536 595
537int 596int
538gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf, 597gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
539 int offset) 598 int offset)
540{ 599{
600 int ret;
541 struct decryptor_desc desc; 601 struct decryptor_desc desc;
602 SKCIPHER_REQUEST_ON_STACK(req, tfm);
542 603
543 /* XXXJBF: */ 604 /* XXXJBF: */
544 BUG_ON((buf->len - offset) % crypto_blkcipher_blocksize(tfm) != 0); 605 BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
606
607 skcipher_request_set_tfm(req, tfm);
608 skcipher_request_set_callback(req, 0, NULL, NULL);
545 609
546 memset(desc.iv, 0, sizeof(desc.iv)); 610 memset(desc.iv, 0, sizeof(desc.iv));
547 desc.desc.tfm = tfm; 611 desc.req = req;
548 desc.desc.info = desc.iv;
549 desc.desc.flags = 0;
550 desc.fragno = 0; 612 desc.fragno = 0;
551 desc.fraglen = 0; 613 desc.fraglen = 0;
552 614
553 sg_init_table(desc.frags, 4); 615 sg_init_table(desc.frags, 4);
554 616
555 return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); 617 ret = xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
618 skcipher_request_zero(req);
619 return ret;
556} 620}
557 621
558/* 622/*
@@ -594,12 +658,12 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
594} 658}
595 659
596static u32 660static u32
597gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf, 661gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
598 u32 offset, u8 *iv, struct page **pages, int encrypt) 662 u32 offset, u8 *iv, struct page **pages, int encrypt)
599{ 663{
600 u32 ret; 664 u32 ret;
601 struct scatterlist sg[1]; 665 struct scatterlist sg[1];
602 struct blkcipher_desc desc = { .tfm = cipher, .info = iv }; 666 SKCIPHER_REQUEST_ON_STACK(req, cipher);
603 u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2]; 667 u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2];
604 struct page **save_pages; 668 struct page **save_pages;
605 u32 len = buf->len - offset; 669 u32 len = buf->len - offset;
@@ -625,10 +689,16 @@ gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
625 689
626 sg_init_one(sg, data, len); 690 sg_init_one(sg, data, len);
627 691
692 skcipher_request_set_tfm(req, cipher);
693 skcipher_request_set_callback(req, 0, NULL, NULL);
694 skcipher_request_set_crypt(req, sg, sg, len, iv);
695
628 if (encrypt) 696 if (encrypt)
629 ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len); 697 ret = crypto_skcipher_encrypt(req);
630 else 698 else
631 ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len); 699 ret = crypto_skcipher_decrypt(req);
700
701 skcipher_request_zero(req);
632 702
633 if (ret) 703 if (ret)
634 goto out; 704 goto out;
@@ -647,7 +717,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
647 struct xdr_netobj hmac; 717 struct xdr_netobj hmac;
648 u8 *cksumkey; 718 u8 *cksumkey;
649 u8 *ecptr; 719 u8 *ecptr;
650 struct crypto_blkcipher *cipher, *aux_cipher; 720 struct crypto_skcipher *cipher, *aux_cipher;
651 int blocksize; 721 int blocksize;
652 struct page **save_pages; 722 struct page **save_pages;
653 int nblocks, nbytes; 723 int nblocks, nbytes;
@@ -666,7 +736,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
666 cksumkey = kctx->acceptor_integ; 736 cksumkey = kctx->acceptor_integ;
667 usage = KG_USAGE_ACCEPTOR_SEAL; 737 usage = KG_USAGE_ACCEPTOR_SEAL;
668 } 738 }
669 blocksize = crypto_blkcipher_blocksize(cipher); 739 blocksize = crypto_skcipher_blocksize(cipher);
670 740
671 /* hide the gss token header and insert the confounder */ 741 /* hide the gss token header and insert the confounder */
672 offset += GSS_KRB5_TOK_HDR_LEN; 742 offset += GSS_KRB5_TOK_HDR_LEN;
@@ -719,20 +789,24 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
719 memset(desc.iv, 0, sizeof(desc.iv)); 789 memset(desc.iv, 0, sizeof(desc.iv));
720 790
721 if (cbcbytes) { 791 if (cbcbytes) {
792 SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
793
722 desc.pos = offset + GSS_KRB5_TOK_HDR_LEN; 794 desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
723 desc.fragno = 0; 795 desc.fragno = 0;
724 desc.fraglen = 0; 796 desc.fraglen = 0;
725 desc.pages = pages; 797 desc.pages = pages;
726 desc.outbuf = buf; 798 desc.outbuf = buf;
727 desc.desc.info = desc.iv; 799 desc.req = req;
728 desc.desc.flags = 0; 800
729 desc.desc.tfm = aux_cipher; 801 skcipher_request_set_tfm(req, aux_cipher);
802 skcipher_request_set_callback(req, 0, NULL, NULL);
730 803
731 sg_init_table(desc.infrags, 4); 804 sg_init_table(desc.infrags, 4);
732 sg_init_table(desc.outfrags, 4); 805 sg_init_table(desc.outfrags, 4);
733 806
734 err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN, 807 err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
735 cbcbytes, encryptor, &desc); 808 cbcbytes, encryptor, &desc);
809 skcipher_request_zero(req);
736 if (err) 810 if (err)
737 goto out_err; 811 goto out_err;
738 } 812 }
@@ -763,7 +837,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
763 struct xdr_buf subbuf; 837 struct xdr_buf subbuf;
764 u32 ret = 0; 838 u32 ret = 0;
765 u8 *cksum_key; 839 u8 *cksum_key;
766 struct crypto_blkcipher *cipher, *aux_cipher; 840 struct crypto_skcipher *cipher, *aux_cipher;
767 struct xdr_netobj our_hmac_obj; 841 struct xdr_netobj our_hmac_obj;
768 u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN]; 842 u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
769 u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN]; 843 u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
@@ -782,7 +856,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
782 cksum_key = kctx->initiator_integ; 856 cksum_key = kctx->initiator_integ;
783 usage = KG_USAGE_INITIATOR_SEAL; 857 usage = KG_USAGE_INITIATOR_SEAL;
784 } 858 }
785 blocksize = crypto_blkcipher_blocksize(cipher); 859 blocksize = crypto_skcipher_blocksize(cipher);
786 860
787 861
788 /* create a segment skipping the header and leaving out the checksum */ 862 /* create a segment skipping the header and leaving out the checksum */
@@ -799,15 +873,19 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
799 memset(desc.iv, 0, sizeof(desc.iv)); 873 memset(desc.iv, 0, sizeof(desc.iv));
800 874
801 if (cbcbytes) { 875 if (cbcbytes) {
876 SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
877
802 desc.fragno = 0; 878 desc.fragno = 0;
803 desc.fraglen = 0; 879 desc.fraglen = 0;
804 desc.desc.info = desc.iv; 880 desc.req = req;
805 desc.desc.flags = 0; 881
806 desc.desc.tfm = aux_cipher; 882 skcipher_request_set_tfm(req, aux_cipher);
883 skcipher_request_set_callback(req, 0, NULL, NULL);
807 884
808 sg_init_table(desc.frags, 4); 885 sg_init_table(desc.frags, 4);
809 886
810 ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc); 887 ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
888 skcipher_request_zero(req);
811 if (ret) 889 if (ret)
812 goto out_err; 890 goto out_err;
813 } 891 }
@@ -850,61 +928,62 @@ out_err:
850 * Set the key of the given cipher. 928 * Set the key of the given cipher.
851 */ 929 */
852int 930int
853krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, 931krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
854 unsigned char *cksum) 932 unsigned char *cksum)
855{ 933{
856 struct crypto_hash *hmac; 934 struct crypto_shash *hmac;
857 struct hash_desc desc; 935 struct shash_desc *desc;
858 struct scatterlist sg[1];
859 u8 Kseq[GSS_KRB5_MAX_KEYLEN]; 936 u8 Kseq[GSS_KRB5_MAX_KEYLEN];
860 u32 zeroconstant = 0; 937 u32 zeroconstant = 0;
861 int err; 938 int err;
862 939
863 dprintk("%s: entered\n", __func__); 940 dprintk("%s: entered\n", __func__);
864 941
865 hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); 942 hmac = crypto_alloc_shash(kctx->gk5e->cksum_name, 0, 0);
866 if (IS_ERR(hmac)) { 943 if (IS_ERR(hmac)) {
867 dprintk("%s: error %ld, allocating hash '%s'\n", 944 dprintk("%s: error %ld, allocating hash '%s'\n",
868 __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name); 945 __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
869 return PTR_ERR(hmac); 946 return PTR_ERR(hmac);
870 } 947 }
871 948
872 desc.tfm = hmac; 949 desc = kmalloc(sizeof(*desc), GFP_KERNEL);
873 desc.flags = 0; 950 if (!desc) {
951 dprintk("%s: failed to allocate shash descriptor for '%s'\n",
952 __func__, kctx->gk5e->cksum_name);
953 crypto_free_shash(hmac);
954 return -ENOMEM;
955 }
874 956
875 err = crypto_hash_init(&desc); 957 desc->tfm = hmac;
876 if (err) 958 desc->flags = 0;
877 goto out_err;
878 959
879 /* Compute intermediate Kseq from session key */ 960 /* Compute intermediate Kseq from session key */
880 err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength); 961 err = crypto_shash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
881 if (err) 962 if (err)
882 goto out_err; 963 goto out_err;
883 964
884 sg_init_one(sg, &zeroconstant, 4); 965 err = crypto_shash_digest(desc, (u8 *)&zeroconstant, 4, Kseq);
885 err = crypto_hash_digest(&desc, sg, 4, Kseq);
886 if (err) 966 if (err)
887 goto out_err; 967 goto out_err;
888 968
889 /* Compute final Kseq from the checksum and intermediate Kseq */ 969 /* Compute final Kseq from the checksum and intermediate Kseq */
890 err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength); 970 err = crypto_shash_setkey(hmac, Kseq, kctx->gk5e->keylength);
891 if (err) 971 if (err)
892 goto out_err; 972 goto out_err;
893 973
894 sg_set_buf(sg, cksum, 8); 974 err = crypto_shash_digest(desc, cksum, 8, Kseq);
895
896 err = crypto_hash_digest(&desc, sg, 8, Kseq);
897 if (err) 975 if (err)
898 goto out_err; 976 goto out_err;
899 977
900 err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength); 978 err = crypto_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
901 if (err) 979 if (err)
902 goto out_err; 980 goto out_err;
903 981
904 err = 0; 982 err = 0;
905 983
906out_err: 984out_err:
907 crypto_free_hash(hmac); 985 kzfree(desc);
986 crypto_free_shash(hmac);
908 dprintk("%s: returning %d\n", __func__, err); 987 dprintk("%s: returning %d\n", __func__, err);
909 return err; 988 return err;
910} 989}
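krb5_rc4_setup_seq_key() and krb5_rc4_setup_enc_key() above switch to crypto_shash with a heap-allocated descriptor, and the scatterlist-based crypto_hash_digest() calls become crypto_shash_digest(), which takes plain virtual addresses. One point worth keeping in mind with this pattern: a shash_desc is immediately followed in memory by the algorithm's private state, so a heap allocation needs to cover sizeof(*desc) + crypto_shash_descsize(tfm). A hedged sketch of the keyed one-shot digest, assuming the 4.6-era descriptor that still carries a flags field:

#include <crypto/hash.h>
#include <linux/slab.h>

/* Sketch: one-shot keyed digest with a heap-allocated shash descriptor. */
static int shash_keyed_digest(struct crypto_shash *tfm,
			      const u8 *key, unsigned int keylen,
			      const u8 *data, unsigned int len, u8 *out)
{
	struct shash_desc *desc;
	int err;

	desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

	desc->tfm = tfm;
	desc->flags = 0;		/* field present in this kernel generation */

	err = crypto_shash_setkey(tfm, key, keylen);
	if (!err)
		err = crypto_shash_digest(desc, data, len, out);

	kzfree(desc);			/* descriptor may hold key-derived state */
	return err;
}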
@@ -914,12 +993,11 @@ out_err:
914 * Set the key of cipher kctx->enc. 993 * Set the key of cipher kctx->enc.
915 */ 994 */
916int 995int
917krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher, 996krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
918 s32 seqnum) 997 s32 seqnum)
919{ 998{
920 struct crypto_hash *hmac; 999 struct crypto_shash *hmac;
921 struct hash_desc desc; 1000 struct shash_desc *desc;
922 struct scatterlist sg[1];
923 u8 Kcrypt[GSS_KRB5_MAX_KEYLEN]; 1001 u8 Kcrypt[GSS_KRB5_MAX_KEYLEN];
924 u8 zeroconstant[4] = {0}; 1002 u8 zeroconstant[4] = {0};
925 u8 seqnumarray[4]; 1003 u8 seqnumarray[4];
@@ -927,35 +1005,38 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
927 1005
928 dprintk("%s: entered, seqnum %u\n", __func__, seqnum); 1006 dprintk("%s: entered, seqnum %u\n", __func__, seqnum);
929 1007
930 hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); 1008 hmac = crypto_alloc_shash(kctx->gk5e->cksum_name, 0, 0);
931 if (IS_ERR(hmac)) { 1009 if (IS_ERR(hmac)) {
932 dprintk("%s: error %ld, allocating hash '%s'\n", 1010 dprintk("%s: error %ld, allocating hash '%s'\n",
933 __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name); 1011 __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
934 return PTR_ERR(hmac); 1012 return PTR_ERR(hmac);
935 } 1013 }
936 1014
937 desc.tfm = hmac; 1015 desc = kmalloc(sizeof(*desc), GFP_KERNEL);
938 desc.flags = 0; 1016 if (!desc) {
1017 dprintk("%s: failed to allocate shash descriptor for '%s'\n",
1018 __func__, kctx->gk5e->cksum_name);
1019 crypto_free_shash(hmac);
1020 return -ENOMEM;
1021 }
939 1022
940 err = crypto_hash_init(&desc); 1023 desc->tfm = hmac;
941 if (err) 1024 desc->flags = 0;
942 goto out_err;
943 1025
944 /* Compute intermediate Kcrypt from session key */ 1026 /* Compute intermediate Kcrypt from session key */
945 for (i = 0; i < kctx->gk5e->keylength; i++) 1027 for (i = 0; i < kctx->gk5e->keylength; i++)
946 Kcrypt[i] = kctx->Ksess[i] ^ 0xf0; 1028 Kcrypt[i] = kctx->Ksess[i] ^ 0xf0;
947 1029
948 err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength); 1030 err = crypto_shash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
949 if (err) 1031 if (err)
950 goto out_err; 1032 goto out_err;
951 1033
952 sg_init_one(sg, zeroconstant, 4); 1034 err = crypto_shash_digest(desc, zeroconstant, 4, Kcrypt);
953 err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
954 if (err) 1035 if (err)
955 goto out_err; 1036 goto out_err;
956 1037
957 /* Compute final Kcrypt from the seqnum and intermediate Kcrypt */ 1038 /* Compute final Kcrypt from the seqnum and intermediate Kcrypt */
958 err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength); 1039 err = crypto_shash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
959 if (err) 1040 if (err)
960 goto out_err; 1041 goto out_err;
961 1042
@@ -964,20 +1045,19 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
964 seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff); 1045 seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff);
965 seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff); 1046 seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff);
966 1047
967 sg_set_buf(sg, seqnumarray, 4); 1048 err = crypto_shash_digest(desc, seqnumarray, 4, Kcrypt);
968
969 err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
970 if (err) 1049 if (err)
971 goto out_err; 1050 goto out_err;
972 1051
973 err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength); 1052 err = crypto_skcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
974 if (err) 1053 if (err)
975 goto out_err; 1054 goto out_err;
976 1055
977 err = 0; 1056 err = 0;
978 1057
979out_err: 1058out_err:
980 crypto_free_hash(hmac); 1059 kzfree(desc);
1060 crypto_free_shash(hmac);
981 dprintk("%s: returning %d\n", __func__, err); 1061 dprintk("%s: returning %d\n", __func__, err);
982 return err; 1062 return err;
983} 1063}
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 234fa8d0fd9b..870133146026 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -54,9 +54,9 @@
54 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 54 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
55 */ 55 */
56 56
57#include <crypto/skcipher.h>
57#include <linux/err.h> 58#include <linux/err.h>
58#include <linux/types.h> 59#include <linux/types.h>
59#include <linux/crypto.h>
60#include <linux/sunrpc/gss_krb5.h> 60#include <linux/sunrpc/gss_krb5.h>
61#include <linux/sunrpc/xdr.h> 61#include <linux/sunrpc/xdr.h>
62#include <linux/lcm.h> 62#include <linux/lcm.h>
@@ -147,7 +147,7 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
147 size_t blocksize, keybytes, keylength, n; 147 size_t blocksize, keybytes, keylength, n;
148 unsigned char *inblockdata, *outblockdata, *rawkey; 148 unsigned char *inblockdata, *outblockdata, *rawkey;
149 struct xdr_netobj inblock, outblock; 149 struct xdr_netobj inblock, outblock;
150 struct crypto_blkcipher *cipher; 150 struct crypto_skcipher *cipher;
151 u32 ret = EINVAL; 151 u32 ret = EINVAL;
152 152
153 blocksize = gk5e->blocksize; 153 blocksize = gk5e->blocksize;
@@ -157,11 +157,11 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
157 if ((inkey->len != keylength) || (outkey->len != keylength)) 157 if ((inkey->len != keylength) || (outkey->len != keylength))
158 goto err_return; 158 goto err_return;
159 159
160 cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0, 160 cipher = crypto_alloc_skcipher(gk5e->encrypt_name, 0,
161 CRYPTO_ALG_ASYNC); 161 CRYPTO_ALG_ASYNC);
162 if (IS_ERR(cipher)) 162 if (IS_ERR(cipher))
163 goto err_return; 163 goto err_return;
164 if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len)) 164 if (crypto_skcipher_setkey(cipher, inkey->data, inkey->len))
165 goto err_return; 165 goto err_return;
166 166
167 /* allocate and set up buffers */ 167 /* allocate and set up buffers */
@@ -238,7 +238,7 @@ err_free_in:
238 memset(inblockdata, 0, blocksize); 238 memset(inblockdata, 0, blocksize);
239 kfree(inblockdata); 239 kfree(inblockdata);
240err_free_cipher: 240err_free_cipher:
241 crypto_free_blkcipher(cipher); 241 crypto_free_skcipher(cipher);
242err_return: 242err_return:
243 return ret; 243 return ret;
244} 244}
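krb5_derive_key() above now allocates the derivation cipher with crypto_alloc_skcipher(gk5e->encrypt_name, 0, CRYPTO_ALG_ASYNC); passing CRYPTO_ALG_ASYNC in the mask while leaving the type bits clear asks the crypto API for a synchronous implementation, which is what the removed blkcipher interface provided implicitly. The matching allocate/key/free pattern, sketched with an illustrative "cbc(aes)" algorithm name:

#include <crypto/skcipher.h>
#include <linux/err.h>

/* Sketch: allocate a synchronous cipher handle, key it, and hand it back;
 * the caller frees it with crypto_free_skcipher() when finished. */
static struct crypto_skcipher *alloc_keyed_cipher(const u8 *key,
						  unsigned int keylen)
{
	struct crypto_skcipher *tfm;

	tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return tfm;

	if (crypto_skcipher_setkey(tfm, key, keylen)) {
		crypto_free_skcipher(tfm);
		return ERR_PTR(-EINVAL);
	}
	return tfm;
}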
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 28db442a0034..71341ccb9890 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -34,6 +34,8 @@
34 * 34 *
35 */ 35 */
36 36
37#include <crypto/hash.h>
38#include <crypto/skcipher.h>
37#include <linux/err.h> 39#include <linux/err.h>
38#include <linux/module.h> 40#include <linux/module.h>
39#include <linux/init.h> 41#include <linux/init.h>
@@ -42,7 +44,6 @@
42#include <linux/sunrpc/auth.h> 44#include <linux/sunrpc/auth.h>
43#include <linux/sunrpc/gss_krb5.h> 45#include <linux/sunrpc/gss_krb5.h>
44#include <linux/sunrpc/xdr.h> 46#include <linux/sunrpc/xdr.h>
45#include <linux/crypto.h>
46#include <linux/sunrpc/gss_krb5_enctypes.h> 47#include <linux/sunrpc/gss_krb5_enctypes.h>
47 48
48#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 49#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -217,7 +218,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
217 218
218static inline const void * 219static inline const void *
219get_key(const void *p, const void *end, 220get_key(const void *p, const void *end,
220 struct krb5_ctx *ctx, struct crypto_blkcipher **res) 221 struct krb5_ctx *ctx, struct crypto_skcipher **res)
221{ 222{
222 struct xdr_netobj key; 223 struct xdr_netobj key;
223 int alg; 224 int alg;
@@ -245,7 +246,7 @@ get_key(const void *p, const void *end,
245 if (IS_ERR(p)) 246 if (IS_ERR(p))
246 goto out_err; 247 goto out_err;
247 248
248 *res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, 249 *res = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
249 CRYPTO_ALG_ASYNC); 250 CRYPTO_ALG_ASYNC);
250 if (IS_ERR(*res)) { 251 if (IS_ERR(*res)) {
251 printk(KERN_WARNING "gss_kerberos_mech: unable to initialize " 252 printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
@@ -253,7 +254,7 @@ get_key(const void *p, const void *end,
253 *res = NULL; 254 *res = NULL;
254 goto out_err_free_key; 255 goto out_err_free_key;
255 } 256 }
256 if (crypto_blkcipher_setkey(*res, key.data, key.len)) { 257 if (crypto_skcipher_setkey(*res, key.data, key.len)) {
257 printk(KERN_WARNING "gss_kerberos_mech: error setting key for " 258 printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
258 "crypto algorithm %s\n", ctx->gk5e->encrypt_name); 259 "crypto algorithm %s\n", ctx->gk5e->encrypt_name);
259 goto out_err_free_tfm; 260 goto out_err_free_tfm;
@@ -263,7 +264,7 @@ get_key(const void *p, const void *end,
263 return p; 264 return p;
264 265
265out_err_free_tfm: 266out_err_free_tfm:
266 crypto_free_blkcipher(*res); 267 crypto_free_skcipher(*res);
267out_err_free_key: 268out_err_free_key:
268 kfree(key.data); 269 kfree(key.data);
269 p = ERR_PTR(-EINVAL); 270 p = ERR_PTR(-EINVAL);
@@ -335,30 +336,30 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
335 return 0; 336 return 0;
336 337
337out_err_free_key2: 338out_err_free_key2:
338 crypto_free_blkcipher(ctx->seq); 339 crypto_free_skcipher(ctx->seq);
339out_err_free_key1: 340out_err_free_key1:
340 crypto_free_blkcipher(ctx->enc); 341 crypto_free_skcipher(ctx->enc);
341out_err_free_mech: 342out_err_free_mech:
342 kfree(ctx->mech_used.data); 343 kfree(ctx->mech_used.data);
343out_err: 344out_err:
344 return PTR_ERR(p); 345 return PTR_ERR(p);
345} 346}
346 347
347static struct crypto_blkcipher * 348static struct crypto_skcipher *
348context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key) 349context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
349{ 350{
350 struct crypto_blkcipher *cp; 351 struct crypto_skcipher *cp;
351 352
352 cp = crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC); 353 cp = crypto_alloc_skcipher(cname, 0, CRYPTO_ALG_ASYNC);
353 if (IS_ERR(cp)) { 354 if (IS_ERR(cp)) {
354 dprintk("gss_kerberos_mech: unable to initialize " 355 dprintk("gss_kerberos_mech: unable to initialize "
355 "crypto algorithm %s\n", cname); 356 "crypto algorithm %s\n", cname);
356 return NULL; 357 return NULL;
357 } 358 }
358 if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) { 359 if (crypto_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
359 dprintk("gss_kerberos_mech: error setting key for " 360 dprintk("gss_kerberos_mech: error setting key for "
360 "crypto algorithm %s\n", cname); 361 "crypto algorithm %s\n", cname);
361 crypto_free_blkcipher(cp); 362 crypto_free_skcipher(cp);
362 return NULL; 363 return NULL;
363 } 364 }
364 return cp; 365 return cp;
@@ -412,9 +413,9 @@ context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
412 return 0; 413 return 0;
413 414
414out_free_enc: 415out_free_enc:
415 crypto_free_blkcipher(ctx->enc); 416 crypto_free_skcipher(ctx->enc);
416out_free_seq: 417out_free_seq:
417 crypto_free_blkcipher(ctx->seq); 418 crypto_free_skcipher(ctx->seq);
418out_err: 419out_err:
419 return -EINVAL; 420 return -EINVAL;
420} 421}
@@ -427,18 +428,17 @@ out_err:
427static int 428static int
428context_derive_keys_rc4(struct krb5_ctx *ctx) 429context_derive_keys_rc4(struct krb5_ctx *ctx)
429{ 430{
430 struct crypto_hash *hmac; 431 struct crypto_shash *hmac;
431 char sigkeyconstant[] = "signaturekey"; 432 char sigkeyconstant[] = "signaturekey";
432 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ 433 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
433 struct hash_desc desc; 434 struct shash_desc *desc;
434 struct scatterlist sg[1];
435 int err; 435 int err;
436 436
437 dprintk("RPC: %s: entered\n", __func__); 437 dprintk("RPC: %s: entered\n", __func__);
438 /* 438 /*
439 * derive cksum (aka Ksign) key 439 * derive cksum (aka Ksign) key
440 */ 440 */
441 hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); 441 hmac = crypto_alloc_shash(ctx->gk5e->cksum_name, 0, 0);
442 if (IS_ERR(hmac)) { 442 if (IS_ERR(hmac)) {
443 dprintk("%s: error %ld allocating hash '%s'\n", 443 dprintk("%s: error %ld allocating hash '%s'\n",
444 __func__, PTR_ERR(hmac), ctx->gk5e->cksum_name); 444 __func__, PTR_ERR(hmac), ctx->gk5e->cksum_name);
@@ -446,37 +446,40 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
446 goto out_err; 446 goto out_err;
447 } 447 }
448 448
449 err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength); 449 err = crypto_shash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength);
450 if (err) 450 if (err)
451 goto out_err_free_hmac; 451 goto out_err_free_hmac;
452 452
453 sg_init_table(sg, 1);
454 sg_set_buf(sg, sigkeyconstant, slen);
455 453
456	desc.tfm = hmac;	 454	desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(hmac), GFP_KERNEL);
457 desc.flags = 0; 455 if (!desc) {
458 456 dprintk("%s: failed to allocate hash descriptor for '%s'\n",
459 err = crypto_hash_init(&desc); 457 __func__, ctx->gk5e->cksum_name);
460 if (err) 458 err = -ENOMEM;
461 goto out_err_free_hmac; 459 goto out_err_free_hmac;
460 }
461
462 desc->tfm = hmac;
463 desc->flags = 0;
462 464
463 err = crypto_hash_digest(&desc, sg, slen, ctx->cksum); 465 err = crypto_shash_digest(desc, sigkeyconstant, slen, ctx->cksum);
466 kzfree(desc);
464 if (err) 467 if (err)
465 goto out_err_free_hmac; 468 goto out_err_free_hmac;
466 /* 469 /*
467 * allocate hash, and blkciphers for data and seqnum encryption 470 * allocate hash, and skciphers for data and seqnum encryption
468 */ 471 */
469 ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, 472 ctx->enc = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
470 CRYPTO_ALG_ASYNC); 473 CRYPTO_ALG_ASYNC);
471 if (IS_ERR(ctx->enc)) { 474 if (IS_ERR(ctx->enc)) {
472 err = PTR_ERR(ctx->enc); 475 err = PTR_ERR(ctx->enc);
473 goto out_err_free_hmac; 476 goto out_err_free_hmac;
474 } 477 }
475 478
476 ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0, 479 ctx->seq = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
477 CRYPTO_ALG_ASYNC); 480 CRYPTO_ALG_ASYNC);
478 if (IS_ERR(ctx->seq)) { 481 if (IS_ERR(ctx->seq)) {
479 crypto_free_blkcipher(ctx->enc); 482 crypto_free_skcipher(ctx->enc);
480 err = PTR_ERR(ctx->seq); 483 err = PTR_ERR(ctx->seq);
481 goto out_err_free_hmac; 484 goto out_err_free_hmac;
482 } 485 }
@@ -486,7 +489,7 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
486 err = 0; 489 err = 0;
487 490
488out_err_free_hmac: 491out_err_free_hmac:
489 crypto_free_hash(hmac); 492 crypto_free_shash(hmac);
490out_err: 493out_err:
491 dprintk("RPC: %s: returning %d\n", __func__, err); 494 dprintk("RPC: %s: returning %d\n", __func__, err);
492 return err; 495 return err;
@@ -588,7 +591,7 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
588 context_v2_alloc_cipher(ctx, "cbc(aes)", 591 context_v2_alloc_cipher(ctx, "cbc(aes)",
589 ctx->acceptor_seal); 592 ctx->acceptor_seal);
590 if (ctx->acceptor_enc_aux == NULL) { 593 if (ctx->acceptor_enc_aux == NULL) {
591 crypto_free_blkcipher(ctx->initiator_enc_aux); 594 crypto_free_skcipher(ctx->initiator_enc_aux);
592 goto out_free_acceptor_enc; 595 goto out_free_acceptor_enc;
593 } 596 }
594 } 597 }
@@ -596,9 +599,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
596 return 0; 599 return 0;
597 600
598out_free_acceptor_enc: 601out_free_acceptor_enc:
599 crypto_free_blkcipher(ctx->acceptor_enc); 602 crypto_free_skcipher(ctx->acceptor_enc);
600out_free_initiator_enc: 603out_free_initiator_enc:
601 crypto_free_blkcipher(ctx->initiator_enc); 604 crypto_free_skcipher(ctx->initiator_enc);
602out_err: 605out_err:
603 return -EINVAL; 606 return -EINVAL;
604} 607}
@@ -710,12 +713,12 @@ static void
710gss_delete_sec_context_kerberos(void *internal_ctx) { 713gss_delete_sec_context_kerberos(void *internal_ctx) {
711 struct krb5_ctx *kctx = internal_ctx; 714 struct krb5_ctx *kctx = internal_ctx;
712 715
713 crypto_free_blkcipher(kctx->seq); 716 crypto_free_skcipher(kctx->seq);
714 crypto_free_blkcipher(kctx->enc); 717 crypto_free_skcipher(kctx->enc);
715 crypto_free_blkcipher(kctx->acceptor_enc); 718 crypto_free_skcipher(kctx->acceptor_enc);
716 crypto_free_blkcipher(kctx->initiator_enc); 719 crypto_free_skcipher(kctx->initiator_enc);
717 crypto_free_blkcipher(kctx->acceptor_enc_aux); 720 crypto_free_skcipher(kctx->acceptor_enc_aux);
718 crypto_free_blkcipher(kctx->initiator_enc_aux); 721 crypto_free_skcipher(kctx->initiator_enc_aux);
719 kfree(kctx->mech_used.data); 722 kfree(kctx->mech_used.data);
720 kfree(kctx); 723 kfree(kctx);
721} 724}
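
Note: the gss_krb5_mech.c hunks above convert the code from the legacy blkcipher interface to the skcipher API while still requesting synchronous-only implementations (the CRYPTO_ALG_ASYNC mask). A minimal sketch of the resulting allocate/setkey/free pattern follows; the algorithm name, function name and key buffer are illustrative placeholders, not code from this patch.

#include <crypto/skcipher.h>
#include <linux/err.h>

/* Sketch only: request a synchronous skcipher (CRYPTO_ALG_ASYNC as the
 * mask excludes async implementations), set the key, and free the tfm
 * on any failure. "cbc(des3_ede)" is an illustrative algorithm name. */
static struct crypto_skcipher *example_alloc_cipher(const u8 *key,
						    unsigned int keylen)
{
	struct crypto_skcipher *tfm;

	tfm = crypto_alloc_skcipher("cbc(des3_ede)", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return NULL;
	if (crypto_skcipher_setkey(tfm, key, keylen)) {
		crypto_free_skcipher(tfm);
		return NULL;
	}
	return tfm;
}
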
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 20d55c793eb6..c8b9082f4a9d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -31,9 +31,9 @@
31 * PERFORMANCE OF THIS SOFTWARE. 31 * PERFORMANCE OF THIS SOFTWARE.
32 */ 32 */
33 33
34#include <crypto/skcipher.h>
34#include <linux/types.h> 35#include <linux/types.h>
35#include <linux/sunrpc/gss_krb5.h> 36#include <linux/sunrpc/gss_krb5.h>
36#include <linux/crypto.h>
37 37
38#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 38#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
39# define RPCDBG_FACILITY RPCDBG_AUTH 39# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -43,13 +43,13 @@ static s32
43krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, 43krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
44 unsigned char *cksum, unsigned char *buf) 44 unsigned char *cksum, unsigned char *buf)
45{ 45{
46 struct crypto_blkcipher *cipher; 46 struct crypto_skcipher *cipher;
47 unsigned char plain[8]; 47 unsigned char plain[8];
48 s32 code; 48 s32 code;
49 49
50 dprintk("RPC: %s:\n", __func__); 50 dprintk("RPC: %s:\n", __func__);
51 cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, 51 cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
52 CRYPTO_ALG_ASYNC); 52 CRYPTO_ALG_ASYNC);
53 if (IS_ERR(cipher)) 53 if (IS_ERR(cipher))
54 return PTR_ERR(cipher); 54 return PTR_ERR(cipher);
55 55
@@ -68,12 +68,12 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
68 68
69 code = krb5_encrypt(cipher, cksum, plain, buf, 8); 69 code = krb5_encrypt(cipher, cksum, plain, buf, 8);
70out: 70out:
71 crypto_free_blkcipher(cipher); 71 crypto_free_skcipher(cipher);
72 return code; 72 return code;
73} 73}
74s32 74s32
75krb5_make_seq_num(struct krb5_ctx *kctx, 75krb5_make_seq_num(struct krb5_ctx *kctx,
76 struct crypto_blkcipher *key, 76 struct crypto_skcipher *key,
77 int direction, 77 int direction,
78 u32 seqnum, 78 u32 seqnum,
79 unsigned char *cksum, unsigned char *buf) 79 unsigned char *cksum, unsigned char *buf)
@@ -101,13 +101,13 @@ static s32
101krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, 101krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
102 unsigned char *buf, int *direction, s32 *seqnum) 102 unsigned char *buf, int *direction, s32 *seqnum)
103{ 103{
104 struct crypto_blkcipher *cipher; 104 struct crypto_skcipher *cipher;
105 unsigned char plain[8]; 105 unsigned char plain[8];
106 s32 code; 106 s32 code;
107 107
108 dprintk("RPC: %s:\n", __func__); 108 dprintk("RPC: %s:\n", __func__);
109 cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, 109 cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
110 CRYPTO_ALG_ASYNC); 110 CRYPTO_ALG_ASYNC);
111 if (IS_ERR(cipher)) 111 if (IS_ERR(cipher))
112 return PTR_ERR(cipher); 112 return PTR_ERR(cipher);
113 113
@@ -130,7 +130,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
130 *seqnum = ((plain[0] << 24) | (plain[1] << 16) | 130 *seqnum = ((plain[0] << 24) | (plain[1] << 16) |
131 (plain[2] << 8) | (plain[3])); 131 (plain[2] << 8) | (plain[3]));
132out: 132out:
133 crypto_free_blkcipher(cipher); 133 crypto_free_skcipher(cipher);
134 return code; 134 return code;
135} 135}
136 136
@@ -142,7 +142,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
142{ 142{
143 s32 code; 143 s32 code;
144 unsigned char plain[8]; 144 unsigned char plain[8];
145 struct crypto_blkcipher *key = kctx->seq; 145 struct crypto_skcipher *key = kctx->seq;
146 146
147 dprintk("RPC: krb5_get_seq_num:\n"); 147 dprintk("RPC: krb5_get_seq_num:\n");
148 148
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index ca7e92a32f84..765088e4ad84 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -28,12 +28,12 @@
28 * SUCH DAMAGES. 28 * SUCH DAMAGES.
29 */ 29 */
30 30
31#include <crypto/skcipher.h>
31#include <linux/types.h> 32#include <linux/types.h>
32#include <linux/jiffies.h> 33#include <linux/jiffies.h>
33#include <linux/sunrpc/gss_krb5.h> 34#include <linux/sunrpc/gss_krb5.h>
34#include <linux/random.h> 35#include <linux/random.h>
35#include <linux/pagemap.h> 36#include <linux/pagemap.h>
36#include <linux/crypto.h>
37 37
38#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 38#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
39# define RPCDBG_FACILITY RPCDBG_AUTH 39# define RPCDBG_FACILITY RPCDBG_AUTH
@@ -174,7 +174,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
174 174
175 now = get_seconds(); 175 now = get_seconds();
176 176
177 blocksize = crypto_blkcipher_blocksize(kctx->enc); 177 blocksize = crypto_skcipher_blocksize(kctx->enc);
178 gss_krb5_add_padding(buf, offset, blocksize); 178 gss_krb5_add_padding(buf, offset, blocksize);
179 BUG_ON((buf->len - offset) % blocksize); 179 BUG_ON((buf->len - offset) % blocksize);
180 plainlen = conflen + buf->len - offset; 180 plainlen = conflen + buf->len - offset;
@@ -239,10 +239,10 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
239 return GSS_S_FAILURE; 239 return GSS_S_FAILURE;
240 240
241 if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { 241 if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
242 struct crypto_blkcipher *cipher; 242 struct crypto_skcipher *cipher;
243 int err; 243 int err;
244 cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, 244 cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
245 CRYPTO_ALG_ASYNC); 245 CRYPTO_ALG_ASYNC);
246 if (IS_ERR(cipher)) 246 if (IS_ERR(cipher))
247 return GSS_S_FAILURE; 247 return GSS_S_FAILURE;
248 248
@@ -250,7 +250,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
250 250
251 err = gss_encrypt_xdr_buf(cipher, buf, 251 err = gss_encrypt_xdr_buf(cipher, buf,
252 offset + headlen - conflen, pages); 252 offset + headlen - conflen, pages);
253 crypto_free_blkcipher(cipher); 253 crypto_free_skcipher(cipher);
254 if (err) 254 if (err)
255 return GSS_S_FAILURE; 255 return GSS_S_FAILURE;
256 } else { 256 } else {
@@ -327,18 +327,18 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
327 return GSS_S_BAD_SIG; 327 return GSS_S_BAD_SIG;
328 328
329 if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { 329 if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
330 struct crypto_blkcipher *cipher; 330 struct crypto_skcipher *cipher;
331 int err; 331 int err;
332 332
333 cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0, 333 cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
334 CRYPTO_ALG_ASYNC); 334 CRYPTO_ALG_ASYNC);
335 if (IS_ERR(cipher)) 335 if (IS_ERR(cipher))
336 return GSS_S_FAILURE; 336 return GSS_S_FAILURE;
337 337
338 krb5_rc4_setup_enc_key(kctx, cipher, seqnum); 338 krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
339 339
340 err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset); 340 err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
341 crypto_free_blkcipher(cipher); 341 crypto_free_skcipher(cipher);
342 if (err) 342 if (err)
343 return GSS_S_DEFECTIVE_TOKEN; 343 return GSS_S_DEFECTIVE_TOKEN;
344 } else { 344 } else {
@@ -371,7 +371,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
371 /* Copy the data back to the right position. XXX: Would probably be 371 /* Copy the data back to the right position. XXX: Would probably be
372 * better to copy and encrypt at the same time. */ 372 * better to copy and encrypt at the same time. */
373 373
374 blocksize = crypto_blkcipher_blocksize(kctx->enc); 374 blocksize = crypto_skcipher_blocksize(kctx->enc);
375 data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) + 375 data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
376 conflen; 376 conflen;
377 orig_start = buf->head[0].iov_base + offset; 377 orig_start = buf->head[0].iov_base + offset;
@@ -473,7 +473,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
473 *ptr++ = 0xff; 473 *ptr++ = 0xff;
474 be16ptr = (__be16 *)ptr; 474 be16ptr = (__be16 *)ptr;
475 475
476 blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc); 476 blocksize = crypto_skcipher_blocksize(kctx->acceptor_enc);
477 *be16ptr++ = 0; 477 *be16ptr++ = 0;
478 /* "inner" token header always uses 0 for RRC */ 478 /* "inner" token header always uses 0 for RRC */
479 *be16ptr++ = 0; 479 *be16ptr++ = 0;
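
Note: the wrap/unwrap paths above now obtain the cipher block size through crypto_skcipher_blocksize(). A small illustrative helper showing how a payload length is padded out to a whole number of cipher blocks, in the spirit of the gss_krb5_add_padding() call above; the helper itself is an assumption, only the blocksize accessor is the real API.

/* Sketch: number of pad bytes needed so the payload fills whole cipher
 * blocks (always at least one pad byte when already aligned). */
static unsigned int example_pad_len(struct crypto_skcipher *tfm,
				    unsigned int payload_len)
{
	unsigned int blocksize = crypto_skcipher_blocksize(tfm);

	return blocksize - (payload_len % blocksize);
}
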
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index c2a2b584a056..8d9eb4d5ddd8 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -113,8 +113,8 @@ const struct rpc_authops authnull_ops = {
113 113
114static 114static
115struct rpc_auth null_auth = { 115struct rpc_auth null_auth = {
116 .au_cslack = 4, 116 .au_cslack = NUL_CALLSLACK,
117 .au_rslack = 2, 117 .au_rslack = NUL_REPLYSLACK,
118 .au_ops = &authnull_ops, 118 .au_ops = &authnull_ops,
119 .au_flavor = RPC_AUTH_NULL, 119 .au_flavor = RPC_AUTH_NULL,
120 .au_count = ATOMIC_INIT(0), 120 .au_count = ATOMIC_INIT(0),
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 548240dd15fc..0d3dd364c22f 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -23,8 +23,6 @@ struct unx_cred {
23}; 23};
24#define uc_uid uc_base.cr_uid 24#define uc_uid uc_base.cr_uid
25 25
26#define UNX_WRITESLACK (21 + XDR_QUADLEN(UNX_MAXNODENAME))
27
28#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 26#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
29# define RPCDBG_FACILITY RPCDBG_AUTH 27# define RPCDBG_FACILITY RPCDBG_AUTH
30#endif 28#endif
@@ -228,8 +226,8 @@ const struct rpc_authops authunix_ops = {
228 226
229static 227static
230struct rpc_auth unix_auth = { 228struct rpc_auth unix_auth = {
231 .au_cslack = UNX_WRITESLACK, 229 .au_cslack = UNX_CALLSLACK,
232 .au_rslack = 2, /* assume AUTH_NULL verf */ 230 .au_rslack = NUL_REPLYSLACK,
233 .au_ops = &authunix_ops, 231 .au_ops = &authunix_ops,
234 .au_flavor = RPC_AUTH_UNIX, 232 .au_flavor = RPC_AUTH_UNIX,
235 .au_count = ATOMIC_INIT(0), 233 .au_count = ATOMIC_INIT(0),
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2b32fd602669..008c25d1b9f9 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1182,14 +1182,14 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
1182 } 1182 }
1183 1183
1184 crq->q.reader = 0; 1184 crq->q.reader = 0;
1185 crq->item = cache_get(h);
1186 crq->buf = buf; 1185 crq->buf = buf;
1187 crq->len = 0; 1186 crq->len = 0;
1188 crq->readers = 0; 1187 crq->readers = 0;
1189 spin_lock(&queue_lock); 1188 spin_lock(&queue_lock);
1190 if (test_bit(CACHE_PENDING, &h->flags)) 1189 if (test_bit(CACHE_PENDING, &h->flags)) {
1190 crq->item = cache_get(h);
1191 list_add_tail(&crq->q.list, &detail->queue); 1191 list_add_tail(&crq->q.list, &detail->queue);
1192 else 1192 } else
1193 /* Lost a race, no longer PENDING, so don't enqueue */ 1193 /* Lost a race, no longer PENDING, so don't enqueue */
1194 ret = -EAGAIN; 1194 ret = -EAGAIN;
1195 spin_unlock(&queue_lock); 1195 spin_unlock(&queue_lock);
@@ -1225,7 +1225,7 @@ int qword_get(char **bpp, char *dest, int bufsize)
1225 if (bp[0] == '\\' && bp[1] == 'x') { 1225 if (bp[0] == '\\' && bp[1] == 'x') {
1226 /* HEX STRING */ 1226 /* HEX STRING */
1227 bp += 2; 1227 bp += 2;
1228 while (len < bufsize) { 1228 while (len < bufsize - 1) {
1229 int h, l; 1229 int h, l;
1230 1230
1231 h = hex_to_bin(bp[0]); 1231 h = hex_to_bin(bp[0]);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b7f21044f4d8..7e0c9bf22df8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -354,6 +354,7 @@ static void rpc_free_clid(struct rpc_clnt *clnt)
354} 354}
355 355
356static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, 356static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
357 struct rpc_xprt_switch *xps,
357 struct rpc_xprt *xprt, 358 struct rpc_xprt *xprt,
358 struct rpc_clnt *parent) 359 struct rpc_clnt *parent)
359{ 360{
@@ -411,6 +412,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
411 } 412 }
412 413
413 rpc_clnt_set_transport(clnt, xprt, timeout); 414 rpc_clnt_set_transport(clnt, xprt, timeout);
415 xprt_iter_init(&clnt->cl_xpi, xps);
416 xprt_switch_put(xps);
414 417
415 clnt->cl_rtt = &clnt->cl_rtt_default; 418 clnt->cl_rtt = &clnt->cl_rtt_default;
416 rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval); 419 rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
@@ -438,6 +441,7 @@ out_no_clid:
438out_err: 441out_err:
439 rpciod_down(); 442 rpciod_down();
440out_no_rpciod: 443out_no_rpciod:
444 xprt_switch_put(xps);
441 xprt_put(xprt); 445 xprt_put(xprt);
442 return ERR_PTR(err); 446 return ERR_PTR(err);
443} 447}
@@ -446,8 +450,13 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
446 struct rpc_xprt *xprt) 450 struct rpc_xprt *xprt)
447{ 451{
448 struct rpc_clnt *clnt = NULL; 452 struct rpc_clnt *clnt = NULL;
453 struct rpc_xprt_switch *xps;
449 454
450 clnt = rpc_new_client(args, xprt, NULL); 455 xps = xprt_switch_alloc(xprt, GFP_KERNEL);
456 if (xps == NULL)
457 return ERR_PTR(-ENOMEM);
458
459 clnt = rpc_new_client(args, xps, xprt, NULL);
451 if (IS_ERR(clnt)) 460 if (IS_ERR(clnt))
452 return clnt; 461 return clnt;
453 462
@@ -564,6 +573,7 @@ EXPORT_SYMBOL_GPL(rpc_create);
564static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, 573static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
565 struct rpc_clnt *clnt) 574 struct rpc_clnt *clnt)
566{ 575{
576 struct rpc_xprt_switch *xps;
567 struct rpc_xprt *xprt; 577 struct rpc_xprt *xprt;
568 struct rpc_clnt *new; 578 struct rpc_clnt *new;
569 int err; 579 int err;
@@ -571,13 +581,17 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
571 err = -ENOMEM; 581 err = -ENOMEM;
572 rcu_read_lock(); 582 rcu_read_lock();
573 xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); 583 xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
584 xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
574 rcu_read_unlock(); 585 rcu_read_unlock();
575 if (xprt == NULL) 586 if (xprt == NULL || xps == NULL) {
587 xprt_put(xprt);
588 xprt_switch_put(xps);
576 goto out_err; 589 goto out_err;
590 }
577 args->servername = xprt->servername; 591 args->servername = xprt->servername;
578 args->nodename = clnt->cl_nodename; 592 args->nodename = clnt->cl_nodename;
579 593
580 new = rpc_new_client(args, xprt, clnt); 594 new = rpc_new_client(args, xps, xprt, clnt);
581 if (IS_ERR(new)) { 595 if (IS_ERR(new)) {
582 err = PTR_ERR(new); 596 err = PTR_ERR(new);
583 goto out_err; 597 goto out_err;
@@ -657,6 +671,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
657{ 671{
658 const struct rpc_timeout *old_timeo; 672 const struct rpc_timeout *old_timeo;
659 rpc_authflavor_t pseudoflavor; 673 rpc_authflavor_t pseudoflavor;
674 struct rpc_xprt_switch *xps, *oldxps;
660 struct rpc_xprt *xprt, *old; 675 struct rpc_xprt *xprt, *old;
661 struct rpc_clnt *parent; 676 struct rpc_clnt *parent;
662 int err; 677 int err;
@@ -668,10 +683,17 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
668 return PTR_ERR(xprt); 683 return PTR_ERR(xprt);
669 } 684 }
670 685
686 xps = xprt_switch_alloc(xprt, GFP_KERNEL);
687 if (xps == NULL) {
688 xprt_put(xprt);
689 return -ENOMEM;
690 }
691
671 pseudoflavor = clnt->cl_auth->au_flavor; 692 pseudoflavor = clnt->cl_auth->au_flavor;
672 693
673 old_timeo = clnt->cl_timeout; 694 old_timeo = clnt->cl_timeout;
674 old = rpc_clnt_set_transport(clnt, xprt, timeout); 695 old = rpc_clnt_set_transport(clnt, xprt, timeout);
696 oldxps = xprt_iter_xchg_switch(&clnt->cl_xpi, xps);
675 697
676 rpc_unregister_client(clnt); 698 rpc_unregister_client(clnt);
677 __rpc_clnt_remove_pipedir(clnt); 699 __rpc_clnt_remove_pipedir(clnt);
@@ -697,20 +719,74 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
697 synchronize_rcu(); 719 synchronize_rcu();
698 if (parent != clnt) 720 if (parent != clnt)
699 rpc_release_client(parent); 721 rpc_release_client(parent);
722 xprt_switch_put(oldxps);
700 xprt_put(old); 723 xprt_put(old);
701 dprintk("RPC: replaced xprt for clnt %p\n", clnt); 724 dprintk("RPC: replaced xprt for clnt %p\n", clnt);
702 return 0; 725 return 0;
703 726
704out_revert: 727out_revert:
728 xps = xprt_iter_xchg_switch(&clnt->cl_xpi, oldxps);
705 rpc_clnt_set_transport(clnt, old, old_timeo); 729 rpc_clnt_set_transport(clnt, old, old_timeo);
706 clnt->cl_parent = parent; 730 clnt->cl_parent = parent;
707 rpc_client_register(clnt, pseudoflavor, NULL); 731 rpc_client_register(clnt, pseudoflavor, NULL);
732 xprt_switch_put(xps);
708 xprt_put(xprt); 733 xprt_put(xprt);
709 dprintk("RPC: failed to switch xprt for clnt %p\n", clnt); 734 dprintk("RPC: failed to switch xprt for clnt %p\n", clnt);
710 return err; 735 return err;
711} 736}
712EXPORT_SYMBOL_GPL(rpc_switch_client_transport); 737EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
713 738
739static
740int rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi)
741{
742 struct rpc_xprt_switch *xps;
743
744 rcu_read_lock();
745 xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
746 rcu_read_unlock();
747 if (xps == NULL)
748 return -EAGAIN;
749 xprt_iter_init_listall(xpi, xps);
750 xprt_switch_put(xps);
751 return 0;
752}
753
754/**
755 * rpc_clnt_iterate_for_each_xprt - Apply a function to all transports
756 * @clnt: pointer to client
757 * @fn: function to apply
758 * @data: void pointer to function data
759 *
760 * Iterates through the list of RPC transports currently attached to the
761 * client and applies the function fn(clnt, xprt, data).
762 *
763 * On error, the iteration stops, and the function returns the error value.
764 */
765int rpc_clnt_iterate_for_each_xprt(struct rpc_clnt *clnt,
766 int (*fn)(struct rpc_clnt *, struct rpc_xprt *, void *),
767 void *data)
768{
769 struct rpc_xprt_iter xpi;
770 int ret;
771
772 ret = rpc_clnt_xprt_iter_init(clnt, &xpi);
773 if (ret)
774 return ret;
775 for (;;) {
776 struct rpc_xprt *xprt = xprt_iter_get_next(&xpi);
777
778 if (!xprt)
779 break;
780 ret = fn(clnt, xprt, data);
781 xprt_put(xprt);
782 if (ret < 0)
783 break;
784 }
785 xprt_iter_destroy(&xpi);
786 return ret;
787}
788EXPORT_SYMBOL_GPL(rpc_clnt_iterate_for_each_xprt);
789
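
The helper above applies a caller-supplied function to every transport currently attached to the client, stopping early on a negative return. A hedged sketch of a caller, mirroring the style of the swap-activation callbacks added later in this patch; the counting callback is hypothetical.

/* Sketch: count the transports attached to an rpc_clnt using the
 * iterator helper added above. The callback signature is
 * int (*)(struct rpc_clnt *, struct rpc_xprt *, void *). */
static int example_count_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
			      void *data)
{
	(*(unsigned int *)data)++;
	return 0;	/* a negative return would stop the iteration */
}

static unsigned int example_nr_xprts(struct rpc_clnt *clnt)
{
	unsigned int count = 0;

	rpc_clnt_iterate_for_each_xprt(clnt, example_count_xprt, &count);
	return count;
}
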
714/* 790/*
715 * Kill all tasks for the given client. 791 * Kill all tasks for the given client.
716 * XXX: kill their descendants as well? 792 * XXX: kill their descendants as well?
@@ -783,6 +859,7 @@ rpc_free_client(struct rpc_clnt *clnt)
783 rpc_free_iostats(clnt->cl_metrics); 859 rpc_free_iostats(clnt->cl_metrics);
784 clnt->cl_metrics = NULL; 860 clnt->cl_metrics = NULL;
785 xprt_put(rcu_dereference_raw(clnt->cl_xprt)); 861 xprt_put(rcu_dereference_raw(clnt->cl_xprt));
862 xprt_iter_destroy(&clnt->cl_xpi);
786 rpciod_down(); 863 rpciod_down();
787 rpc_free_clid(clnt); 864 rpc_free_clid(clnt);
788 kfree(clnt); 865 kfree(clnt);
@@ -868,6 +945,7 @@ EXPORT_SYMBOL_GPL(rpc_bind_new_program);
868void rpc_task_release_client(struct rpc_task *task) 945void rpc_task_release_client(struct rpc_task *task)
869{ 946{
870 struct rpc_clnt *clnt = task->tk_client; 947 struct rpc_clnt *clnt = task->tk_client;
948 struct rpc_xprt *xprt = task->tk_xprt;
871 949
872 if (clnt != NULL) { 950 if (clnt != NULL) {
873 /* Remove from client task list */ 951 /* Remove from client task list */
@@ -878,13 +956,22 @@ void rpc_task_release_client(struct rpc_task *task)
878 956
879 rpc_release_client(clnt); 957 rpc_release_client(clnt);
880 } 958 }
959
960 if (xprt != NULL) {
961 task->tk_xprt = NULL;
962
963 xprt_put(xprt);
964 }
881} 965}
882 966
883static 967static
884void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) 968void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
885{ 969{
970
886 if (clnt != NULL) { 971 if (clnt != NULL) {
887 rpc_task_release_client(task); 972 rpc_task_release_client(task);
973 if (task->tk_xprt == NULL)
974 task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
888 task->tk_client = clnt; 975 task->tk_client = clnt;
889 atomic_inc(&clnt->cl_count); 976 atomic_inc(&clnt->cl_count);
890 if (clnt->cl_softrtry) 977 if (clnt->cl_softrtry)
@@ -900,14 +987,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
900 } 987 }
901} 988}
902 989
903void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt)
904{
905 rpc_task_release_client(task);
906 rpc_task_set_client(task, clnt);
907}
908EXPORT_SYMBOL_GPL(rpc_task_reset_client);
909
910
911static void 990static void
912rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) 991rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
913{ 992{
@@ -2104,11 +2183,9 @@ call_timeout(struct rpc_task *task)
2104 } 2183 }
2105 if (RPC_IS_SOFT(task)) { 2184 if (RPC_IS_SOFT(task)) {
2106 if (clnt->cl_chatty) { 2185 if (clnt->cl_chatty) {
2107 rcu_read_lock();
2108 printk(KERN_NOTICE "%s: server %s not responding, timed out\n", 2186 printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
2109 clnt->cl_program->name, 2187 clnt->cl_program->name,
2110 rcu_dereference(clnt->cl_xprt)->servername); 2188 task->tk_xprt->servername);
2111 rcu_read_unlock();
2112 } 2189 }
2113 if (task->tk_flags & RPC_TASK_TIMEOUT) 2190 if (task->tk_flags & RPC_TASK_TIMEOUT)
2114 rpc_exit(task, -ETIMEDOUT); 2191 rpc_exit(task, -ETIMEDOUT);
@@ -2120,11 +2197,9 @@ call_timeout(struct rpc_task *task)
2120 if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { 2197 if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
2121 task->tk_flags |= RPC_CALL_MAJORSEEN; 2198 task->tk_flags |= RPC_CALL_MAJORSEEN;
2122 if (clnt->cl_chatty) { 2199 if (clnt->cl_chatty) {
2123 rcu_read_lock();
2124 printk(KERN_NOTICE "%s: server %s not responding, still trying\n", 2200 printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
2125 clnt->cl_program->name, 2201 clnt->cl_program->name,
2126 rcu_dereference(clnt->cl_xprt)->servername); 2202 task->tk_xprt->servername);
2127 rcu_read_unlock();
2128 } 2203 }
2129 } 2204 }
2130 rpc_force_rebind(clnt); 2205 rpc_force_rebind(clnt);
@@ -2154,11 +2229,9 @@ call_decode(struct rpc_task *task)
2154 2229
2155 if (task->tk_flags & RPC_CALL_MAJORSEEN) { 2230 if (task->tk_flags & RPC_CALL_MAJORSEEN) {
2156 if (clnt->cl_chatty) { 2231 if (clnt->cl_chatty) {
2157 rcu_read_lock();
2158 printk(KERN_NOTICE "%s: server %s OK\n", 2232 printk(KERN_NOTICE "%s: server %s OK\n",
2159 clnt->cl_program->name, 2233 clnt->cl_program->name,
2160 rcu_dereference(clnt->cl_xprt)->servername); 2234 task->tk_xprt->servername);
2161 rcu_read_unlock();
2162 } 2235 }
2163 task->tk_flags &= ~RPC_CALL_MAJORSEEN; 2236 task->tk_flags &= ~RPC_CALL_MAJORSEEN;
2164 } 2237 }
@@ -2312,11 +2385,9 @@ rpc_verify_header(struct rpc_task *task)
2312 task->tk_action = call_bind; 2385 task->tk_action = call_bind;
2313 goto out_retry; 2386 goto out_retry;
2314 case RPC_AUTH_TOOWEAK: 2387 case RPC_AUTH_TOOWEAK:
2315 rcu_read_lock();
2316 printk(KERN_NOTICE "RPC: server %s requires stronger " 2388 printk(KERN_NOTICE "RPC: server %s requires stronger "
2317 "authentication.\n", 2389 "authentication.\n",
2318 rcu_dereference(clnt->cl_xprt)->servername); 2390 task->tk_xprt->servername);
2319 rcu_read_unlock();
2320 break; 2391 break;
2321 default: 2392 default:
2322 dprintk("RPC: %5u %s: unknown auth error: %x\n", 2393 dprintk("RPC: %5u %s: unknown auth error: %x\n",
@@ -2341,27 +2412,27 @@ rpc_verify_header(struct rpc_task *task)
2341 case RPC_SUCCESS: 2412 case RPC_SUCCESS:
2342 return p; 2413 return p;
2343 case RPC_PROG_UNAVAIL: 2414 case RPC_PROG_UNAVAIL:
2344 dprintk_rcu("RPC: %5u %s: program %u is unsupported " 2415 dprintk("RPC: %5u %s: program %u is unsupported "
2345 "by server %s\n", task->tk_pid, __func__, 2416 "by server %s\n", task->tk_pid, __func__,
2346 (unsigned int)clnt->cl_prog, 2417 (unsigned int)clnt->cl_prog,
2347 rcu_dereference(clnt->cl_xprt)->servername); 2418 task->tk_xprt->servername);
2348 error = -EPFNOSUPPORT; 2419 error = -EPFNOSUPPORT;
2349 goto out_err; 2420 goto out_err;
2350 case RPC_PROG_MISMATCH: 2421 case RPC_PROG_MISMATCH:
2351 dprintk_rcu("RPC: %5u %s: program %u, version %u unsupported " 2422 dprintk("RPC: %5u %s: program %u, version %u unsupported "
2352 "by server %s\n", task->tk_pid, __func__, 2423 "by server %s\n", task->tk_pid, __func__,
2353 (unsigned int)clnt->cl_prog, 2424 (unsigned int)clnt->cl_prog,
2354 (unsigned int)clnt->cl_vers, 2425 (unsigned int)clnt->cl_vers,
2355 rcu_dereference(clnt->cl_xprt)->servername); 2426 task->tk_xprt->servername);
2356 error = -EPROTONOSUPPORT; 2427 error = -EPROTONOSUPPORT;
2357 goto out_err; 2428 goto out_err;
2358 case RPC_PROC_UNAVAIL: 2429 case RPC_PROC_UNAVAIL:
2359 dprintk_rcu("RPC: %5u %s: proc %s unsupported by program %u, " 2430 dprintk("RPC: %5u %s: proc %s unsupported by program %u, "
2360 "version %u on server %s\n", 2431 "version %u on server %s\n",
2361 task->tk_pid, __func__, 2432 task->tk_pid, __func__,
2362 rpc_proc_name(task), 2433 rpc_proc_name(task),
2363 clnt->cl_prog, clnt->cl_vers, 2434 clnt->cl_prog, clnt->cl_vers,
2364 rcu_dereference(clnt->cl_xprt)->servername); 2435 task->tk_xprt->servername);
2365 error = -EOPNOTSUPP; 2436 error = -EOPNOTSUPP;
2366 goto out_err; 2437 goto out_err;
2367 case RPC_GARBAGE_ARGS: 2438 case RPC_GARBAGE_ARGS:
@@ -2421,7 +2492,10 @@ static int rpc_ping(struct rpc_clnt *clnt)
2421 return err; 2492 return err;
2422} 2493}
2423 2494
2424struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags) 2495static
2496struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
2497 struct rpc_xprt *xprt, struct rpc_cred *cred, int flags,
2498 const struct rpc_call_ops *ops, void *data)
2425{ 2499{
2426 struct rpc_message msg = { 2500 struct rpc_message msg = {
2427 .rpc_proc = &rpcproc_null, 2501 .rpc_proc = &rpcproc_null,
@@ -2429,14 +2503,140 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int
2429 }; 2503 };
2430 struct rpc_task_setup task_setup_data = { 2504 struct rpc_task_setup task_setup_data = {
2431 .rpc_client = clnt, 2505 .rpc_client = clnt,
2506 .rpc_xprt = xprt,
2432 .rpc_message = &msg, 2507 .rpc_message = &msg,
2433 .callback_ops = &rpc_default_ops, 2508 .callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
2509 .callback_data = data,
2434 .flags = flags, 2510 .flags = flags,
2435 }; 2511 };
2512
2436 return rpc_run_task(&task_setup_data); 2513 return rpc_run_task(&task_setup_data);
2437} 2514}
2515
2516struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
2517{
2518 return rpc_call_null_helper(clnt, NULL, cred, flags, NULL, NULL);
2519}
2438EXPORT_SYMBOL_GPL(rpc_call_null); 2520EXPORT_SYMBOL_GPL(rpc_call_null);
2439 2521
2522struct rpc_cb_add_xprt_calldata {
2523 struct rpc_xprt_switch *xps;
2524 struct rpc_xprt *xprt;
2525};
2526
2527static void rpc_cb_add_xprt_done(struct rpc_task *task, void *calldata)
2528{
2529 struct rpc_cb_add_xprt_calldata *data = calldata;
2530
2531 if (task->tk_status == 0)
2532 rpc_xprt_switch_add_xprt(data->xps, data->xprt);
2533}
2534
2535static void rpc_cb_add_xprt_release(void *calldata)
2536{
2537 struct rpc_cb_add_xprt_calldata *data = calldata;
2538
2539 xprt_put(data->xprt);
2540 xprt_switch_put(data->xps);
2541 kfree(data);
2542}
2543
2544static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
2545 .rpc_call_done = rpc_cb_add_xprt_done,
2546 .rpc_release = rpc_cb_add_xprt_release,
2547};
2548
2549/**
2550 * rpc_clnt_test_and_add_xprt - Test and add a new transport to a rpc_clnt
2551 * @clnt: pointer to struct rpc_clnt
2552 * @xps: pointer to struct rpc_xprt_switch
2553 * @xprt: pointer to struct rpc_xprt
2554 * @dummy: unused
2555 */
2556int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
2557 struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
2558 void *dummy)
2559{
2560 struct rpc_cb_add_xprt_calldata *data;
2561 struct rpc_cred *cred;
2562 struct rpc_task *task;
2563
2564 data = kmalloc(sizeof(*data), GFP_NOFS);
2565 if (!data)
2566 return -ENOMEM;
2567 data->xps = xprt_switch_get(xps);
2568 data->xprt = xprt_get(xprt);
2569
2570 cred = authnull_ops.lookup_cred(NULL, NULL, 0);
2571 task = rpc_call_null_helper(clnt, xprt, cred,
2572 RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC,
2573 &rpc_cb_add_xprt_call_ops, data);
2574 put_rpccred(cred);
2575 if (IS_ERR(task))
2576 return PTR_ERR(task);
2577 rpc_put_task(task);
2578 return 1;
2579}
2580EXPORT_SYMBOL_GPL(rpc_clnt_test_and_add_xprt);
2581
2582/**
2583 * rpc_clnt_add_xprt - Add a new transport to a rpc_clnt
2584 * @clnt: pointer to struct rpc_clnt
2585 * @xprtargs: pointer to struct xprt_create
2586 * @setup: callback to test and/or set up the connection
2587 * @data: pointer to setup function data
2588 *
2589 * Creates a new transport using the parameters set in args and
2590 * adds it to clnt.
2591 * If a setup callback is provided, it is called to test the connection
2592 * (for example by sending a NULL ping) before the new transport is added.
2593 *
2594 */
2595int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
2596 struct xprt_create *xprtargs,
2597 int (*setup)(struct rpc_clnt *,
2598 struct rpc_xprt_switch *,
2599 struct rpc_xprt *,
2600 void *),
2601 void *data)
2602{
2603 struct rpc_xprt_switch *xps;
2604 struct rpc_xprt *xprt;
2605 unsigned char resvport;
2606 int ret = 0;
2607
2608 rcu_read_lock();
2609 xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
2610 xprt = xprt_iter_xprt(&clnt->cl_xpi);
2611 if (xps == NULL || xprt == NULL) {
2612 rcu_read_unlock();
2613 return -EAGAIN;
2614 }
2615 resvport = xprt->resvport;
2616 rcu_read_unlock();
2617
2618 xprt = xprt_create_transport(xprtargs);
2619 if (IS_ERR(xprt)) {
2620 ret = PTR_ERR(xprt);
2621 goto out_put_switch;
2622 }
2623 xprt->resvport = resvport;
2624
2625 rpc_xprt_switch_set_roundrobin(xps);
2626 if (setup) {
2627 ret = setup(clnt, xps, xprt, data);
2628 if (ret != 0)
2629 goto out_put_xprt;
2630 }
2631 rpc_xprt_switch_add_xprt(xps, xprt);
2632out_put_xprt:
2633 xprt_put(xprt);
2634out_put_switch:
2635 xprt_switch_put(xps);
2636 return ret;
2637}
2638EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
2639
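
A sketch of how a caller might combine the two helpers above: rpc_clnt_add_xprt() creates the transport from an xprt_create description and, when a setup callback is given, rpc_clnt_test_and_add_xprt() sends an asynchronous NULL ping before the transport joins the switch. The transport type and address handling below are illustrative assumptions, not taken from this patch.

/* Hypothetical caller: describe a new TCP transport and let
 * rpc_clnt_test_and_add_xprt() ping it before it is added. */
static int example_add_transport(struct rpc_clnt *clnt,
				 struct sockaddr *addr, size_t addrlen,
				 struct net *net)
{
	struct xprt_create args = {
		.ident		= XPRT_TRANSPORT_TCP,	/* illustrative */
		.net		= net,
		.dstaddr	= addr,
		.addrlen	= addrlen,
	};

	return rpc_clnt_add_xprt(clnt, &args,
				 rpc_clnt_test_and_add_xprt, NULL);
}
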
2440#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 2640#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
2441static void rpc_show_header(void) 2641static void rpc_show_header(void)
2442{ 2642{
@@ -2483,57 +2683,39 @@ void rpc_show_tasks(struct net *net)
2483#endif 2683#endif
2484 2684
2485#if IS_ENABLED(CONFIG_SUNRPC_SWAP) 2685#if IS_ENABLED(CONFIG_SUNRPC_SWAP)
2686static int
2687rpc_clnt_swap_activate_callback(struct rpc_clnt *clnt,
2688 struct rpc_xprt *xprt,
2689 void *dummy)
2690{
2691 return xprt_enable_swap(xprt);
2692}
2693
2486int 2694int
2487rpc_clnt_swap_activate(struct rpc_clnt *clnt) 2695rpc_clnt_swap_activate(struct rpc_clnt *clnt)
2488{ 2696{
2489 int ret = 0; 2697 if (atomic_inc_return(&clnt->cl_swapper) == 1)
2490 struct rpc_xprt *xprt; 2698 return rpc_clnt_iterate_for_each_xprt(clnt,
2491 2699 rpc_clnt_swap_activate_callback, NULL);
2492 if (atomic_inc_return(&clnt->cl_swapper) == 1) { 2700 return 0;
2493retry:
2494 rcu_read_lock();
2495 xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
2496 rcu_read_unlock();
2497 if (!xprt) {
2498 /*
2499 * If we didn't get a reference, then we likely are
2500 * racing with a migration event. Wait for a grace
2501 * period and try again.
2502 */
2503 synchronize_rcu();
2504 goto retry;
2505 }
2506
2507 ret = xprt_enable_swap(xprt);
2508 xprt_put(xprt);
2509 }
2510 return ret;
2511} 2701}
2512EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate); 2702EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate);
2513 2703
2704static int
2705rpc_clnt_swap_deactivate_callback(struct rpc_clnt *clnt,
2706 struct rpc_xprt *xprt,
2707 void *dummy)
2708{
2709 xprt_disable_swap(xprt);
2710 return 0;
2711}
2712
2514void 2713void
2515rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) 2714rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
2516{ 2715{
2517 struct rpc_xprt *xprt; 2716 if (atomic_dec_if_positive(&clnt->cl_swapper) == 0)
2518 2717 rpc_clnt_iterate_for_each_xprt(clnt,
2519 if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) { 2718 rpc_clnt_swap_deactivate_callback, NULL);
2520retry:
2521 rcu_read_lock();
2522 xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
2523 rcu_read_unlock();
2524 if (!xprt) {
2525 /*
2526 * If we didn't get a reference, then we likely are
2527 * racing with a migration event. Wait for a grace
2528 * period and try again.
2529 */
2530 synchronize_rcu();
2531 goto retry;
2532 }
2533
2534 xprt_disable_swap(xprt);
2535 xprt_put(xprt);
2536 }
2537} 2719}
2538EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate); 2720EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate);
2539#endif /* CONFIG_SUNRPC_SWAP */ 2721#endif /* CONFIG_SUNRPC_SWAP */
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index cf5770d8f49a..5b30603596d0 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -648,10 +648,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
648static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt) 648static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
649{ 649{
650 struct rpc_clnt *parent = clnt->cl_parent; 650 struct rpc_clnt *parent = clnt->cl_parent;
651 struct rpc_xprt *xprt = rcu_dereference(clnt->cl_xprt); 651 struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch);
652 652
653 while (parent != clnt) { 653 while (parent != clnt) {
654 if (rcu_dereference(parent->cl_xprt) != xprt) 654 if (rcu_access_pointer(parent->cl_xpi.xpi_xpswitch) != xps)
655 break; 655 break;
656 if (clnt->cl_autobind) 656 if (clnt->cl_autobind)
657 break; 657 break;
@@ -683,11 +683,9 @@ void rpcb_getport_async(struct rpc_task *task)
683 int status; 683 int status;
684 684
685 rcu_read_lock(); 685 rcu_read_lock();
686 do { 686 clnt = rpcb_find_transport_owner(task->tk_client);
687 clnt = rpcb_find_transport_owner(task->tk_client);
688 xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
689 } while (xprt == NULL);
690 rcu_read_unlock(); 687 rcu_read_unlock();
688 xprt = xprt_get(task->tk_xprt);
691 689
692 dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", 690 dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
693 task->tk_pid, __func__, 691 task->tk_pid, __func__,
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 73ad57a59989..fcfd48d263f6 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -909,6 +909,8 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
909 /* Initialize workqueue for async tasks */ 909 /* Initialize workqueue for async tasks */
910 task->tk_workqueue = task_setup_data->workqueue; 910 task->tk_workqueue = task_setup_data->workqueue;
911 911
912 task->tk_xprt = xprt_get(task_setup_data->rpc_xprt);
913
912 if (task->tk_ops->rpc_call_prepare != NULL) 914 if (task->tk_ops->rpc_call_prepare != NULL)
913 task->tk_action = rpc_prepare_task; 915 task->tk_action = rpc_prepare_task;
914 916
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 37edea6fa92d..216a1385718a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -48,6 +48,7 @@
48#include <linux/sunrpc/clnt.h> 48#include <linux/sunrpc/clnt.h>
49#include <linux/sunrpc/metrics.h> 49#include <linux/sunrpc/metrics.h>
50#include <linux/sunrpc/bc_xprt.h> 50#include <linux/sunrpc/bc_xprt.h>
51#include <linux/rcupdate.h>
51 52
52#include <trace/events/sunrpc.h> 53#include <trace/events/sunrpc.h>
53 54
@@ -1166,7 +1167,7 @@ void xprt_free(struct rpc_xprt *xprt)
1166{ 1167{
1167 put_net(xprt->xprt_net); 1168 put_net(xprt->xprt_net);
1168 xprt_free_all_slots(xprt); 1169 xprt_free_all_slots(xprt);
1169 kfree(xprt); 1170 kfree_rcu(xprt, rcu);
1170} 1171}
1171EXPORT_SYMBOL_GPL(xprt_free); 1172EXPORT_SYMBOL_GPL(xprt_free);
1172 1173
@@ -1180,7 +1181,7 @@ EXPORT_SYMBOL_GPL(xprt_free);
1180 */ 1181 */
1181void xprt_reserve(struct rpc_task *task) 1182void xprt_reserve(struct rpc_task *task)
1182{ 1183{
1183 struct rpc_xprt *xprt; 1184 struct rpc_xprt *xprt = task->tk_xprt;
1184 1185
1185 task->tk_status = 0; 1186 task->tk_status = 0;
1186 if (task->tk_rqstp != NULL) 1187 if (task->tk_rqstp != NULL)
@@ -1188,11 +1189,8 @@ void xprt_reserve(struct rpc_task *task)
1188 1189
1189 task->tk_timeout = 0; 1190 task->tk_timeout = 0;
1190 task->tk_status = -EAGAIN; 1191 task->tk_status = -EAGAIN;
1191 rcu_read_lock();
1192 xprt = rcu_dereference(task->tk_client->cl_xprt);
1193 if (!xprt_throttle_congested(xprt, task)) 1192 if (!xprt_throttle_congested(xprt, task))
1194 xprt->ops->alloc_slot(xprt, task); 1193 xprt->ops->alloc_slot(xprt, task);
1195 rcu_read_unlock();
1196} 1194}
1197 1195
1198/** 1196/**
@@ -1206,7 +1204,7 @@ void xprt_reserve(struct rpc_task *task)
1206 */ 1204 */
1207void xprt_retry_reserve(struct rpc_task *task) 1205void xprt_retry_reserve(struct rpc_task *task)
1208{ 1206{
1209 struct rpc_xprt *xprt; 1207 struct rpc_xprt *xprt = task->tk_xprt;
1210 1208
1211 task->tk_status = 0; 1209 task->tk_status = 0;
1212 if (task->tk_rqstp != NULL) 1210 if (task->tk_rqstp != NULL)
@@ -1214,10 +1212,7 @@ void xprt_retry_reserve(struct rpc_task *task)
1214 1212
1215 task->tk_timeout = 0; 1213 task->tk_timeout = 0;
1216 task->tk_status = -EAGAIN; 1214 task->tk_status = -EAGAIN;
1217 rcu_read_lock();
1218 xprt = rcu_dereference(task->tk_client->cl_xprt);
1219 xprt->ops->alloc_slot(xprt, task); 1215 xprt->ops->alloc_slot(xprt, task);
1220 rcu_read_unlock();
1221} 1216}
1222 1217
1223static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) 1218static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
@@ -1264,11 +1259,9 @@ void xprt_release(struct rpc_task *task)
1264 1259
1265 if (req == NULL) { 1260 if (req == NULL) {
1266 if (task->tk_client) { 1261 if (task->tk_client) {
1267 rcu_read_lock(); 1262 xprt = task->tk_xprt;
1268 xprt = rcu_dereference(task->tk_client->cl_xprt);
1269 if (xprt->snd_task == task) 1263 if (xprt->snd_task == task)
1270 xprt_release_write(xprt, task); 1264 xprt_release_write(xprt, task);
1271 rcu_read_unlock();
1272 } 1265 }
1273 return; 1266 return;
1274 } 1267 }
@@ -1307,7 +1300,7 @@ void xprt_release(struct rpc_task *task)
1307 1300
1308static void xprt_init(struct rpc_xprt *xprt, struct net *net) 1301static void xprt_init(struct rpc_xprt *xprt, struct net *net)
1309{ 1302{
1310 atomic_set(&xprt->count, 1); 1303 kref_init(&xprt->kref);
1311 1304
1312 spin_lock_init(&xprt->transport_lock); 1305 spin_lock_init(&xprt->transport_lock);
1313 spin_lock_init(&xprt->reserve_lock); 1306 spin_lock_init(&xprt->reserve_lock);
@@ -1318,6 +1311,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
1318 spin_lock_init(&xprt->bc_pa_lock); 1311 spin_lock_init(&xprt->bc_pa_lock);
1319 INIT_LIST_HEAD(&xprt->bc_pa_list); 1312 INIT_LIST_HEAD(&xprt->bc_pa_list);
1320#endif /* CONFIG_SUNRPC_BACKCHANNEL */ 1313#endif /* CONFIG_SUNRPC_BACKCHANNEL */
1314 INIT_LIST_HEAD(&xprt->xprt_switch);
1321 1315
1322 xprt->last_used = jiffies; 1316 xprt->last_used = jiffies;
1323 xprt->cwnd = RPC_INITCWND; 1317 xprt->cwnd = RPC_INITCWND;
@@ -1415,6 +1409,24 @@ static void xprt_destroy(struct rpc_xprt *xprt)
1415 xprt->ops->destroy(xprt); 1409 xprt->ops->destroy(xprt);
1416} 1410}
1417 1411
1412static void xprt_destroy_kref(struct kref *kref)
1413{
1414 xprt_destroy(container_of(kref, struct rpc_xprt, kref));
1415}
1416
1417/**
1418 * xprt_get - return a reference to an RPC transport.
1419 * @xprt: pointer to the transport
1420 *
1421 */
1422struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
1423{
1424 if (xprt != NULL && kref_get_unless_zero(&xprt->kref))
1425 return xprt;
1426 return NULL;
1427}
1428EXPORT_SYMBOL_GPL(xprt_get);
1429
1418/** 1430/**
1419 * xprt_put - release a reference to an RPC transport. 1431 * xprt_put - release a reference to an RPC transport.
1420 * @xprt: pointer to the transport 1432 * @xprt: pointer to the transport
@@ -1422,7 +1434,7 @@ static void xprt_destroy(struct rpc_xprt *xprt)
1422 */ 1434 */
1423void xprt_put(struct rpc_xprt *xprt) 1435void xprt_put(struct rpc_xprt *xprt)
1424{ 1436{
1425 if (atomic_dec_and_test(&xprt->count)) 1437 if (xprt != NULL)
1426 xprt_destroy(xprt); 1438 kref_put(&xprt->kref, xprt_destroy_kref);
1427} 1439}
1428EXPORT_SYMBOL_GPL(xprt_put); 1440EXPORT_SYMBOL_GPL(xprt_put);
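
The xprt.c changes above replace the open-coded atomic_t reference count with a struct kref, so that xprt_get() can use kref_get_unless_zero() and safely fail once the transport is already being torn down. A generic sketch of the same pattern on a placeholder object (not a SUNRPC type):

#include <linux/kref.h>
#include <linux/slab.h>

/* Sketch: kref_init() on creation, kref_get_unless_zero() for lookups
 * that may race with the final put, kref_put() with a release callback. */
struct example_obj {
	struct kref kref;
};

static void example_release(struct kref *kref)
{
	kfree(container_of(kref, struct example_obj, kref));
}

static struct example_obj *example_get(struct example_obj *obj)
{
	if (obj != NULL && kref_get_unless_zero(&obj->kref))
		return obj;
	return NULL;
}

static void example_put(struct example_obj *obj)
{
	if (obj != NULL)
		kref_put(&obj->kref, example_release);
}
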
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
new file mode 100644
index 000000000000..e7fd76975d86
--- /dev/null
+++ b/net/sunrpc/xprtmultipath.c
@@ -0,0 +1,475 @@
1/*
2 * Multipath support for RPC
3 *
4 * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved.
5 *
6 * Trond Myklebust <trond.myklebust@primarydata.com>
7 *
8 */
9#include <linux/types.h>
10#include <linux/kref.h>
11#include <linux/list.h>
12#include <linux/rcupdate.h>
13#include <linux/rculist.h>
14#include <linux/slab.h>
15#include <asm/cmpxchg.h>
16#include <linux/spinlock.h>
17#include <linux/sunrpc/xprt.h>
18#include <linux/sunrpc/xprtmultipath.h>
19
20typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct list_head *head,
21 const struct rpc_xprt *cur);
22
23static const struct rpc_xprt_iter_ops rpc_xprt_iter_singular;
24static const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin;
25static const struct rpc_xprt_iter_ops rpc_xprt_iter_listall;
26
27static void xprt_switch_add_xprt_locked(struct rpc_xprt_switch *xps,
28 struct rpc_xprt *xprt)
29{
30 if (unlikely(xprt_get(xprt) == NULL))
31 return;
32 list_add_tail_rcu(&xprt->xprt_switch, &xps->xps_xprt_list);
33 smp_wmb();
34 if (xps->xps_nxprts == 0)
35 xps->xps_net = xprt->xprt_net;
36 xps->xps_nxprts++;
37}
38
39/**
40 * rpc_xprt_switch_add_xprt - Add a new rpc_xprt to an rpc_xprt_switch
41 * @xps: pointer to struct rpc_xprt_switch
42 * @xprt: pointer to struct rpc_xprt
43 *
44 * Adds xprt to the end of the list of struct rpc_xprt in xps.
45 */
46void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
47 struct rpc_xprt *xprt)
48{
49 if (xprt == NULL)
50 return;
51 spin_lock(&xps->xps_lock);
52 if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
53 xprt_switch_add_xprt_locked(xps, xprt);
54 spin_unlock(&xps->xps_lock);
55}
56
57static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
58 struct rpc_xprt *xprt)
59{
60 if (unlikely(xprt == NULL))
61 return;
62 xps->xps_nxprts--;
63 if (xps->xps_nxprts == 0)
64 xps->xps_net = NULL;
65 smp_wmb();
66 list_del_rcu(&xprt->xprt_switch);
67}
68
69/**
70 * rpc_xprt_switch_remove_xprt - Removes an rpc_xprt from a rpc_xprt_switch
71 * @xps: pointer to struct rpc_xprt_switch
72 * @xprt: pointer to struct rpc_xprt
73 *
74 * Removes xprt from the list of struct rpc_xprt in xps.
75 */
76void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
77 struct rpc_xprt *xprt)
78{
79 spin_lock(&xps->xps_lock);
80 xprt_switch_remove_xprt_locked(xps, xprt);
81 spin_unlock(&xps->xps_lock);
82 xprt_put(xprt);
83}
84
85/**
86 * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch
87 * @xprt: pointer to struct rpc_xprt
88 * @gfp_flags: allocation flags
89 *
90 * On success, returns an initialised struct rpc_xprt_switch, containing
91 * the entry xprt. Returns NULL on failure.
92 */
93struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
94 gfp_t gfp_flags)
95{
96 struct rpc_xprt_switch *xps;
97
98 xps = kmalloc(sizeof(*xps), gfp_flags);
99 if (xps != NULL) {
100 spin_lock_init(&xps->xps_lock);
101 kref_init(&xps->xps_kref);
102 xps->xps_nxprts = 0;
103 INIT_LIST_HEAD(&xps->xps_xprt_list);
104 xps->xps_iter_ops = &rpc_xprt_iter_singular;
105 xprt_switch_add_xprt_locked(xps, xprt);
106 }
107
108 return xps;
109}
110
111static void xprt_switch_free_entries(struct rpc_xprt_switch *xps)
112{
113 spin_lock(&xps->xps_lock);
114 while (!list_empty(&xps->xps_xprt_list)) {
115 struct rpc_xprt *xprt;
116
117 xprt = list_first_entry(&xps->xps_xprt_list,
118 struct rpc_xprt, xprt_switch);
119 xprt_switch_remove_xprt_locked(xps, xprt);
120 spin_unlock(&xps->xps_lock);
121 xprt_put(xprt);
122 spin_lock(&xps->xps_lock);
123 }
124 spin_unlock(&xps->xps_lock);
125}
126
127static void xprt_switch_free(struct kref *kref)
128{
129 struct rpc_xprt_switch *xps = container_of(kref,
130 struct rpc_xprt_switch, xps_kref);
131
132 xprt_switch_free_entries(xps);
133 kfree_rcu(xps, xps_rcu);
134}
135
136/**
137 * xprt_switch_get - Return a reference to a rpc_xprt_switch
138 * @xps: pointer to struct rpc_xprt_switch
139 *
140 * Returns a reference to xps unless the refcount is already zero.
141 */
142struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps)
143{
144 if (xps != NULL && kref_get_unless_zero(&xps->xps_kref))
145 return xps;
146 return NULL;
147}
148
149/**
150 * xprt_switch_put - Release a reference to a rpc_xprt_switch
151 * @xps: pointer to struct rpc_xprt_switch
152 *
153 * Release the reference to xps, and free it once the refcount is zero.
154 */
155void xprt_switch_put(struct rpc_xprt_switch *xps)
156{
157 if (xps != NULL)
158 kref_put(&xps->xps_kref, xprt_switch_free);
159}
160
161/**
162 * rpc_xprt_switch_set_roundrobin - Set a round-robin policy on rpc_xprt_switch
163 * @xps: pointer to struct rpc_xprt_switch
164 *
165 * Sets a round-robin default policy for iterators acting on xps.
166 */
167void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps)
168{
169 if (READ_ONCE(xps->xps_iter_ops) != &rpc_xprt_iter_roundrobin)
170 WRITE_ONCE(xps->xps_iter_ops, &rpc_xprt_iter_roundrobin);
171}
172
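
Taken together, the functions above define the lifecycle of an rpc_xprt_switch: allocate it around one transport, attach further transports, and optionally enable round-robin selection. A hypothetical caller, assuming both arguments are valid, referenced struct rpc_xprt objects:

/* Sketch: build a two-transport switch with round-robin selection.
 * The caller eventually drops it with xprt_switch_put(). */
static struct rpc_xprt_switch *example_make_switch(struct rpc_xprt *first,
						   struct rpc_xprt *second)
{
	struct rpc_xprt_switch *xps;

	xps = xprt_switch_alloc(first, GFP_KERNEL);
	if (xps == NULL)
		return NULL;
	rpc_xprt_switch_add_xprt(xps, second);
	rpc_xprt_switch_set_roundrobin(xps);
	return xps;
}
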
173static
174const struct rpc_xprt_iter_ops *xprt_iter_ops(const struct rpc_xprt_iter *xpi)
175{
176 if (xpi->xpi_ops != NULL)
177 return xpi->xpi_ops;
178 return rcu_dereference(xpi->xpi_xpswitch)->xps_iter_ops;
179}
180
181static
182void xprt_iter_no_rewind(struct rpc_xprt_iter *xpi)
183{
184}
185
186static
187void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi)
188{
189 WRITE_ONCE(xpi->xpi_cursor, NULL);
190}
191
192static
193struct rpc_xprt *xprt_switch_find_first_entry(struct list_head *head)
194{
195 return list_first_or_null_rcu(head, struct rpc_xprt, xprt_switch);
196}
197
198static
199struct rpc_xprt *xprt_iter_first_entry(struct rpc_xprt_iter *xpi)
200{
201 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
202
203 if (xps == NULL)
204 return NULL;
205 return xprt_switch_find_first_entry(&xps->xps_xprt_list);
206}
207
208static
209struct rpc_xprt *xprt_switch_find_current_entry(struct list_head *head,
210 const struct rpc_xprt *cur)
211{
212 struct rpc_xprt *pos;
213
214 list_for_each_entry_rcu(pos, head, xprt_switch) {
215 if (cur == pos)
216 return pos;
217 }
218 return NULL;
219}
220
221static
222struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi)
223{
224 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
225 struct list_head *head;
226
227 if (xps == NULL)
228 return NULL;
229 head = &xps->xps_xprt_list;
230 if (xpi->xpi_cursor == NULL || xps->xps_nxprts < 2)
231 return xprt_switch_find_first_entry(head);
232 return xprt_switch_find_current_entry(head, xpi->xpi_cursor);
233}
234
235static
236struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
237 const struct rpc_xprt *cur)
238{
239 struct rpc_xprt *pos, *prev = NULL;
240
241 list_for_each_entry_rcu(pos, head, xprt_switch) {
242 if (cur == prev)
243 return pos;
244 prev = pos;
245 }
246 return NULL;
247}
248
249static
250struct rpc_xprt *xprt_switch_set_next_cursor(struct list_head *head,
251 struct rpc_xprt **cursor,
252 xprt_switch_find_xprt_t find_next)
253{
254 struct rpc_xprt *cur, *pos, *old;
255
256 cur = READ_ONCE(*cursor);
257 for (;;) {
258 old = cur;
259 pos = find_next(head, old);
260 if (pos == NULL)
261 break;
262 cur = cmpxchg_relaxed(cursor, old, pos);
263 if (cur == old)
264 break;
265 }
266 return pos;
267}
268
269static
270struct rpc_xprt *xprt_iter_next_entry_multiple(struct rpc_xprt_iter *xpi,
271 xprt_switch_find_xprt_t find_next)
272{
273 struct rpc_xprt_switch *xps = rcu_dereference(xpi->xpi_xpswitch);
274 struct list_head *head;
275
276 if (xps == NULL)
277 return NULL;
278 head = &xps->xps_xprt_list;
279 if (xps->xps_nxprts < 2)
280 return xprt_switch_find_first_entry(head);
281 return xprt_switch_set_next_cursor(head, &xpi->xpi_cursor, find_next);
282}
283
284static
285struct rpc_xprt *xprt_switch_find_next_entry_roundrobin(struct list_head *head,
286 const struct rpc_xprt *cur)
287{
288 struct rpc_xprt *ret;
289
290 ret = xprt_switch_find_next_entry(head, cur);
291 if (ret != NULL)
292 return ret;
293 return xprt_switch_find_first_entry(head);
294}
295
296static
297struct rpc_xprt *xprt_iter_next_entry_roundrobin(struct rpc_xprt_iter *xpi)
298{
299 return xprt_iter_next_entry_multiple(xpi,
300 xprt_switch_find_next_entry_roundrobin);
301}
302
303static
304struct rpc_xprt *xprt_iter_next_entry_all(struct rpc_xprt_iter *xpi)
305{
306 return xprt_iter_next_entry_multiple(xpi, xprt_switch_find_next_entry);
307}
308
309/*
310 * xprt_iter_rewind - Resets the xprt iterator
311 * @xpi: pointer to rpc_xprt_iter
312 *
313 * Resets xpi to ensure that it points to the first entry in the list
314 * of transports.
315 */
316static
317void xprt_iter_rewind(struct rpc_xprt_iter *xpi)
318{
319 rcu_read_lock();
320 xprt_iter_ops(xpi)->xpi_rewind(xpi);
321 rcu_read_unlock();
322}
323
324static void __xprt_iter_init(struct rpc_xprt_iter *xpi,
325 struct rpc_xprt_switch *xps,
326 const struct rpc_xprt_iter_ops *ops)
327{
328 rcu_assign_pointer(xpi->xpi_xpswitch, xprt_switch_get(xps));
329 xpi->xpi_cursor = NULL;
330 xpi->xpi_ops = ops;
331}
332
333/**
334 * xprt_iter_init - Initialise an xprt iterator
335 * @xpi: pointer to rpc_xprt_iter
336 * @xps: pointer to rpc_xprt_switch
337 *
338 * Initialises the iterator to use the default iterator ops
339 * as set in xps. This function is mainly intended for internal
340 * use by the RPC client.
341 */
342void xprt_iter_init(struct rpc_xprt_iter *xpi,
343 struct rpc_xprt_switch *xps)
344{
345 __xprt_iter_init(xpi, xps, NULL);
346}
347
348/**
349 * xprt_iter_init_listall - Initialise an xprt iterator
350 * @xpi: pointer to rpc_xprt_iter
351 * @xps: pointer to rpc_xprt_switch
352 *
353 * Initialises the iterator to iterate once through the entire list
354 * of entries in xps.
355 */
356void xprt_iter_init_listall(struct rpc_xprt_iter *xpi,
357 struct rpc_xprt_switch *xps)
358{
359 __xprt_iter_init(xpi, xps, &rpc_xprt_iter_listall);
360}
361
362/**
363 * xprt_iter_xchg_switch - Atomically swap out the rpc_xprt_switch
364 * @xpi: pointer to rpc_xprt_iter
365 * @newswitch: pointer to a new rpc_xprt_switch or NULL
366 *
367 * Swaps out the existing xpi->xpi_xpswitch with a new value.
368 */
369struct rpc_xprt_switch *xprt_iter_xchg_switch(struct rpc_xprt_iter *xpi,
370 struct rpc_xprt_switch *newswitch)
371{
372 struct rpc_xprt_switch __rcu *oldswitch;
373
374 /* Atomically swap out the old xpswitch */
375 oldswitch = xchg(&xpi->xpi_xpswitch, RCU_INITIALIZER(newswitch));
376 if (newswitch != NULL)
377 xprt_iter_rewind(xpi);
378 return rcu_dereference_protected(oldswitch, true);
379}
380
381/**
382 * xprt_iter_destroy - Destroys the xprt iterator
383 * @xpi: pointer to rpc_xprt_iter
384 */
385void xprt_iter_destroy(struct rpc_xprt_iter *xpi)
386{
387 xprt_switch_put(xprt_iter_xchg_switch(xpi, NULL));
388}
389
390/**
391 * xprt_iter_xprt - Returns the rpc_xprt pointed to by the cursor
392 * @xpi: pointer to rpc_xprt_iter
393 *
394 * Returns a pointer to the struct rpc_xprt that is currently
395 * pointed to by the cursor.
396 * Caller must be holding rcu_read_lock().
397 */
398struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi)
399{
400 WARN_ON_ONCE(!rcu_read_lock_held());
401 return xprt_iter_ops(xpi)->xpi_xprt(xpi);
402}
403
404static
405struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi,
406 struct rpc_xprt *(*fn)(struct rpc_xprt_iter *))
407{
408 struct rpc_xprt *ret;
409
410 do {
411 ret = fn(xpi);
412 if (ret == NULL)
413 break;
414 ret = xprt_get(ret);
415 } while (ret == NULL);
416 return ret;
417}
418
419/**
420 * xprt_iter_get_xprt - Returns the rpc_xprt pointed to by the cursor
421 * @xpi: pointer to rpc_xprt_iter
422 *
423 * Returns a reference to the struct rpc_xprt that is currently
424 * pointed to by the cursor.
425 */
426struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi)
427{
428 struct rpc_xprt *xprt;
429
430 rcu_read_lock();
431 xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_xprt);
432 rcu_read_unlock();
433 return xprt;
434}
435
436/**
437 * xprt_iter_get_next - Returns the next rpc_xprt following the cursor
438 * @xpi: pointer to rpc_xprt_iter
439 *
440 * Returns a reference to the struct rpc_xprt that immediately follows the
441 * entry pointed to by the cursor.
442 */
443struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi)
444{
445 struct rpc_xprt *xprt;
446
447 rcu_read_lock();
448 xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_next);
449 rcu_read_unlock();
450 return xprt;
451}
452
453/* Policy for always returning the first entry in the rpc_xprt_switch */
454static
455const struct rpc_xprt_iter_ops rpc_xprt_iter_singular = {
456 .xpi_rewind = xprt_iter_no_rewind,
457 .xpi_xprt = xprt_iter_first_entry,
458 .xpi_next = xprt_iter_first_entry,
459};
460
461/* Policy for round-robin iteration of entries in the rpc_xprt_switch */
462static
463const struct rpc_xprt_iter_ops rpc_xprt_iter_roundrobin = {
464 .xpi_rewind = xprt_iter_default_rewind,
465 .xpi_xprt = xprt_iter_current_entry,
466 .xpi_next = xprt_iter_next_entry_roundrobin,
467};
468
469/* Policy for once-through iteration of entries in the rpc_xprt_switch */
470static
471const struct rpc_xprt_iter_ops rpc_xprt_iter_listall = {
472 .xpi_rewind = xprt_iter_default_rewind,
473 .xpi_xprt = xprt_iter_current_entry,
474 .xpi_next = xprt_iter_next_entry_all,
475};
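The iterator API above is easiest to read end to end: a caller initialises an iterator against an rpc_xprt_switch, pulls referenced transports one at a time, and tears the iterator down when finished. The sketch below is not part of this patch; example_walk_all_xprts is a hypothetical helper that assumes the usual sunrpc xprt headers and a caller already holding a reference on xps.

/* Sketch only: walk each transport in @xps exactly once using the
 * "listall" policy defined above.  xprt_iter_get_next() returns a
 * referenced rpc_xprt, so each one must be released with xprt_put().
 */
static void example_walk_all_xprts(struct rpc_xprt_switch *xps)
{
	struct rpc_xprt_iter xpi;
	struct rpc_xprt *xprt;

	xprt_iter_init_listall(&xpi, xps);
	while ((xprt = xprt_iter_get_next(&xpi)) != NULL) {
		/* ... use xprt here ... */
		xprt_put(xprt);
	}
	xprt_iter_destroy(&xpi);
}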
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index cc1251d07297..2dcd7640eeb5 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -341,6 +341,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
341 rqst->rq_reply_bytes_recvd = 0; 341 rqst->rq_reply_bytes_recvd = 0;
342 rqst->rq_bytes_sent = 0; 342 rqst->rq_bytes_sent = 0;
343 rqst->rq_xid = headerp->rm_xid; 343 rqst->rq_xid = headerp->rm_xid;
344
345 rqst->rq_private_buf.len = size;
344 set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); 346 set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
345 347
346 buf = &rqst->rq_rcv_buf; 348 buf = &rqst->rq_rcv_buf;
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index c14f3a4bff68..b289e106540b 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -80,13 +80,13 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
80 if (!r) 80 if (!r)
81 goto out; 81 goto out;
82 82
83 r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES * 83 r->fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
84 sizeof(u64), GFP_KERNEL); 84 sizeof(u64), GFP_KERNEL);
85 if (!r->r.fmr.physaddrs) 85 if (!r->fmr.physaddrs)
86 goto out_free; 86 goto out_free;
87 87
88 r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); 88 r->fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
89 if (IS_ERR(r->r.fmr.fmr)) 89 if (IS_ERR(r->fmr.fmr))
90 goto out_fmr_err; 90 goto out_fmr_err;
91 91
92 list_add(&r->mw_list, &buf->rb_mws); 92 list_add(&r->mw_list, &buf->rb_mws);
@@ -95,9 +95,9 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt)
95 return 0; 95 return 0;
96 96
97out_fmr_err: 97out_fmr_err:
98 rc = PTR_ERR(r->r.fmr.fmr); 98 rc = PTR_ERR(r->fmr.fmr);
99 dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); 99 dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc);
100 kfree(r->r.fmr.physaddrs); 100 kfree(r->fmr.physaddrs);
101out_free: 101out_free:
102 kfree(r); 102 kfree(r);
103out: 103out:
@@ -109,7 +109,7 @@ __fmr_unmap(struct rpcrdma_mw *r)
109{ 109{
110 LIST_HEAD(l); 110 LIST_HEAD(l);
111 111
112 list_add(&r->r.fmr.fmr->list, &l); 112 list_add(&r->fmr.fmr->list, &l);
113 return ib_unmap_fmr(&l); 113 return ib_unmap_fmr(&l);
114} 114}
115 115
@@ -148,7 +148,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
148 nsegs = RPCRDMA_MAX_FMR_SGES; 148 nsegs = RPCRDMA_MAX_FMR_SGES;
149 for (i = 0; i < nsegs;) { 149 for (i = 0; i < nsegs;) {
150 rpcrdma_map_one(device, seg, direction); 150 rpcrdma_map_one(device, seg, direction);
151 mw->r.fmr.physaddrs[i] = seg->mr_dma; 151 mw->fmr.physaddrs[i] = seg->mr_dma;
152 len += seg->mr_len; 152 len += seg->mr_len;
153 ++seg; 153 ++seg;
154 ++i; 154 ++i;
@@ -158,13 +158,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
158 break; 158 break;
159 } 159 }
160 160
161 rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs, 161 rc = ib_map_phys_fmr(mw->fmr.fmr, mw->fmr.physaddrs,
162 i, seg1->mr_dma); 162 i, seg1->mr_dma);
163 if (rc) 163 if (rc)
164 goto out_maperr; 164 goto out_maperr;
165 165
166 seg1->rl_mw = mw; 166 seg1->rl_mw = mw;
167 seg1->mr_rkey = mw->r.fmr.fmr->rkey; 167 seg1->mr_rkey = mw->fmr.fmr->rkey;
168 seg1->mr_base = seg1->mr_dma + pageoff; 168 seg1->mr_base = seg1->mr_dma + pageoff;
169 seg1->mr_nsegs = i; 169 seg1->mr_nsegs = i;
170 seg1->mr_len = len; 170 seg1->mr_len = len;
@@ -219,7 +219,7 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
219 seg = &req->rl_segments[i]; 219 seg = &req->rl_segments[i];
220 mw = seg->rl_mw; 220 mw = seg->rl_mw;
221 221
222 list_add(&mw->r.fmr.fmr->list, &unmap_list); 222 list_add(&mw->fmr.fmr->list, &unmap_list);
223 223
224 i += seg->mr_nsegs; 224 i += seg->mr_nsegs;
225 } 225 }
@@ -281,9 +281,9 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
281 while (!list_empty(&buf->rb_all)) { 281 while (!list_empty(&buf->rb_all)) {
282 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); 282 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
283 list_del(&r->mw_all); 283 list_del(&r->mw_all);
284 kfree(r->r.fmr.physaddrs); 284 kfree(r->fmr.physaddrs);
285 285
286 rc = ib_dealloc_fmr(r->r.fmr.fmr); 286 rc = ib_dealloc_fmr(r->fmr.fmr);
287 if (rc) 287 if (rc)
288 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", 288 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
289 __func__, rc); 289 __func__, rc);
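The fmr_ops.c hunks are a mechanical rename: every r->r.fmr access becomes r->fmr. That reads like the containing union in struct rpcrdma_mw losing its member name; the before/after sketch below illustrates that kind of change only, with placeholder type and field names that are assumptions rather than copies from the tree.

/* Placeholder member types, for illustration only */
struct example_fmr  { int dummy; };
struct example_frmr { int dummy; };

/* Before: the union has a name, so accesses read mw->r.fmr / mw->r.frmr */
struct example_mw_before {
	union {
		struct example_fmr  fmr;
		struct example_frmr frmr;
	} r;
};

/* After: the union is anonymous, so the same fields read mw->fmr / mw->frmr */
struct example_mw_after {
	union {
		struct example_fmr  fmr;
		struct example_frmr frmr;
	};
};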
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index e16567389e28..c250924a9fd3 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -109,20 +109,20 @@ static void
109__frwr_recovery_worker(struct work_struct *work) 109__frwr_recovery_worker(struct work_struct *work)
110{ 110{
111 struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw, 111 struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw,
112 r.frmr.fr_work); 112 frmr.fr_work);
113 struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt; 113 struct rpcrdma_xprt *r_xprt = r->frmr.fr_xprt;
114 unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; 114 unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
115 struct ib_pd *pd = r_xprt->rx_ia.ri_pd; 115 struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
116 116
117 if (ib_dereg_mr(r->r.frmr.fr_mr)) 117 if (ib_dereg_mr(r->frmr.fr_mr))
118 goto out_fail; 118 goto out_fail;
119 119
120 r->r.frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); 120 r->frmr.fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
121 if (IS_ERR(r->r.frmr.fr_mr)) 121 if (IS_ERR(r->frmr.fr_mr))
122 goto out_fail; 122 goto out_fail;
123 123
124 dprintk("RPC: %s: recovered FRMR %p\n", __func__, r); 124 dprintk("RPC: %s: recovered FRMR %p\n", __func__, r);
125 r->r.frmr.fr_state = FRMR_IS_INVALID; 125 r->frmr.fr_state = FRMR_IS_INVALID;
126 rpcrdma_put_mw(r_xprt, r); 126 rpcrdma_put_mw(r_xprt, r);
127 return; 127 return;
128 128
@@ -137,15 +137,15 @@ out_fail:
137static void 137static void
138__frwr_queue_recovery(struct rpcrdma_mw *r) 138__frwr_queue_recovery(struct rpcrdma_mw *r)
139{ 139{
140 INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker); 140 INIT_WORK(&r->frmr.fr_work, __frwr_recovery_worker);
141 queue_work(frwr_recovery_wq, &r->r.frmr.fr_work); 141 queue_work(frwr_recovery_wq, &r->frmr.fr_work);
142} 142}
143 143
144static int 144static int
145__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, 145__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
146 unsigned int depth) 146 unsigned int depth)
147{ 147{
148 struct rpcrdma_frmr *f = &r->r.frmr; 148 struct rpcrdma_frmr *f = &r->frmr;
149 int rc; 149 int rc;
150 150
151 f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); 151 f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
@@ -158,6 +158,8 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
158 158
159 sg_init_table(f->sg, depth); 159 sg_init_table(f->sg, depth);
160 160
161 init_completion(&f->fr_linv_done);
162
161 return 0; 163 return 0;
162 164
163out_mr_err: 165out_mr_err:
@@ -179,11 +181,11 @@ __frwr_release(struct rpcrdma_mw *r)
179{ 181{
180 int rc; 182 int rc;
181 183
182 rc = ib_dereg_mr(r->r.frmr.fr_mr); 184 rc = ib_dereg_mr(r->frmr.fr_mr);
183 if (rc) 185 if (rc)
184 dprintk("RPC: %s: ib_dereg_mr status %i\n", 186 dprintk("RPC: %s: ib_dereg_mr status %i\n",
185 __func__, rc); 187 __func__, rc);
186 kfree(r->r.frmr.sg); 188 kfree(r->frmr.sg);
187} 189}
188 190
189static int 191static int
@@ -244,39 +246,76 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
244 rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); 246 rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
245} 247}
246 248
247/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs 249static void
248 * to be reset. 250__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
251 const char *wr)
252{
253 frmr->fr_state = FRMR_IS_STALE;
254 if (wc->status != IB_WC_WR_FLUSH_ERR)
255 pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
256 wr, ib_wc_status_msg(wc->status),
257 wc->status, wc->vendor_err);
258}
259
260/**
261 * frwr_wc_fastreg - Invoked by RDMA provider for each polled FastReg WC
262 * @cq: completion queue (ignored)
263 * @wc: completed WR
249 * 264 *
250 * WARNING: Only wr_id and status are reliable at this point
251 */ 265 */
252static void 266static void
253__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r) 267frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
254{ 268{
255 if (likely(wc->status == IB_WC_SUCCESS)) 269 struct rpcrdma_frmr *frmr;
256 return; 270 struct ib_cqe *cqe;
257
258 /* WARNING: Only wr_id and status are reliable at this point */
259 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
260 if (wc->status == IB_WC_WR_FLUSH_ERR)
261 dprintk("RPC: %s: frmr %p flushed\n", __func__, r);
262 else
263 pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
264 __func__, r, ib_wc_status_msg(wc->status), wc->status);
265 271
266 r->r.frmr.fr_state = FRMR_IS_STALE; 272 /* WARNING: Only wr_cqe and status are reliable at this point */
273 if (wc->status != IB_WC_SUCCESS) {
274 cqe = wc->wr_cqe;
275 frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
276 __frwr_sendcompletion_flush(wc, frmr, "fastreg");
277 }
267} 278}
268 279
280/**
281 * frwr_wc_localinv - Invoked by RDMA provider for each polled LocalInv WC
282 * @cq: completion queue (ignored)
283 * @wc: completed WR
284 *
285 */
269static void 286static void
270frwr_sendcompletion(struct ib_wc *wc) 287frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
271{ 288{
272 struct rpcrdma_mw *r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; 289 struct rpcrdma_frmr *frmr;
273 struct rpcrdma_frmr *f = &r->r.frmr; 290 struct ib_cqe *cqe;
274 291
275 if (unlikely(wc->status != IB_WC_SUCCESS)) 292 /* WARNING: Only wr_cqe and status are reliable at this point */
276 __frwr_sendcompletion_flush(wc, r); 293 if (wc->status != IB_WC_SUCCESS) {
294 cqe = wc->wr_cqe;
295 frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
296 __frwr_sendcompletion_flush(wc, frmr, "localinv");
297 }
298}
277 299
278 if (f->fr_waiter) 300/**
279 complete(&f->fr_linv_done); 301 * frwr_wc_localinv_wake - Invoked by RDMA provider for each polled LocalInv WC
302 * @cq: completion queue (ignored)
303 * @wc: completed WR
304 *
305 * Awaken anyone waiting for an MR to finish being fenced.
306 */
307static void
308frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
309{
310 struct rpcrdma_frmr *frmr;
311 struct ib_cqe *cqe;
312
313 /* WARNING: Only wr_cqe and status are reliable at this point */
314 cqe = wc->wr_cqe;
315 frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
316 if (wc->status != IB_WC_SUCCESS)
317 __frwr_sendcompletion_flush(wc, frmr, "localinv");
318 complete_all(&frmr->fr_linv_done);
280} 319}
281 320
282static int 321static int
@@ -313,8 +352,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt)
313 352
314 list_add(&r->mw_list, &buf->rb_mws); 353 list_add(&r->mw_list, &buf->rb_mws);
315 list_add(&r->mw_all, &buf->rb_all); 354 list_add(&r->mw_all, &buf->rb_all);
316 r->mw_sendcompletion = frwr_sendcompletion; 355 r->frmr.fr_xprt = r_xprt;
317 r->r.frmr.fr_xprt = r_xprt;
318 } 356 }
319 357
320 return 0; 358 return 0;
@@ -347,10 +385,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
347 mw = rpcrdma_get_mw(r_xprt); 385 mw = rpcrdma_get_mw(r_xprt);
348 if (!mw) 386 if (!mw)
349 return -ENOMEM; 387 return -ENOMEM;
350 } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); 388 } while (mw->frmr.fr_state != FRMR_IS_INVALID);
351 frmr = &mw->r.frmr; 389 frmr = &mw->frmr;
352 frmr->fr_state = FRMR_IS_VALID; 390 frmr->fr_state = FRMR_IS_VALID;
353 frmr->fr_waiter = false;
354 mr = frmr->fr_mr; 391 mr = frmr->fr_mr;
355 reg_wr = &frmr->fr_regwr; 392 reg_wr = &frmr->fr_regwr;
356 393
@@ -400,7 +437,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
400 437
401 reg_wr->wr.next = NULL; 438 reg_wr->wr.next = NULL;
402 reg_wr->wr.opcode = IB_WR_REG_MR; 439 reg_wr->wr.opcode = IB_WR_REG_MR;
403 reg_wr->wr.wr_id = (uintptr_t)mw; 440 frmr->fr_cqe.done = frwr_wc_fastreg;
441 reg_wr->wr.wr_cqe = &frmr->fr_cqe;
404 reg_wr->wr.num_sge = 0; 442 reg_wr->wr.num_sge = 0;
405 reg_wr->wr.send_flags = 0; 443 reg_wr->wr.send_flags = 0;
406 reg_wr->mr = mr; 444 reg_wr->mr = mr;
@@ -434,15 +472,15 @@ static struct ib_send_wr *
434__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg) 472__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
435{ 473{
436 struct rpcrdma_mw *mw = seg->rl_mw; 474 struct rpcrdma_mw *mw = seg->rl_mw;
437 struct rpcrdma_frmr *f = &mw->r.frmr; 475 struct rpcrdma_frmr *f = &mw->frmr;
438 struct ib_send_wr *invalidate_wr; 476 struct ib_send_wr *invalidate_wr;
439 477
440 f->fr_waiter = false;
441 f->fr_state = FRMR_IS_INVALID; 478 f->fr_state = FRMR_IS_INVALID;
442 invalidate_wr = &f->fr_invwr; 479 invalidate_wr = &f->fr_invwr;
443 480
444 memset(invalidate_wr, 0, sizeof(*invalidate_wr)); 481 memset(invalidate_wr, 0, sizeof(*invalidate_wr));
445 invalidate_wr->wr_id = (unsigned long)(void *)mw; 482 f->fr_cqe.done = frwr_wc_localinv;
483 invalidate_wr->wr_cqe = &f->fr_cqe;
446 invalidate_wr->opcode = IB_WR_LOCAL_INV; 484 invalidate_wr->opcode = IB_WR_LOCAL_INV;
447 invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey; 485 invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
448 486
@@ -455,7 +493,7 @@ __frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
455{ 493{
456 struct ib_device *device = r_xprt->rx_ia.ri_device; 494 struct ib_device *device = r_xprt->rx_ia.ri_device;
457 struct rpcrdma_mw *mw = seg->rl_mw; 495 struct rpcrdma_mw *mw = seg->rl_mw;
458 struct rpcrdma_frmr *f = &mw->r.frmr; 496 struct rpcrdma_frmr *f = &mw->frmr;
459 497
460 seg->rl_mw = NULL; 498 seg->rl_mw = NULL;
461 499
@@ -504,15 +542,15 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
504 542
505 i += seg->mr_nsegs; 543 i += seg->mr_nsegs;
506 } 544 }
507 f = &seg->rl_mw->r.frmr; 545 f = &seg->rl_mw->frmr;
508 546
509 /* Strong send queue ordering guarantees that when the 547 /* Strong send queue ordering guarantees that when the
510 * last WR in the chain completes, all WRs in the chain 548 * last WR in the chain completes, all WRs in the chain
511 * are complete. 549 * are complete.
512 */ 550 */
513 f->fr_invwr.send_flags = IB_SEND_SIGNALED; 551 f->fr_invwr.send_flags = IB_SEND_SIGNALED;
514 f->fr_waiter = true; 552 f->fr_cqe.done = frwr_wc_localinv_wake;
515 init_completion(&f->fr_linv_done); 553 reinit_completion(&f->fr_linv_done);
516 INIT_CQCOUNT(&r_xprt->rx_ep); 554 INIT_CQCOUNT(&r_xprt->rx_ep);
517 555
518 /* Transport disconnect drains the receive CQ before it 556 /* Transport disconnect drains the receive CQ before it
@@ -520,14 +558,18 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
520 * unless ri_id->qp is a valid pointer. 558 * unless ri_id->qp is a valid pointer.
521 */ 559 */
522 rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr); 560 rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
523 if (rc) 561 if (rc) {
524 pr_warn("%s: ib_post_send failed %i\n", __func__, rc); 562 pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
563 rdma_disconnect(ia->ri_id);
564 goto unmap;
565 }
525 566
526 wait_for_completion(&f->fr_linv_done); 567 wait_for_completion(&f->fr_linv_done);
527 568
528 /* ORDER: Now DMA unmap all of the req's MRs, and return 569 /* ORDER: Now DMA unmap all of the req's MRs, and return
529 * them to the free MW list. 570 * them to the free MW list.
530 */ 571 */
572unmap:
531 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) { 573 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
532 seg = &req->rl_segments[i]; 574 seg = &req->rl_segments[i];
533 575
@@ -549,7 +591,7 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
549 struct rpcrdma_mr_seg *seg1 = seg; 591 struct rpcrdma_mr_seg *seg1 = seg;
550 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 592 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
551 struct rpcrdma_mw *mw = seg1->rl_mw; 593 struct rpcrdma_mw *mw = seg1->rl_mw;
552 struct rpcrdma_frmr *frmr = &mw->r.frmr; 594 struct rpcrdma_frmr *frmr = &mw->frmr;
553 struct ib_send_wr *invalidate_wr, *bad_wr; 595 struct ib_send_wr *invalidate_wr, *bad_wr;
554 int rc, nsegs = seg->mr_nsegs; 596 int rc, nsegs = seg->mr_nsegs;
555 597
@@ -557,10 +599,11 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
557 599
558 seg1->rl_mw = NULL; 600 seg1->rl_mw = NULL;
559 frmr->fr_state = FRMR_IS_INVALID; 601 frmr->fr_state = FRMR_IS_INVALID;
560 invalidate_wr = &mw->r.frmr.fr_invwr; 602 invalidate_wr = &mw->frmr.fr_invwr;
561 603
562 memset(invalidate_wr, 0, sizeof(*invalidate_wr)); 604 memset(invalidate_wr, 0, sizeof(*invalidate_wr));
563 invalidate_wr->wr_id = (uintptr_t)mw; 605 frmr->fr_cqe.done = frwr_wc_localinv;
606 invalidate_wr->wr_cqe = &frmr->fr_cqe;
564 invalidate_wr->opcode = IB_WR_LOCAL_INV; 607 invalidate_wr->opcode = IB_WR_LOCAL_INV;
565 invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey; 608 invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
566 DECR_CQCOUNT(&r_xprt->rx_ep); 609 DECR_CQCOUNT(&r_xprt->rx_ep);
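The frwr_ops.c changes replace wr_id-based completion demultiplexing with struct ib_cqe: each work request points at an ib_cqe embedded in the FRMR, and the completion handler recovers its context with container_of(). A minimal sketch of that pattern follows; it assumes <rdma/ib_verbs.h>, and the example_* names are hypothetical rather than taken from the patch.

/* Sketch only: per-request context with an embedded ib_cqe, mirroring
 * what the patch does with struct rpcrdma_frmr::fr_cqe.
 */
struct example_ctx {
	struct ib_cqe	cqe;	/* must stay embedded in the context */
	/* ... per-request state ... */
};

static void example_done(struct ib_cq *cq, struct ib_wc *wc)
{
	/* Only wc->wr_cqe and wc->status are reliable on flush errors */
	struct example_ctx *ctx =
		container_of(wc->wr_cqe, struct example_ctx, cqe);

	if (wc->status != IB_WC_SUCCESS)
		pr_err("example: %s (%u)\n",
		       ib_wc_status_msg(wc->status), wc->status);
	/* ... complete or recycle ctx ... */
}

static void example_post(struct ib_qp *qp, struct example_ctx *ctx,
			 struct ib_send_wr *wr)
{
	struct ib_send_wr *bad_wr;

	ctx->cqe.done = example_done;	/* instead of encoding ctx in wr_id */
	wr->wr_cqe = &ctx->cqe;
	if (ib_post_send(qp, wr, &bad_wr))
		pr_err("example: ib_post_send failed\n");
}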
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index dbb302ecf590..481b9b6f4a15 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -68,7 +68,6 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
68 rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing)); 68 rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing));
69 seg->mr_rkey = ia->ri_dma_mr->rkey; 69 seg->mr_rkey = ia->ri_dma_mr->rkey;
70 seg->mr_base = seg->mr_dma; 70 seg->mr_base = seg->mr_dma;
71 seg->mr_nsegs = 1;
72 return 1; 71 return 1;
73} 72}
74 73
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 0f28f2d743ed..888823bb6dae 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -132,6 +132,33 @@ rpcrdma_tail_pullup(struct xdr_buf *buf)
132 return tlen; 132 return tlen;
133} 133}
134 134
135/* Split "vec" on page boundaries into segments. FMR registers pages,
136 * not a byte range. Other modes coalesce these segments into a single
137 * MR when they can.
138 */
139static int
140rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
141 int n, int nsegs)
142{
143 size_t page_offset;
144 u32 remaining;
145 char *base;
146
147 base = vec->iov_base;
148 page_offset = offset_in_page(base);
149 remaining = vec->iov_len;
150 while (remaining && n < nsegs) {
151 seg[n].mr_page = NULL;
152 seg[n].mr_offset = base;
153 seg[n].mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
154 remaining -= seg[n].mr_len;
155 base += seg[n].mr_len;
156 ++n;
157 page_offset = 0;
158 }
159 return n;
160}
161
135/* 162/*
136 * Chunk assembly from upper layer xdr_buf. 163 * Chunk assembly from upper layer xdr_buf.
137 * 164 *
@@ -150,11 +177,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
150 int page_base; 177 int page_base;
151 struct page **ppages; 178 struct page **ppages;
152 179
153 if (pos == 0 && xdrbuf->head[0].iov_len) { 180 if (pos == 0) {
154 seg[n].mr_page = NULL; 181 n = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, n, nsegs);
155 seg[n].mr_offset = xdrbuf->head[0].iov_base; 182 if (n == nsegs)
156 seg[n].mr_len = xdrbuf->head[0].iov_len; 183 return -EIO;
157 ++n;
158 } 184 }
159 185
160 len = xdrbuf->page_len; 186 len = xdrbuf->page_len;
@@ -192,13 +218,9 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
192 * xdr pad bytes, saving the server an RDMA operation. */ 218 * xdr pad bytes, saving the server an RDMA operation. */
193 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) 219 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
194 return n; 220 return n;
221 n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n, nsegs);
195 if (n == nsegs) 222 if (n == nsegs)
196 /* Tail remains, but we're out of segments */
197 return -EIO; 223 return -EIO;
198 seg[n].mr_page = NULL;
199 seg[n].mr_offset = xdrbuf->tail[0].iov_base;
200 seg[n].mr_len = xdrbuf->tail[0].iov_len;
201 ++n;
202 } 224 }
203 225
204 return n; 226 return n;
@@ -773,20 +795,17 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
773 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; 795 struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
774 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 796 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
775 __be32 *iptr; 797 __be32 *iptr;
776 int rdmalen, status; 798 int rdmalen, status, rmerr;
777 unsigned long cwnd; 799 unsigned long cwnd;
778 u32 credits;
779 800
780 dprintk("RPC: %s: incoming rep %p\n", __func__, rep); 801 dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
781 802
782 if (rep->rr_len == RPCRDMA_BAD_LEN) 803 if (rep->rr_len == RPCRDMA_BAD_LEN)
783 goto out_badstatus; 804 goto out_badstatus;
784 if (rep->rr_len < RPCRDMA_HDRLEN_MIN) 805 if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
785 goto out_shortreply; 806 goto out_shortreply;
786 807
787 headerp = rdmab_to_msg(rep->rr_rdmabuf); 808 headerp = rdmab_to_msg(rep->rr_rdmabuf);
788 if (headerp->rm_vers != rpcrdma_version)
789 goto out_badversion;
790#if defined(CONFIG_SUNRPC_BACKCHANNEL) 809#if defined(CONFIG_SUNRPC_BACKCHANNEL)
791 if (rpcrdma_is_bcall(headerp)) 810 if (rpcrdma_is_bcall(headerp))
792 goto out_bcall; 811 goto out_bcall;
@@ -809,15 +828,16 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
809 */ 828 */
810 list_del_init(&rqst->rq_list); 829 list_del_init(&rqst->rq_list);
811 spin_unlock_bh(&xprt->transport_lock); 830 spin_unlock_bh(&xprt->transport_lock);
812 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" 831 dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
813 " RPC request 0x%p xid 0x%08x\n", 832 __func__, rep, req, be32_to_cpu(headerp->rm_xid));
814 __func__, rep, req, rqst,
815 be32_to_cpu(headerp->rm_xid));
816 833
817 /* from here on, the reply is no longer an orphan */ 834 /* from here on, the reply is no longer an orphan */
818 req->rl_reply = rep; 835 req->rl_reply = rep;
819 xprt->reestablish_timeout = 0; 836 xprt->reestablish_timeout = 0;
820 837
838 if (headerp->rm_vers != rpcrdma_version)
839 goto out_badversion;
840
821 /* check for expected message types */ 841 /* check for expected message types */
822 /* The order of some of these tests is important. */ 842 /* The order of some of these tests is important. */
823 switch (headerp->rm_type) { 843 switch (headerp->rm_type) {
@@ -878,6 +898,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
878 status = rdmalen; 898 status = rdmalen;
879 break; 899 break;
880 900
901 case rdma_error:
902 goto out_rdmaerr;
903
881badheader: 904badheader:
882 default: 905 default:
883 dprintk("%s: invalid rpcrdma reply header (type %d):" 906 dprintk("%s: invalid rpcrdma reply header (type %d):"
@@ -893,6 +916,7 @@ badheader:
893 break; 916 break;
894 } 917 }
895 918
919out:
896 /* Invalidate and flush the data payloads before waking the 920 /* Invalidate and flush the data payloads before waking the
897 * waiting application. This guarantees the memory region is 921 * waiting application. This guarantees the memory region is
898 * properly fenced from the server before the application 922 * properly fenced from the server before the application
@@ -903,15 +927,9 @@ badheader:
903 if (req->rl_nchunks) 927 if (req->rl_nchunks)
904 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req); 928 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
905 929
906 credits = be32_to_cpu(headerp->rm_credit);
907 if (credits == 0)
908 credits = 1; /* don't deadlock */
909 else if (credits > r_xprt->rx_buf.rb_max_requests)
910 credits = r_xprt->rx_buf.rb_max_requests;
911
912 spin_lock_bh(&xprt->transport_lock); 930 spin_lock_bh(&xprt->transport_lock);
913 cwnd = xprt->cwnd; 931 cwnd = xprt->cwnd;
914 xprt->cwnd = credits << RPC_CWNDSHIFT; 932 xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
915 if (xprt->cwnd > cwnd) 933 if (xprt->cwnd > cwnd)
916 xprt_release_rqst_cong(rqst->rq_task); 934 xprt_release_rqst_cong(rqst->rq_task);
917 935
@@ -935,13 +953,43 @@ out_bcall:
935 return; 953 return;
936#endif 954#endif
937 955
938out_shortreply: 956/* If the incoming reply terminated a pending RPC, the next
939 dprintk("RPC: %s: short/invalid reply\n", __func__); 957 * RPC call will post a replacement receive buffer as it is
940 goto repost; 958 * being marshaled.
941 959 */
942out_badversion: 960out_badversion:
943 dprintk("RPC: %s: invalid version %d\n", 961 dprintk("RPC: %s: invalid version %d\n",
944 __func__, be32_to_cpu(headerp->rm_vers)); 962 __func__, be32_to_cpu(headerp->rm_vers));
963 status = -EIO;
964 r_xprt->rx_stats.bad_reply_count++;
965 goto out;
966
967out_rdmaerr:
968 rmerr = be32_to_cpu(headerp->rm_body.rm_error.rm_err);
969 switch (rmerr) {
970 case ERR_VERS:
971 pr_err("%s: server reports header version error (%u-%u)\n",
972 __func__,
973 be32_to_cpu(headerp->rm_body.rm_error.rm_vers_low),
974 be32_to_cpu(headerp->rm_body.rm_error.rm_vers_high));
975 break;
976 case ERR_CHUNK:
977 pr_err("%s: server reports header decoding error\n",
978 __func__);
979 break;
980 default:
981 pr_err("%s: server reports unknown error %d\n",
982 __func__, rmerr);
983 }
984 status = -EREMOTEIO;
985 r_xprt->rx_stats.bad_reply_count++;
986 goto out;
987
988/* If no pending RPC transaction was matched, post a replacement
989 * receive buffer before returning.
990 */
991out_shortreply:
992 dprintk("RPC: %s: short/invalid reply\n", __func__);
945 goto repost; 993 goto repost;
946 994
947out_nomatch: 995out_nomatch:
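rpcrdma_convert_kvec() above splits a kvec into page-sized segments so FMR, which registers whole pages rather than byte ranges, can handle the head and tail buffers. The standalone sketch below reproduces only the splitting arithmetic, assuming a 4096-byte page; the input values are illustrative, not from the patch.

#include <stdio.h>
#include <stddef.h>

#define EXAMPLE_PAGE_SIZE 4096UL

/* Sketch only: a buffer that starts 16 bytes before a page boundary and
 * carries 8192 bytes splits into segments of 16, 4096 and 4080 bytes.
 */
int main(void)
{
	size_t page_offset = 4080;	/* offset_in_page(base) */
	size_t remaining = 8192;	/* vec->iov_len */

	while (remaining) {
		size_t len = EXAMPLE_PAGE_SIZE - page_offset;

		if (len > remaining)
			len = remaining;
		printf("segment of %zu bytes\n", len);
		remaining -= len;
		page_offset = 0;	/* later segments start on a page boundary */
	}
	return 0;
}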
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 65a7c232a345..a2a7519b0f23 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -107,26 +107,18 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
107 int ret; 107 int ret;
108 108
109 vec = svc_rdma_get_req_map(rdma); 109 vec = svc_rdma_get_req_map(rdma);
110 ret = svc_rdma_map_xdr(rdma, sndbuf, vec); 110 ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false);
111 if (ret) 111 if (ret)
112 goto out_err; 112 goto out_err;
113 113
114 /* Post a recv buffer to handle the reply for this request. */ 114 ret = svc_rdma_repost_recv(rdma, GFP_NOIO);
115 ret = svc_rdma_post_recv(rdma, GFP_NOIO); 115 if (ret)
116 if (ret) {
117 pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n",
118 ret);
119 pr_err("svcrdma: closing transport %p.\n", rdma);
120 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
121 ret = -ENOTCONN;
122 goto out_err; 116 goto out_err;
123 }
124 117
125 ctxt = svc_rdma_get_context(rdma); 118 ctxt = svc_rdma_get_context(rdma);
126 ctxt->pages[0] = virt_to_page(rqst->rq_buffer); 119 ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
127 ctxt->count = 1; 120 ctxt->count = 1;
128 121
129 ctxt->wr_op = IB_WR_SEND;
130 ctxt->direction = DMA_TO_DEVICE; 122 ctxt->direction = DMA_TO_DEVICE;
131 ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; 123 ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
132 ctxt->sge[0].length = sndbuf->len; 124 ctxt->sge[0].length = sndbuf->len;
@@ -140,7 +132,8 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
140 atomic_inc(&rdma->sc_dma_used); 132 atomic_inc(&rdma->sc_dma_used);
141 133
142 memset(&send_wr, 0, sizeof(send_wr)); 134 memset(&send_wr, 0, sizeof(send_wr));
143 send_wr.wr_id = (unsigned long)ctxt; 135 ctxt->cqe.done = svc_rdma_wc_send;
136 send_wr.wr_cqe = &ctxt->cqe;
144 send_wr.sg_list = ctxt->sge; 137 send_wr.sg_list = ctxt->sge;
145 send_wr.num_sge = 1; 138 send_wr.num_sge = 1;
146 send_wr.opcode = IB_WR_SEND; 139 send_wr.opcode = IB_WR_SEND;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index e2fca7617242..765bca47c74d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -145,29 +145,44 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
145 return (__be32 *)&ary->wc_array[nchunks]; 145 return (__be32 *)&ary->wc_array[nchunks];
146} 146}
147 147
148int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, 148int svc_rdma_xdr_decode_req(struct rpcrdma_msg *rmsgp, struct svc_rqst *rqstp)
149 struct svc_rqst *rqstp)
150{ 149{
151 struct rpcrdma_msg *rmsgp = NULL;
152 __be32 *va, *vaend; 150 __be32 *va, *vaend;
151 unsigned int len;
153 u32 hdr_len; 152 u32 hdr_len;
154 153
155 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
156
157 /* Verify that there's enough bytes for header + something */ 154 /* Verify that there's enough bytes for header + something */
158 if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) { 155 if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_ERR) {
159 dprintk("svcrdma: header too short = %d\n", 156 dprintk("svcrdma: header too short = %d\n",
160 rqstp->rq_arg.len); 157 rqstp->rq_arg.len);
161 return -EINVAL; 158 return -EINVAL;
162 } 159 }
163 160
164 if (rmsgp->rm_vers != rpcrdma_version) 161 if (rmsgp->rm_vers != rpcrdma_version) {
165 return -ENOSYS; 162 dprintk("%s: bad version %u\n", __func__,
166 163 be32_to_cpu(rmsgp->rm_vers));
167 /* Pull in the extra for the padded case and bump our pointer */ 164 return -EPROTONOSUPPORT;
168 if (rmsgp->rm_type == rdma_msgp) { 165 }
169 int hdrlen;
170 166
167 switch (be32_to_cpu(rmsgp->rm_type)) {
168 case RDMA_MSG:
169 case RDMA_NOMSG:
170 break;
171
172 case RDMA_DONE:
173 /* Just drop it */
174 dprintk("svcrdma: dropping RDMA_DONE message\n");
175 return 0;
176
177 case RDMA_ERROR:
178 /* Possible if this is a backchannel reply.
179 * XXX: We should cancel this XID, though.
180 */
181 dprintk("svcrdma: dropping RDMA_ERROR message\n");
182 return 0;
183
184 case RDMA_MSGP:
185 /* Pull in the extra for the padded case, bump our pointer */
171 rmsgp->rm_body.rm_padded.rm_align = 186 rmsgp->rm_body.rm_padded.rm_align =
172 be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align); 187 be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
173 rmsgp->rm_body.rm_padded.rm_thresh = 188 rmsgp->rm_body.rm_padded.rm_thresh =
@@ -175,11 +190,15 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
175 190
176 va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; 191 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
177 rqstp->rq_arg.head[0].iov_base = va; 192 rqstp->rq_arg.head[0].iov_base = va;
178 hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); 193 len = (u32)((unsigned long)va - (unsigned long)rmsgp);
179 rqstp->rq_arg.head[0].iov_len -= hdrlen; 194 rqstp->rq_arg.head[0].iov_len -= len;
180 if (hdrlen > rqstp->rq_arg.len) 195 if (len > rqstp->rq_arg.len)
181 return -EINVAL; 196 return -EINVAL;
182 return hdrlen; 197 return len;
198 default:
199 dprintk("svcrdma: bad rdma procedure (%u)\n",
200 be32_to_cpu(rmsgp->rm_type));
201 return -EINVAL;
183 } 202 }
184 203
185 /* The chunk list may contain either a read chunk list or a write 204 /* The chunk list may contain either a read chunk list or a write
@@ -188,20 +207,25 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
188 va = &rmsgp->rm_body.rm_chunks[0]; 207 va = &rmsgp->rm_body.rm_chunks[0];
189 vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); 208 vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
190 va = decode_read_list(va, vaend); 209 va = decode_read_list(va, vaend);
191 if (!va) 210 if (!va) {
211 dprintk("svcrdma: failed to decode read list\n");
192 return -EINVAL; 212 return -EINVAL;
213 }
193 va = decode_write_list(va, vaend); 214 va = decode_write_list(va, vaend);
194 if (!va) 215 if (!va) {
216 dprintk("svcrdma: failed to decode write list\n");
195 return -EINVAL; 217 return -EINVAL;
218 }
196 va = decode_reply_array(va, vaend); 219 va = decode_reply_array(va, vaend);
197 if (!va) 220 if (!va) {
221 dprintk("svcrdma: failed to decode reply chunk\n");
198 return -EINVAL; 222 return -EINVAL;
223 }
199 224
200 rqstp->rq_arg.head[0].iov_base = va; 225 rqstp->rq_arg.head[0].iov_base = va;
201 hdr_len = (unsigned long)va - (unsigned long)rmsgp; 226 hdr_len = (unsigned long)va - (unsigned long)rmsgp;
202 rqstp->rq_arg.head[0].iov_len -= hdr_len; 227 rqstp->rq_arg.head[0].iov_len -= hdr_len;
203 228
204 *rdma_req = rmsgp;
205 return hdr_len; 229 return hdr_len;
206} 230}
207 231
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index c8b8a8b4181e..3b24a646eb46 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -180,9 +180,9 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
180 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 180 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
181 181
182 memset(&read_wr, 0, sizeof(read_wr)); 182 memset(&read_wr, 0, sizeof(read_wr));
183 read_wr.wr.wr_id = (unsigned long)ctxt; 183 ctxt->cqe.done = svc_rdma_wc_read;
184 read_wr.wr.wr_cqe = &ctxt->cqe;
184 read_wr.wr.opcode = IB_WR_RDMA_READ; 185 read_wr.wr.opcode = IB_WR_RDMA_READ;
185 ctxt->wr_op = read_wr.wr.opcode;
186 read_wr.wr.send_flags = IB_SEND_SIGNALED; 186 read_wr.wr.send_flags = IB_SEND_SIGNALED;
187 read_wr.rkey = rs_handle; 187 read_wr.rkey = rs_handle;
188 read_wr.remote_addr = rs_offset; 188 read_wr.remote_addr = rs_offset;
@@ -299,8 +299,9 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
299 ctxt->read_hdr = head; 299 ctxt->read_hdr = head;
300 300
301 /* Prepare REG WR */ 301 /* Prepare REG WR */
302 ctxt->reg_cqe.done = svc_rdma_wc_reg;
303 reg_wr.wr.wr_cqe = &ctxt->reg_cqe;
302 reg_wr.wr.opcode = IB_WR_REG_MR; 304 reg_wr.wr.opcode = IB_WR_REG_MR;
303 reg_wr.wr.wr_id = 0;
304 reg_wr.wr.send_flags = IB_SEND_SIGNALED; 305 reg_wr.wr.send_flags = IB_SEND_SIGNALED;
305 reg_wr.wr.num_sge = 0; 306 reg_wr.wr.num_sge = 0;
306 reg_wr.mr = frmr->mr; 307 reg_wr.mr = frmr->mr;
@@ -310,6 +311,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
310 311
311 /* Prepare RDMA_READ */ 312 /* Prepare RDMA_READ */
312 memset(&read_wr, 0, sizeof(read_wr)); 313 memset(&read_wr, 0, sizeof(read_wr));
314 ctxt->cqe.done = svc_rdma_wc_read;
315 read_wr.wr.wr_cqe = &ctxt->cqe;
313 read_wr.wr.send_flags = IB_SEND_SIGNALED; 316 read_wr.wr.send_flags = IB_SEND_SIGNALED;
314 read_wr.rkey = rs_handle; 317 read_wr.rkey = rs_handle;
315 read_wr.remote_addr = rs_offset; 318 read_wr.remote_addr = rs_offset;
@@ -317,19 +320,18 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
317 read_wr.wr.num_sge = 1; 320 read_wr.wr.num_sge = 1;
318 if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { 321 if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
319 read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; 322 read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
320 read_wr.wr.wr_id = (unsigned long)ctxt;
321 read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; 323 read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
322 } else { 324 } else {
323 read_wr.wr.opcode = IB_WR_RDMA_READ; 325 read_wr.wr.opcode = IB_WR_RDMA_READ;
324 read_wr.wr.next = &inv_wr; 326 read_wr.wr.next = &inv_wr;
325 /* Prepare invalidate */ 327 /* Prepare invalidate */
326 memset(&inv_wr, 0, sizeof(inv_wr)); 328 memset(&inv_wr, 0, sizeof(inv_wr));
327 inv_wr.wr_id = (unsigned long)ctxt; 329 ctxt->inv_cqe.done = svc_rdma_wc_inv;
330 inv_wr.wr_cqe = &ctxt->inv_cqe;
328 inv_wr.opcode = IB_WR_LOCAL_INV; 331 inv_wr.opcode = IB_WR_LOCAL_INV;
329 inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; 332 inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
330 inv_wr.ex.invalidate_rkey = frmr->mr->lkey; 333 inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
331 } 334 }
332 ctxt->wr_op = read_wr.wr.opcode;
333 335
334 /* Post the chain */ 336 /* Post the chain */
335 ret = svc_rdma_send(xprt, &reg_wr.wr); 337 ret = svc_rdma_send(xprt, &reg_wr.wr);
@@ -612,7 +614,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
612 struct svc_rdma_op_ctxt *ctxt = NULL; 614 struct svc_rdma_op_ctxt *ctxt = NULL;
613 struct rpcrdma_msg *rmsgp; 615 struct rpcrdma_msg *rmsgp;
614 int ret = 0; 616 int ret = 0;
615 int len;
616 617
617 dprintk("svcrdma: rqstp=%p\n", rqstp); 618 dprintk("svcrdma: rqstp=%p\n", rqstp);
618 619
@@ -642,8 +643,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
642 * transport list 643 * transport list
643 */ 644 */
644 if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) 645 if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
645 goto close_out; 646 goto defer;
646
647 goto out; 647 goto out;
648 } 648 }
649 dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", 649 dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
@@ -654,15 +654,13 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
654 rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); 654 rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
655 655
656 /* Decode the RDMA header. */ 656 /* Decode the RDMA header. */
657 len = svc_rdma_xdr_decode_req(&rmsgp, rqstp); 657 rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
658 rqstp->rq_xprt_hlen = len; 658 ret = svc_rdma_xdr_decode_req(rmsgp, rqstp);
659 659 if (ret < 0)
660 /* If the request is invalid, reply with an error */ 660 goto out_err;
661 if (len < 0) { 661 if (ret == 0)
662 if (len == -ENOSYS) 662 goto out_drop;
663 svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); 663 rqstp->rq_xprt_hlen = ret;
664 goto close_out;
665 }
666 664
667 if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) { 665 if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
668 ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp, 666 ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
@@ -698,26 +696,16 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
698 svc_xprt_copy_addrs(rqstp, xprt); 696 svc_xprt_copy_addrs(rqstp, xprt);
699 return ret; 697 return ret;
700 698
701 close_out: 699out_err:
702 if (ctxt) 700 svc_rdma_send_error(rdma_xprt, rmsgp, ret);
703 svc_rdma_put_context(ctxt, 1); 701 svc_rdma_put_context(ctxt, 0);
704 dprintk("svcrdma: transport %p is closing\n", xprt); 702 return 0;
705 /* 703
706 * Set the close bit and enqueue it. svc_recv will see the
707 * close bit and call svc_xprt_delete
708 */
709 set_bit(XPT_CLOSE, &xprt->xpt_flags);
710defer: 704defer:
711 return 0; 705 return 0;
712 706
707out_drop:
708 svc_rdma_put_context(ctxt, 1);
713repost: 709repost:
714 ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL); 710 return svc_rdma_repost_recv(rdma_xprt, GFP_KERNEL);
715 if (ret) {
716 pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
717 ret);
718 pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
719 set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
720 ret = -ENOTCONN;
721 }
722 return ret;
723} 711}
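The two-column view above makes the new error handling in svc_rdma_recvfrom() hard to follow. Condensed into a straight-line sketch (taken from the added lines, with unrelated code elided), the decode step now has three outcomes:

/* Sketch of the new return-value contract of svc_rdma_xdr_decode_req():
 *   < 0  -> protocol problem: send an RDMA error reply, consume the ctxt
 *   == 0 -> RDMA_DONE/RDMA_ERROR message: drop it and repost a receive
 *   > 0  -> length of the RPC-over-RDMA header that was consumed
 */
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
ret = svc_rdma_xdr_decode_req(rmsgp, rqstp);
if (ret < 0) {
	svc_rdma_send_error(rdma_xprt, rmsgp, ret);
	svc_rdma_put_context(ctxt, 0);
	return 0;
}
if (ret == 0) {
	svc_rdma_put_context(ctxt, 1);
	return svc_rdma_repost_recv(rdma_xprt, GFP_KERNEL);
}
rqstp->rq_xprt_hlen = ret;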
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index df57f3ce6cd2..4f1b1c4f45f9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -50,9 +50,15 @@
50 50
51#define RPCDBG_FACILITY RPCDBG_SVCXPRT 51#define RPCDBG_FACILITY RPCDBG_SVCXPRT
52 52
53static u32 xdr_padsize(u32 len)
54{
55 return (len & 3) ? (4 - (len & 3)) : 0;
56}
57
53int svc_rdma_map_xdr(struct svcxprt_rdma *xprt, 58int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
54 struct xdr_buf *xdr, 59 struct xdr_buf *xdr,
55 struct svc_rdma_req_map *vec) 60 struct svc_rdma_req_map *vec,
61 bool write_chunk_present)
56{ 62{
57 int sge_no; 63 int sge_no;
58 u32 sge_bytes; 64 u32 sge_bytes;
@@ -92,9 +98,20 @@ int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
92 98
93 /* Tail SGE */ 99 /* Tail SGE */
94 if (xdr->tail[0].iov_len) { 100 if (xdr->tail[0].iov_len) {
95 vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; 101 unsigned char *base = xdr->tail[0].iov_base;
96 vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; 102 size_t len = xdr->tail[0].iov_len;
97 sge_no++; 103 u32 xdr_pad = xdr_padsize(xdr->page_len);
104
105 if (write_chunk_present && xdr_pad) {
106 base += xdr_pad;
107 len -= xdr_pad;
108 }
109
110 if (len) {
111 vec->sge[sge_no].iov_base = base;
112 vec->sge[sge_no].iov_len = len;
113 sge_no++;
114 }
98 } 115 }
99 116
100 dprintk("svcrdma: %s: sge_no %d page_no %d " 117 dprintk("svcrdma: %s: sge_no %d page_no %d "
@@ -166,10 +183,10 @@ svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
166 * reply array is present 183 * reply array is present
167 */ 184 */
168static struct rpcrdma_write_array * 185static struct rpcrdma_write_array *
169svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) 186svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp,
187 struct rpcrdma_write_array *wr_ary)
170{ 188{
171 struct rpcrdma_read_chunk *rch; 189 struct rpcrdma_read_chunk *rch;
172 struct rpcrdma_write_array *wr_ary;
173 struct rpcrdma_write_array *rp_ary; 190 struct rpcrdma_write_array *rp_ary;
174 191
175 /* XXX: Need to fix when reply chunk may occur with read list 192 /* XXX: Need to fix when reply chunk may occur with read list
@@ -191,7 +208,6 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp)
191 goto found_it; 208 goto found_it;
192 } 209 }
193 210
194 wr_ary = svc_rdma_get_write_array(rmsgp);
195 if (wr_ary) { 211 if (wr_ary) {
196 int chunk = be32_to_cpu(wr_ary->wc_nchunks); 212 int chunk = be32_to_cpu(wr_ary->wc_nchunks);
197 213
@@ -281,8 +297,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
281 297
282 /* Prepare WRITE WR */ 298 /* Prepare WRITE WR */
283 memset(&write_wr, 0, sizeof write_wr); 299 memset(&write_wr, 0, sizeof write_wr);
284 ctxt->wr_op = IB_WR_RDMA_WRITE; 300 ctxt->cqe.done = svc_rdma_wc_write;
285 write_wr.wr.wr_id = (unsigned long)ctxt; 301 write_wr.wr.wr_cqe = &ctxt->cqe;
286 write_wr.wr.sg_list = &sge[0]; 302 write_wr.wr.sg_list = &sge[0];
287 write_wr.wr.num_sge = sge_no; 303 write_wr.wr.num_sge = sge_no;
288 write_wr.wr.opcode = IB_WR_RDMA_WRITE; 304 write_wr.wr.opcode = IB_WR_RDMA_WRITE;
@@ -298,41 +314,37 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
298 err: 314 err:
299 svc_rdma_unmap_dma(ctxt); 315 svc_rdma_unmap_dma(ctxt);
300 svc_rdma_put_context(ctxt, 0); 316 svc_rdma_put_context(ctxt, 0);
301 /* Fatal error, close transport */
302 return -EIO; 317 return -EIO;
303} 318}
304 319
320noinline
305static int send_write_chunks(struct svcxprt_rdma *xprt, 321static int send_write_chunks(struct svcxprt_rdma *xprt,
306 struct rpcrdma_msg *rdma_argp, 322 struct rpcrdma_write_array *wr_ary,
307 struct rpcrdma_msg *rdma_resp, 323 struct rpcrdma_msg *rdma_resp,
308 struct svc_rqst *rqstp, 324 struct svc_rqst *rqstp,
309 struct svc_rdma_req_map *vec) 325 struct svc_rdma_req_map *vec)
310{ 326{
311 u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; 327 u32 xfer_len = rqstp->rq_res.page_len;
312 int write_len; 328 int write_len;
313 u32 xdr_off; 329 u32 xdr_off;
314 int chunk_off; 330 int chunk_off;
315 int chunk_no; 331 int chunk_no;
316 int nchunks; 332 int nchunks;
317 struct rpcrdma_write_array *arg_ary;
318 struct rpcrdma_write_array *res_ary; 333 struct rpcrdma_write_array *res_ary;
319 int ret; 334 int ret;
320 335
321 arg_ary = svc_rdma_get_write_array(rdma_argp);
322 if (!arg_ary)
323 return 0;
324 res_ary = (struct rpcrdma_write_array *) 336 res_ary = (struct rpcrdma_write_array *)
325 &rdma_resp->rm_body.rm_chunks[1]; 337 &rdma_resp->rm_body.rm_chunks[1];
326 338
327 /* Write chunks start at the pagelist */ 339 /* Write chunks start at the pagelist */
328 nchunks = be32_to_cpu(arg_ary->wc_nchunks); 340 nchunks = be32_to_cpu(wr_ary->wc_nchunks);
329 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; 341 for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
330 xfer_len && chunk_no < nchunks; 342 xfer_len && chunk_no < nchunks;
331 chunk_no++) { 343 chunk_no++) {
332 struct rpcrdma_segment *arg_ch; 344 struct rpcrdma_segment *arg_ch;
333 u64 rs_offset; 345 u64 rs_offset;
334 346
335 arg_ch = &arg_ary->wc_array[chunk_no].wc_target; 347 arg_ch = &wr_ary->wc_array[chunk_no].wc_target;
336 write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length)); 348 write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
337 349
338 /* Prepare the response chunk given the length actually 350 /* Prepare the response chunk given the length actually
@@ -350,11 +362,8 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
350 xdr_off, 362 xdr_off,
351 write_len, 363 write_len,
352 vec); 364 vec);
353 if (ret <= 0) { 365 if (ret <= 0)
354 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 366 goto out_err;
355 ret);
356 return -EIO;
357 }
358 chunk_off += ret; 367 chunk_off += ret;
359 xdr_off += ret; 368 xdr_off += ret;
360 xfer_len -= ret; 369 xfer_len -= ret;
@@ -364,11 +373,16 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
364 /* Update the req with the number of chunks actually used */ 373 /* Update the req with the number of chunks actually used */
365 svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no); 374 svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no);
366 375
367 return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; 376 return rqstp->rq_res.page_len;
377
378out_err:
379 pr_err("svcrdma: failed to send write chunks, rc=%d\n", ret);
380 return -EIO;
368} 381}
369 382
383noinline
370static int send_reply_chunks(struct svcxprt_rdma *xprt, 384static int send_reply_chunks(struct svcxprt_rdma *xprt,
371 struct rpcrdma_msg *rdma_argp, 385 struct rpcrdma_write_array *rp_ary,
372 struct rpcrdma_msg *rdma_resp, 386 struct rpcrdma_msg *rdma_resp,
373 struct svc_rqst *rqstp, 387 struct svc_rqst *rqstp,
374 struct svc_rdma_req_map *vec) 388 struct svc_rdma_req_map *vec)
@@ -380,25 +394,21 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
380 int chunk_off; 394 int chunk_off;
381 int nchunks; 395 int nchunks;
382 struct rpcrdma_segment *ch; 396 struct rpcrdma_segment *ch;
383 struct rpcrdma_write_array *arg_ary;
384 struct rpcrdma_write_array *res_ary; 397 struct rpcrdma_write_array *res_ary;
385 int ret; 398 int ret;
386 399
387 arg_ary = svc_rdma_get_reply_array(rdma_argp);
388 if (!arg_ary)
389 return 0;
390 /* XXX: need to fix when reply lists occur with read-list and or 400 /* XXX: need to fix when reply lists occur with read-list and or
391 * write-list */ 401 * write-list */
392 res_ary = (struct rpcrdma_write_array *) 402 res_ary = (struct rpcrdma_write_array *)
393 &rdma_resp->rm_body.rm_chunks[2]; 403 &rdma_resp->rm_body.rm_chunks[2];
394 404
395 /* xdr offset starts at RPC message */ 405 /* xdr offset starts at RPC message */
396 nchunks = be32_to_cpu(arg_ary->wc_nchunks); 406 nchunks = be32_to_cpu(rp_ary->wc_nchunks);
397 for (xdr_off = 0, chunk_no = 0; 407 for (xdr_off = 0, chunk_no = 0;
398 xfer_len && chunk_no < nchunks; 408 xfer_len && chunk_no < nchunks;
399 chunk_no++) { 409 chunk_no++) {
400 u64 rs_offset; 410 u64 rs_offset;
401 ch = &arg_ary->wc_array[chunk_no].wc_target; 411 ch = &rp_ary->wc_array[chunk_no].wc_target;
402 write_len = min(xfer_len, be32_to_cpu(ch->rs_length)); 412 write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
403 413
404 /* Prepare the reply chunk given the length actually 414 /* Prepare the reply chunk given the length actually
@@ -415,11 +425,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
415 xdr_off, 425 xdr_off,
416 write_len, 426 write_len,
417 vec); 427 vec);
418 if (ret <= 0) { 428 if (ret <= 0)
419 dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", 429 goto out_err;
420 ret);
421 return -EIO;
422 }
423 chunk_off += ret; 430 chunk_off += ret;
424 xdr_off += ret; 431 xdr_off += ret;
425 xfer_len -= ret; 432 xfer_len -= ret;
@@ -430,6 +437,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
430 svc_rdma_xdr_encode_reply_array(res_ary, chunk_no); 437 svc_rdma_xdr_encode_reply_array(res_ary, chunk_no);
431 438
432 return rqstp->rq_res.len; 439 return rqstp->rq_res.len;
440
441out_err:
442 pr_err("svcrdma: failed to send reply chunks, rc=%d\n", ret);
443 return -EIO;
433} 444}
434 445
435/* This function prepares the portion of the RPCRDMA message to be 446/* This function prepares the portion of the RPCRDMA message to be
@@ -464,13 +475,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
464 int pages; 475 int pages;
465 int ret; 476 int ret;
466 477
467 /* Post a recv buffer to handle another request. */ 478 ret = svc_rdma_repost_recv(rdma, GFP_KERNEL);
468 ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
469 if (ret) { 479 if (ret) {
470 printk(KERN_INFO
471 "svcrdma: could not post a receive buffer, err=%d."
472 "Closing transport %p.\n", ret, rdma);
473 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
474 svc_rdma_put_context(ctxt, 0); 480 svc_rdma_put_context(ctxt, 0);
475 return -ENOTCONN; 481 return -ENOTCONN;
476 } 482 }
@@ -543,8 +549,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
543 goto err; 549 goto err;
544 } 550 }
545 memset(&send_wr, 0, sizeof send_wr); 551 memset(&send_wr, 0, sizeof send_wr);
546 ctxt->wr_op = IB_WR_SEND; 552 ctxt->cqe.done = svc_rdma_wc_send;
547 send_wr.wr_id = (unsigned long)ctxt; 553 send_wr.wr_cqe = &ctxt->cqe;
548 send_wr.sg_list = ctxt->sge; 554 send_wr.sg_list = ctxt->sge;
549 send_wr.num_sge = sge_no; 555 send_wr.num_sge = sge_no;
550 send_wr.opcode = IB_WR_SEND; 556 send_wr.opcode = IB_WR_SEND;
@@ -559,6 +565,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
559 err: 565 err:
560 svc_rdma_unmap_dma(ctxt); 566 svc_rdma_unmap_dma(ctxt);
561 svc_rdma_put_context(ctxt, 1); 567 svc_rdma_put_context(ctxt, 1);
568 pr_err("svcrdma: failed to send reply, rc=%d\n", ret);
562 return -EIO; 569 return -EIO;
563} 570}
564 571
@@ -573,7 +580,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
573 container_of(xprt, struct svcxprt_rdma, sc_xprt); 580 container_of(xprt, struct svcxprt_rdma, sc_xprt);
574 struct rpcrdma_msg *rdma_argp; 581 struct rpcrdma_msg *rdma_argp;
575 struct rpcrdma_msg *rdma_resp; 582 struct rpcrdma_msg *rdma_resp;
576 struct rpcrdma_write_array *reply_ary; 583 struct rpcrdma_write_array *wr_ary, *rp_ary;
577 enum rpcrdma_proc reply_type; 584 enum rpcrdma_proc reply_type;
578 int ret; 585 int ret;
579 int inline_bytes; 586 int inline_bytes;
@@ -587,12 +594,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
587 * places this at the start of page 0. 594 * places this at the start of page 0.
588 */ 595 */
589 rdma_argp = page_address(rqstp->rq_pages[0]); 596 rdma_argp = page_address(rqstp->rq_pages[0]);
597 wr_ary = svc_rdma_get_write_array(rdma_argp);
598 rp_ary = svc_rdma_get_reply_array(rdma_argp, wr_ary);
590 599
591 /* Build an req vec for the XDR */ 600 /* Build an req vec for the XDR */
592 ctxt = svc_rdma_get_context(rdma); 601 ctxt = svc_rdma_get_context(rdma);
593 ctxt->direction = DMA_TO_DEVICE; 602 ctxt->direction = DMA_TO_DEVICE;
594 vec = svc_rdma_get_req_map(rdma); 603 vec = svc_rdma_get_req_map(rdma);
595 ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec); 604 ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec, wr_ary != NULL);
596 if (ret) 605 if (ret)
597 goto err0; 606 goto err0;
598 inline_bytes = rqstp->rq_res.len; 607 inline_bytes = rqstp->rq_res.len;
@@ -603,8 +612,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
603 if (!res_page) 612 if (!res_page)
604 goto err0; 613 goto err0;
605 rdma_resp = page_address(res_page); 614 rdma_resp = page_address(res_page);
606 reply_ary = svc_rdma_get_reply_array(rdma_argp); 615 if (rp_ary)
607 if (reply_ary)
608 reply_type = RDMA_NOMSG; 616 reply_type = RDMA_NOMSG;
609 else 617 else
610 reply_type = RDMA_MSG; 618 reply_type = RDMA_MSG;
@@ -612,27 +620,26 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
612 rdma_resp, reply_type); 620 rdma_resp, reply_type);
613 621
614 /* Send any write-chunk data and build resp write-list */ 622 /* Send any write-chunk data and build resp write-list */
615 ret = send_write_chunks(rdma, rdma_argp, rdma_resp, 623 if (wr_ary) {
616 rqstp, vec); 624 ret = send_write_chunks(rdma, wr_ary, rdma_resp, rqstp, vec);
617 if (ret < 0) { 625 if (ret < 0)
618 printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", 626 goto err1;
619 ret); 627 inline_bytes -= ret + xdr_padsize(ret);
620 goto err1;
621 } 628 }
622 inline_bytes -= ret;
623 629
624 /* Send any reply-list data and update resp reply-list */ 630 /* Send any reply-list data and update resp reply-list */
625 ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, 631 if (rp_ary) {
626 rqstp, vec); 632 ret = send_reply_chunks(rdma, rp_ary, rdma_resp, rqstp, vec);
627 if (ret < 0) { 633 if (ret < 0)
628 printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", 634 goto err1;
629 ret); 635 inline_bytes -= ret;
630 goto err1;
631 } 636 }
632 inline_bytes -= ret;
633 637
634 ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec, 638 ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
635 inline_bytes); 639 inline_bytes);
640 if (ret < 0)
641 goto err1;
642
636 svc_rdma_put_req_map(rdma, vec); 643 svc_rdma_put_req_map(rdma, vec);
637 dprintk("svcrdma: send_reply returns %d\n", ret); 644 dprintk("svcrdma: send_reply returns %d\n", ret);
638 return ret; 645 return ret;
@@ -642,5 +649,68 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
642 err0: 649 err0:
643 svc_rdma_put_req_map(rdma, vec); 650 svc_rdma_put_req_map(rdma, vec);
644 svc_rdma_put_context(ctxt, 0); 651 svc_rdma_put_context(ctxt, 0);
645 return ret; 652 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
653 return -ENOTCONN;
654}
655
656void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
657 int status)
658{
659 struct ib_send_wr err_wr;
660 struct page *p;
661 struct svc_rdma_op_ctxt *ctxt;
662 enum rpcrdma_errcode err;
663 __be32 *va;
664 int length;
665 int ret;
666
667 ret = svc_rdma_repost_recv(xprt, GFP_KERNEL);
668 if (ret)
669 return;
670
671 p = alloc_page(GFP_KERNEL);
672 if (!p)
673 return;
674 va = page_address(p);
675
676 /* XDR encode an error reply */
677 err = ERR_CHUNK;
678 if (status == -EPROTONOSUPPORT)
679 err = ERR_VERS;
680 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
681
682 ctxt = svc_rdma_get_context(xprt);
683 ctxt->direction = DMA_TO_DEVICE;
684 ctxt->count = 1;
685 ctxt->pages[0] = p;
686
687 /* Prepare SGE for local address */
688 ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
689 ctxt->sge[0].length = length;
690 ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
691 p, 0, length, DMA_TO_DEVICE);
692 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
693 dprintk("svcrdma: Error mapping buffer for protocol error\n");
694 svc_rdma_put_context(ctxt, 1);
695 return;
696 }
697 atomic_inc(&xprt->sc_dma_used);
698
699 /* Prepare SEND WR */
700 memset(&err_wr, 0, sizeof(err_wr));
701 ctxt->cqe.done = svc_rdma_wc_send;
702 err_wr.wr_cqe = &ctxt->cqe;
703 err_wr.sg_list = ctxt->sge;
704 err_wr.num_sge = 1;
705 err_wr.opcode = IB_WR_SEND;
706 err_wr.send_flags = IB_SEND_SIGNALED;
707
708 /* Post It */
709 ret = svc_rdma_send(xprt, &err_wr);
710 if (ret) {
711 dprintk("svcrdma: Error %d posting send for protocol error\n",
712 ret);
713 svc_rdma_unmap_dma(ctxt);
714 svc_rdma_put_context(ctxt, 1);
715 }
646} 716}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 5763825d09bf..90668969d559 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -63,17 +63,10 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
63 int flags); 63 int flags);
64static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); 64static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
65static void svc_rdma_release_rqst(struct svc_rqst *); 65static void svc_rdma_release_rqst(struct svc_rqst *);
66static void dto_tasklet_func(unsigned long data);
67static void svc_rdma_detach(struct svc_xprt *xprt); 66static void svc_rdma_detach(struct svc_xprt *xprt);
68static void svc_rdma_free(struct svc_xprt *xprt); 67static void svc_rdma_free(struct svc_xprt *xprt);
69static int svc_rdma_has_wspace(struct svc_xprt *xprt); 68static int svc_rdma_has_wspace(struct svc_xprt *xprt);
70static int svc_rdma_secure_port(struct svc_rqst *); 69static int svc_rdma_secure_port(struct svc_rqst *);
71static void rq_cq_reap(struct svcxprt_rdma *xprt);
72static void sq_cq_reap(struct svcxprt_rdma *xprt);
73
74static DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL);
75static DEFINE_SPINLOCK(dto_lock);
76static LIST_HEAD(dto_xprt_q);
77 70
78static struct svc_xprt_ops svc_rdma_ops = { 71static struct svc_xprt_ops svc_rdma_ops = {
79 .xpo_create = svc_rdma_create, 72 .xpo_create = svc_rdma_create,
@@ -352,15 +345,6 @@ static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
352 } 345 }
353} 346}
354 347
355/* ib_cq event handler */
356static void cq_event_handler(struct ib_event *event, void *context)
357{
358 struct svc_xprt *xprt = context;
359 dprintk("svcrdma: received CQ event %s (%d), context=%p\n",
360 ib_event_msg(event->event), event->event, context);
361 set_bit(XPT_CLOSE, &xprt->xpt_flags);
362}
363
364/* QP event handler */ 348/* QP event handler */
365static void qp_event_handler(struct ib_event *event, void *context) 349static void qp_event_handler(struct ib_event *event, void *context)
366{ 350{
@@ -392,251 +376,171 @@ static void qp_event_handler(struct ib_event *event, void *context)
392 } 376 }
393} 377}
394 378
395/* 379/**
396 * Data Transfer Operation Tasklet 380 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
381 * @cq: completion queue
382 * @wc: completed WR
397 * 383 *
398 * Walks a list of transports with I/O pending, removing entries as
399 * they are added to the server's I/O pending list. Two bits indicate
400 * if SQ, RQ, or both have I/O pending. The dto_lock is an irqsave
401 * spinlock that serializes access to the transport list with the RQ
402 * and SQ interrupt handlers.
403 */ 384 */
404static void dto_tasklet_func(unsigned long data) 385static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
405{ 386{
406 struct svcxprt_rdma *xprt; 387 struct svcxprt_rdma *xprt = cq->cq_context;
407 unsigned long flags; 388 struct ib_cqe *cqe = wc->wr_cqe;
389 struct svc_rdma_op_ctxt *ctxt;
408 390
409 spin_lock_irqsave(&dto_lock, flags); 391 /* WARNING: Only wc->wr_cqe and wc->status are reliable */
410 while (!list_empty(&dto_xprt_q)) { 392 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
411 xprt = list_entry(dto_xprt_q.next, 393 ctxt->wc_status = wc->status;
412 struct svcxprt_rdma, sc_dto_q); 394 svc_rdma_unmap_dma(ctxt);
413 list_del_init(&xprt->sc_dto_q);
414 spin_unlock_irqrestore(&dto_lock, flags);
415 395
416 rq_cq_reap(xprt); 396 if (wc->status != IB_WC_SUCCESS)
417 sq_cq_reap(xprt); 397 goto flushed;
418 398
419 svc_xprt_put(&xprt->sc_xprt); 399 /* All wc fields are now known to be valid */
420 spin_lock_irqsave(&dto_lock, flags); 400 ctxt->byte_len = wc->byte_len;
421 } 401 spin_lock(&xprt->sc_rq_dto_lock);
422 spin_unlock_irqrestore(&dto_lock, flags); 402 list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
403 spin_unlock(&xprt->sc_rq_dto_lock);
404
405 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
406 if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
407 goto out;
408 svc_xprt_enqueue(&xprt->sc_xprt);
409 goto out;
410
411flushed:
412 if (wc->status != IB_WC_WR_FLUSH_ERR)
413 pr_warn("svcrdma: receive: %s (%u/0x%x)\n",
414 ib_wc_status_msg(wc->status),
415 wc->status, wc->vendor_err);
416 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
417 svc_rdma_put_context(ctxt, 1);
418
419out:
420 svc_xprt_put(&xprt->sc_xprt);
423} 421}
424 422
425/* 423static void svc_rdma_send_wc_common(struct svcxprt_rdma *xprt,
426 * Receive Queue Completion Handler 424 struct ib_wc *wc,
427 * 425 const char *opname)
428 * Since an RQ completion handler is called on interrupt context, we
429 * need to defer the handling of the I/O to a tasklet
430 */
431static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
432{ 426{
433 struct svcxprt_rdma *xprt = cq_context; 427 if (wc->status != IB_WC_SUCCESS)
434 unsigned long flags; 428 goto err;
435
436 /* Guard against unconditional flush call for destroyed QP */
437 if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
438 return;
439 429
440 /* 430out:
441 * Set the bit regardless of whether or not it's on the list 431 atomic_dec(&xprt->sc_sq_count);
442 * because it may be on the list already due to an SQ 432 wake_up(&xprt->sc_send_wait);
443 * completion. 433 return;
444 */ 434
445 set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags); 435err:
436 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
437 if (wc->status != IB_WC_WR_FLUSH_ERR)
438 pr_err("svcrdma: %s: %s (%u/0x%x)\n",
439 opname, ib_wc_status_msg(wc->status),
440 wc->status, wc->vendor_err);
441 goto out;
442}
446 443
447 /* 444static void svc_rdma_send_wc_common_put(struct ib_cq *cq, struct ib_wc *wc,
448 * If this transport is not already on the DTO transport queue, 445 const char *opname)
449 * add it 446{
450 */ 447 struct svcxprt_rdma *xprt = cq->cq_context;
451 spin_lock_irqsave(&dto_lock, flags);
452 if (list_empty(&xprt->sc_dto_q)) {
453 svc_xprt_get(&xprt->sc_xprt);
454 list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
455 }
456 spin_unlock_irqrestore(&dto_lock, flags);
457 448
458 /* Tasklet does all the work to avoid irqsave locks. */ 449 svc_rdma_send_wc_common(xprt, wc, opname);
459 tasklet_schedule(&dto_tasklet); 450 svc_xprt_put(&xprt->sc_xprt);
460} 451}
461 452
462/* 453/**
463 * rq_cq_reap - Process the RQ CQ. 454 * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
464 * 455 * @cq: completion queue
465 * Take all completing WC off the CQE and enqueue the associated DTO 456 * @wc: completed WR
466 * context on the dto_q for the transport.
467 * 457 *
468 * Note that caller must hold a transport reference.
469 */ 458 */
470static void rq_cq_reap(struct svcxprt_rdma *xprt) 459void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
471{ 460{
472 int ret; 461 struct ib_cqe *cqe = wc->wr_cqe;
473 struct ib_wc wc; 462 struct svc_rdma_op_ctxt *ctxt;
474 struct svc_rdma_op_ctxt *ctxt = NULL;
475 463
476 if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) 464 svc_rdma_send_wc_common_put(cq, wc, "send");
477 return;
478 465
479 ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); 466 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
480 atomic_inc(&rdma_stat_rq_poll); 467 svc_rdma_unmap_dma(ctxt);
468 svc_rdma_put_context(ctxt, 1);
469}
481 470
482 while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { 471/**
483 ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; 472 * svc_rdma_wc_write - Invoked by RDMA provider for each polled Write WC
484 ctxt->wc_status = wc.status; 473 * @cq: completion queue
485 ctxt->byte_len = wc.byte_len; 474 * @wc: completed WR
486 svc_rdma_unmap_dma(ctxt); 475 *
487 if (wc.status != IB_WC_SUCCESS) { 476 */
488 /* Close the transport */ 477void svc_rdma_wc_write(struct ib_cq *cq, struct ib_wc *wc)
489 dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt); 478{
490 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 479 struct ib_cqe *cqe = wc->wr_cqe;
491 svc_rdma_put_context(ctxt, 1); 480 struct svc_rdma_op_ctxt *ctxt;
492 svc_xprt_put(&xprt->sc_xprt);
493 continue;
494 }
495 spin_lock_bh(&xprt->sc_rq_dto_lock);
496 list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
497 spin_unlock_bh(&xprt->sc_rq_dto_lock);
498 svc_xprt_put(&xprt->sc_xprt);
499 }
500 481
501 if (ctxt) 482 svc_rdma_send_wc_common_put(cq, wc, "write");
502 atomic_inc(&rdma_stat_rq_prod);
503 483
504 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); 484 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
505 /* 485 svc_rdma_unmap_dma(ctxt);
506 * If data arrived before established event, 486 svc_rdma_put_context(ctxt, 0);
507 * don't enqueue. This defers RPC I/O until the
508 * RDMA connection is complete.
509 */
510 if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
511 svc_xprt_enqueue(&xprt->sc_xprt);
512} 487}
513 488
514/* 489/**
515 * Process a completion context 490 * svc_rdma_wc_reg - Invoked by RDMA provider for each polled FASTREG WC
491 * @cq: completion queue
492 * @wc: completed WR
493 *
516 */ 494 */
517static void process_context(struct svcxprt_rdma *xprt, 495void svc_rdma_wc_reg(struct ib_cq *cq, struct ib_wc *wc)
518 struct svc_rdma_op_ctxt *ctxt)
519{ 496{
520 struct svc_rdma_op_ctxt *read_hdr; 497 svc_rdma_send_wc_common_put(cq, wc, "fastreg");
521 int free_pages = 0; 498}
522
523 svc_rdma_unmap_dma(ctxt);
524 499
525 switch (ctxt->wr_op) { 500/**
526 case IB_WR_SEND: 501 * svc_rdma_wc_read - Invoked by RDMA provider for each polled Read WC
527 free_pages = 1; 502 * @cq: completion queue
528 break; 503 * @wc: completed WR
504 *
505 */
506void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
507{
508 struct svcxprt_rdma *xprt = cq->cq_context;
509 struct ib_cqe *cqe = wc->wr_cqe;
510 struct svc_rdma_op_ctxt *ctxt;
529 511
530 case IB_WR_RDMA_WRITE: 512 svc_rdma_send_wc_common(xprt, wc, "read");
531 break;
532 513
533 case IB_WR_RDMA_READ: 514 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
534 case IB_WR_RDMA_READ_WITH_INV: 515 svc_rdma_unmap_dma(ctxt);
535 svc_rdma_put_frmr(xprt, ctxt->frmr); 516 svc_rdma_put_frmr(xprt, ctxt->frmr);
536 517
537 if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) 518 if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
538 break; 519 struct svc_rdma_op_ctxt *read_hdr;
539 520
540 read_hdr = ctxt->read_hdr; 521 read_hdr = ctxt->read_hdr;
541 svc_rdma_put_context(ctxt, 0); 522 spin_lock(&xprt->sc_rq_dto_lock);
542
543 spin_lock_bh(&xprt->sc_rq_dto_lock);
544 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
545 list_add_tail(&read_hdr->dto_q, 523 list_add_tail(&read_hdr->dto_q,
546 &xprt->sc_read_complete_q); 524 &xprt->sc_read_complete_q);
547 spin_unlock_bh(&xprt->sc_rq_dto_lock); 525 spin_unlock(&xprt->sc_rq_dto_lock);
548 svc_xprt_enqueue(&xprt->sc_xprt);
549 return;
550 526
551 default: 527 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
552 dprintk("svcrdma: unexpected completion opcode=%d\n", 528 svc_xprt_enqueue(&xprt->sc_xprt);
553 ctxt->wr_op);
554 break;
555 } 529 }
556 530
557 svc_rdma_put_context(ctxt, free_pages); 531 svc_rdma_put_context(ctxt, 0);
532 svc_xprt_put(&xprt->sc_xprt);
558} 533}
559 534
560/* 535/**
561 * Send Queue Completion Handler - potentially called on interrupt context. 536 * svc_rdma_wc_inv - Invoked by RDMA provider for each polled LOCAL_INV WC
537 * @cq: completion queue
538 * @wc: completed WR
562 * 539 *
563 * Note that caller must hold a transport reference.
564 */ 540 */
565static void sq_cq_reap(struct svcxprt_rdma *xprt) 541void svc_rdma_wc_inv(struct ib_cq *cq, struct ib_wc *wc)
566{
567 struct svc_rdma_op_ctxt *ctxt = NULL;
568 struct ib_wc wc_a[6];
569 struct ib_wc *wc;
570 struct ib_cq *cq = xprt->sc_sq_cq;
571 int ret;
572
573 memset(wc_a, 0, sizeof(wc_a));
574
575 if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
576 return;
577
578 ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
579 atomic_inc(&rdma_stat_sq_poll);
580 while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
581 int i;
582
583 for (i = 0; i < ret; i++) {
584 wc = &wc_a[i];
585 if (wc->status != IB_WC_SUCCESS) {
586 dprintk("svcrdma: sq wc err status %s (%d)\n",
587 ib_wc_status_msg(wc->status),
588 wc->status);
589
590 /* Close the transport */
591 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
592 }
593
594 /* Decrement used SQ WR count */
595 atomic_dec(&xprt->sc_sq_count);
596 wake_up(&xprt->sc_send_wait);
597
598 ctxt = (struct svc_rdma_op_ctxt *)
599 (unsigned long)wc->wr_id;
600 if (ctxt)
601 process_context(xprt, ctxt);
602
603 svc_xprt_put(&xprt->sc_xprt);
604 }
605 }
606
607 if (ctxt)
608 atomic_inc(&rdma_stat_sq_prod);
609}
610
611static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
612{ 542{
613 struct svcxprt_rdma *xprt = cq_context; 543 svc_rdma_send_wc_common_put(cq, wc, "localInv");
614 unsigned long flags;
615
616 /* Guard against unconditional flush call for destroyed QP */
617 if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
618 return;
619
620 /*
621 * Set the bit regardless of whether or not it's on the list
622 * because it may be on the list already due to an RQ
623 * completion.
624 */
625 set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags);
626
627 /*
628 * If this transport is not already on the DTO transport queue,
629 * add it
630 */
631 spin_lock_irqsave(&dto_lock, flags);
632 if (list_empty(&xprt->sc_dto_q)) {
633 svc_xprt_get(&xprt->sc_xprt);
634 list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
635 }
636 spin_unlock_irqrestore(&dto_lock, flags);
637
638 /* Tasklet does all the work to avoid irqsave locks. */
639 tasklet_schedule(&dto_tasklet);
640} 544}
641 545
642static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, 546static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
@@ -681,6 +585,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
681 ctxt = svc_rdma_get_context(xprt); 585 ctxt = svc_rdma_get_context(xprt);
682 buflen = 0; 586 buflen = 0;
683 ctxt->direction = DMA_FROM_DEVICE; 587 ctxt->direction = DMA_FROM_DEVICE;
588 ctxt->cqe.done = svc_rdma_wc_receive;
684 for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { 589 for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
685 if (sge_no >= xprt->sc_max_sge) { 590 if (sge_no >= xprt->sc_max_sge) {
686 pr_err("svcrdma: Too many sges (%d)\n", sge_no); 591 pr_err("svcrdma: Too many sges (%d)\n", sge_no);
@@ -705,7 +610,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
705 recv_wr.next = NULL; 610 recv_wr.next = NULL;
706 recv_wr.sg_list = &ctxt->sge[0]; 611 recv_wr.sg_list = &ctxt->sge[0];
707 recv_wr.num_sge = ctxt->count; 612 recv_wr.num_sge = ctxt->count;
708 recv_wr.wr_id = (u64)(unsigned long)ctxt; 613 recv_wr.wr_cqe = &ctxt->cqe;
709 614
710 svc_xprt_get(&xprt->sc_xprt); 615 svc_xprt_get(&xprt->sc_xprt);
711 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); 616 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
@@ -722,6 +627,21 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
722 return -ENOMEM; 627 return -ENOMEM;
723} 628}
724 629
630int svc_rdma_repost_recv(struct svcxprt_rdma *xprt, gfp_t flags)
631{
632 int ret = 0;
633
634 ret = svc_rdma_post_recv(xprt, flags);
635 if (ret) {
636 pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
637 ret);
638 pr_err("svcrdma: closing transport %p.\n", xprt);
639 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
640 ret = -ENOTCONN;
641 }
642 return ret;
643}
644
725/* 645/*
726 * This function handles the CONNECT_REQUEST event on a listening 646 * This function handles the CONNECT_REQUEST event on a listening
727 * endpoint. It is passed the cma_id for the _new_ connection. The context in 647 * endpoint. It is passed the cma_id for the _new_ connection. The context in
@@ -1011,7 +931,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1011 struct svcxprt_rdma *listen_rdma; 931 struct svcxprt_rdma *listen_rdma;
1012 struct svcxprt_rdma *newxprt = NULL; 932 struct svcxprt_rdma *newxprt = NULL;
1013 struct rdma_conn_param conn_param; 933 struct rdma_conn_param conn_param;
1014 struct ib_cq_init_attr cq_attr = {};
1015 struct ib_qp_init_attr qp_attr; 934 struct ib_qp_init_attr qp_attr;
1016 struct ib_device *dev; 935 struct ib_device *dev;
1017 unsigned int i; 936 unsigned int i;
@@ -1069,22 +988,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1069 dprintk("svcrdma: error creating PD for connect request\n"); 988 dprintk("svcrdma: error creating PD for connect request\n");
1070 goto errout; 989 goto errout;
1071 } 990 }
1072 cq_attr.cqe = newxprt->sc_sq_depth; 991 newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
1073 newxprt->sc_sq_cq = ib_create_cq(dev, 992 0, IB_POLL_SOFTIRQ);
1074 sq_comp_handler,
1075 cq_event_handler,
1076 newxprt,
1077 &cq_attr);
1078 if (IS_ERR(newxprt->sc_sq_cq)) { 993 if (IS_ERR(newxprt->sc_sq_cq)) {
1079 dprintk("svcrdma: error creating SQ CQ for connect request\n"); 994 dprintk("svcrdma: error creating SQ CQ for connect request\n");
1080 goto errout; 995 goto errout;
1081 } 996 }
1082 cq_attr.cqe = newxprt->sc_rq_depth; 997 newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth,
1083 newxprt->sc_rq_cq = ib_create_cq(dev, 998 0, IB_POLL_SOFTIRQ);
1084 rq_comp_handler,
1085 cq_event_handler,
1086 newxprt,
1087 &cq_attr);
1088 if (IS_ERR(newxprt->sc_rq_cq)) { 999 if (IS_ERR(newxprt->sc_rq_cq)) {
1089 dprintk("svcrdma: error creating RQ CQ for connect request\n"); 1000 dprintk("svcrdma: error creating RQ CQ for connect request\n");
1090 goto errout; 1001 goto errout;
@@ -1173,13 +1084,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
1173 /* Swap out the handler */ 1084 /* Swap out the handler */
1174 newxprt->sc_cm_id->event_handler = rdma_cma_handler; 1085 newxprt->sc_cm_id->event_handler = rdma_cma_handler;
1175 1086
1176 /*
1177 * Arm the CQs for the SQ and RQ before accepting so we can't
1178 * miss the first message
1179 */
1180 ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
1181 ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
1182
1183 /* Accept Connection */ 1087 /* Accept Connection */
1184 set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); 1088 set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
1185 memset(&conn_param, 0, sizeof conn_param); 1089 memset(&conn_param, 0, sizeof conn_param);
@@ -1319,10 +1223,10 @@ static void __svc_rdma_free(struct work_struct *work)
1319 ib_destroy_qp(rdma->sc_qp); 1223 ib_destroy_qp(rdma->sc_qp);
1320 1224
1321 if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) 1225 if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
1322 ib_destroy_cq(rdma->sc_sq_cq); 1226 ib_free_cq(rdma->sc_sq_cq);
1323 1227
1324 if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) 1228 if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
1325 ib_destroy_cq(rdma->sc_rq_cq); 1229 ib_free_cq(rdma->sc_rq_cq);
1326 1230
1327 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) 1231 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
1328 ib_dealloc_pd(rdma->sc_pd); 1232 ib_dealloc_pd(rdma->sc_pd);
@@ -1383,9 +1287,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1383 spin_unlock_bh(&xprt->sc_lock); 1287 spin_unlock_bh(&xprt->sc_lock);
1384 atomic_inc(&rdma_stat_sq_starve); 1288 atomic_inc(&rdma_stat_sq_starve);
1385 1289
1386 /* See if we can opportunistically reap SQ WR to make room */
1387 sq_cq_reap(xprt);
1388
1389 /* Wait until SQ WR available if SQ still full */ 1290 /* Wait until SQ WR available if SQ still full */
1390 wait_event(xprt->sc_send_wait, 1291 wait_event(xprt->sc_send_wait,
1391 atomic_read(&xprt->sc_sq_count) < 1292 atomic_read(&xprt->sc_sq_count) <
@@ -1418,57 +1319,3 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1418 } 1319 }
1419 return ret; 1320 return ret;
1420} 1321}
1421
1422void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1423 enum rpcrdma_errcode err)
1424{
1425 struct ib_send_wr err_wr;
1426 struct page *p;
1427 struct svc_rdma_op_ctxt *ctxt;
1428 __be32 *va;
1429 int length;
1430 int ret;
1431
1432 p = alloc_page(GFP_KERNEL);
1433 if (!p)
1434 return;
1435 va = page_address(p);
1436
1437 /* XDR encode error */
1438 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1439
1440 ctxt = svc_rdma_get_context(xprt);
1441 ctxt->direction = DMA_FROM_DEVICE;
1442 ctxt->count = 1;
1443 ctxt->pages[0] = p;
1444
1445 /* Prepare SGE for local address */
1446 ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
1447 p, 0, length, DMA_FROM_DEVICE);
1448 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1449 put_page(p);
1450 svc_rdma_put_context(ctxt, 1);
1451 return;
1452 }
1453 atomic_inc(&xprt->sc_dma_used);
1454 ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
1455 ctxt->sge[0].length = length;
1456
1457 /* Prepare SEND WR */
1458 memset(&err_wr, 0, sizeof err_wr);
1459 ctxt->wr_op = IB_WR_SEND;
1460 err_wr.wr_id = (unsigned long)ctxt;
1461 err_wr.sg_list = ctxt->sge;
1462 err_wr.num_sge = 1;
1463 err_wr.opcode = IB_WR_SEND;
1464 err_wr.send_flags = IB_SEND_SIGNALED;
1465
1466 /* Post It */
1467 ret = svc_rdma_send(xprt, &err_wr);
1468 if (ret) {
1469 dprintk("svcrdma: Error %d posting send for protocol error\n",
1470 ret);
1471 svc_rdma_unmap_dma(ctxt);
1472 svc_rdma_put_context(ctxt, 1);
1473 }
1474}
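
The svcrdma hunks above replace the wr_id-based completion dispatch (tasklet, rq_cq_reap/sq_cq_reap, casting wr_id back to a context pointer) with the ib_cqe model: each svc_rdma_op_ctxt embeds a struct ib_cqe whose done callback is invoked directly by the CQ poller set up with ib_alloc_cq(). The following is a minimal sketch of that pattern, not the patch itself; struct my_op_ctxt, my_wc_handler() and post_recv_example() are hypothetical names, only the ib_* calls and the container_of() dispatch are taken from the changes shown.

#include <linux/kernel.h>
#include <linux/string.h>
#include <rdma/ib_verbs.h>

/* Hypothetical per-WR context embedding the completion entry, as the
 * patch does with svc_rdma_op_ctxt. */
struct my_op_ctxt {
	struct ib_cqe cqe;		/* replaces casting wr_id to a pointer */
	/* ... per-request state ... */
};

static void my_wc_handler(struct ib_cq *cq, struct ib_wc *wc)
{
	/* Only wc->wr_cqe and wc->status are reliable when flushed */
	struct my_op_ctxt *ctxt =
		container_of(wc->wr_cqe, struct my_op_ctxt, cqe);

	if (wc->status != IB_WC_SUCCESS) {
		/* tear down / release ctxt */
		return;
	}
	/* process the completed work request via ctxt */
}

static int post_recv_example(struct ib_qp *qp, struct my_op_ctxt *ctxt,
			     struct ib_sge *sge, int num_sge)
{
	struct ib_recv_wr wr, *bad_wr;

	ctxt->cqe.done = my_wc_handler;	/* dispatched by the CQ poller */

	memset(&wr, 0, sizeof(wr));
	wr.wr_cqe  = &ctxt->cqe;	/* instead of wr_id = (u64)(unsigned long)ctxt */
	wr.sg_list = sge;
	wr.num_sge = num_sge;

	return ib_post_recv(qp, &wr, &bad_wr);
}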
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 878f1bfb1db9..f5ed9f982cd7 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -112,89 +112,65 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
112 } 112 }
113} 113}
114 114
115/**
116 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC
117 * @cq: completion queue (ignored)
118 * @wc: completed WR
119 *
120 */
115static void 121static void
116rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) 122rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
117{ 123{
118 struct rpcrdma_ep *ep = context; 124 /* WARNING: Only wr_cqe and status are reliable at this point */
119 125 if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
120 pr_err("RPC: %s: %s on device %s ep %p\n", 126 pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
121 __func__, ib_event_msg(event->event), 127 ib_wc_status_msg(wc->status),
122 event->device->name, context); 128 wc->status, wc->vendor_err);
123 if (ep->rep_connected == 1) {
124 ep->rep_connected = -EIO;
125 rpcrdma_conn_func(ep);
126 wake_up_all(&ep->rep_connect_wait);
127 }
128} 129}
129 130
130static void 131static void
131rpcrdma_sendcq_process_wc(struct ib_wc *wc) 132rpcrdma_receive_worker(struct work_struct *work)
132{ 133{
133 /* WARNING: Only wr_id and status are reliable at this point */ 134 struct rpcrdma_rep *rep =
134 if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) { 135 container_of(work, struct rpcrdma_rep, rr_work);
135 if (wc->status != IB_WC_SUCCESS &&
136 wc->status != IB_WC_WR_FLUSH_ERR)
137 pr_err("RPC: %s: SEND: %s\n",
138 __func__, ib_wc_status_msg(wc->status));
139 } else {
140 struct rpcrdma_mw *r;
141 136
142 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; 137 rpcrdma_reply_handler(rep);
143 r->mw_sendcompletion(wc);
144 }
145} 138}
146 139
147/* The common case is a single send completion is waiting. By 140/* Perform basic sanity checking to avoid using garbage
148 * passing two WC entries to ib_poll_cq, a return code of 1 141 * to update the credit grant value.
149 * means there is exactly one WC waiting and no more. We don't
150 * have to invoke ib_poll_cq again to know that the CQ has been
151 * properly drained.
152 */ 142 */
153static void 143static void
154rpcrdma_sendcq_poll(struct ib_cq *cq) 144rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
155{ 145{
156 struct ib_wc *pos, wcs[2]; 146 struct rpcrdma_msg *rmsgp = rdmab_to_msg(rep->rr_rdmabuf);
157 int count, rc; 147 struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
148 u32 credits;
158 149
159 do { 150 if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
160 pos = wcs; 151 return;
161 152
162 rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos); 153 credits = be32_to_cpu(rmsgp->rm_credit);
163 if (rc < 0) 154 if (credits == 0)
164 break; 155 credits = 1; /* don't deadlock */
156 else if (credits > buffer->rb_max_requests)
157 credits = buffer->rb_max_requests;
165 158
166 count = rc; 159 atomic_set(&buffer->rb_credits, credits);
167 while (count-- > 0)
168 rpcrdma_sendcq_process_wc(pos++);
169 } while (rc == ARRAY_SIZE(wcs));
170 return;
171} 160}
172 161
173/* Handle provider send completion upcalls. 162/**
163 * rpcrdma_receive_wc - Invoked by RDMA provider for each polled Receive WC
164 * @cq: completion queue (ignored)
165 * @wc: completed WR
166 *
174 */ 167 */
175static void 168static void
176rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) 169rpcrdma_receive_wc(struct ib_cq *cq, struct ib_wc *wc)
177{ 170{
178 do { 171 struct ib_cqe *cqe = wc->wr_cqe;
179 rpcrdma_sendcq_poll(cq); 172 struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
180 } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | 173 rr_cqe);
181 IB_CQ_REPORT_MISSED_EVENTS) > 0);
182}
183
184static void
185rpcrdma_receive_worker(struct work_struct *work)
186{
187 struct rpcrdma_rep *rep =
188 container_of(work, struct rpcrdma_rep, rr_work);
189
190 rpcrdma_reply_handler(rep);
191}
192
193static void
194rpcrdma_recvcq_process_wc(struct ib_wc *wc)
195{
196 struct rpcrdma_rep *rep =
197 (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
198 174
199 /* WARNING: Only wr_id and status are reliable at this point */ 175 /* WARNING: Only wr_id and status are reliable at this point */
200 if (wc->status != IB_WC_SUCCESS) 176 if (wc->status != IB_WC_SUCCESS)
@@ -211,7 +187,8 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc)
211 ib_dma_sync_single_for_cpu(rep->rr_device, 187 ib_dma_sync_single_for_cpu(rep->rr_device,
212 rdmab_addr(rep->rr_rdmabuf), 188 rdmab_addr(rep->rr_rdmabuf),
213 rep->rr_len, DMA_FROM_DEVICE); 189 rep->rr_len, DMA_FROM_DEVICE);
214 prefetch(rdmab_to_msg(rep->rr_rdmabuf)); 190
191 rpcrdma_update_granted_credits(rep);
215 192
216out_schedule: 193out_schedule:
217 queue_work(rpcrdma_receive_wq, &rep->rr_work); 194 queue_work(rpcrdma_receive_wq, &rep->rr_work);
@@ -219,57 +196,20 @@ out_schedule:
219 196
220out_fail: 197out_fail:
221 if (wc->status != IB_WC_WR_FLUSH_ERR) 198 if (wc->status != IB_WC_WR_FLUSH_ERR)
222 pr_err("RPC: %s: rep %p: %s\n", 199 pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
223 __func__, rep, ib_wc_status_msg(wc->status)); 200 ib_wc_status_msg(wc->status),
201 wc->status, wc->vendor_err);
224 rep->rr_len = RPCRDMA_BAD_LEN; 202 rep->rr_len = RPCRDMA_BAD_LEN;
225 goto out_schedule; 203 goto out_schedule;
226} 204}
227 205
228/* The wc array is on stack: automatic memory is always CPU-local.
229 *
230 * struct ib_wc is 64 bytes, making the poll array potentially
231 * large. But this is at the bottom of the call chain. Further
232 * substantial work is done in another thread.
233 */
234static void
235rpcrdma_recvcq_poll(struct ib_cq *cq)
236{
237 struct ib_wc *pos, wcs[4];
238 int count, rc;
239
240 do {
241 pos = wcs;
242
243 rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos);
244 if (rc < 0)
245 break;
246
247 count = rc;
248 while (count-- > 0)
249 rpcrdma_recvcq_process_wc(pos++);
250 } while (rc == ARRAY_SIZE(wcs));
251}
252
253/* Handle provider receive completion upcalls.
254 */
255static void
256rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
257{
258 do {
259 rpcrdma_recvcq_poll(cq);
260 } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
261 IB_CQ_REPORT_MISSED_EVENTS) > 0);
262}
263
264static void 206static void
265rpcrdma_flush_cqs(struct rpcrdma_ep *ep) 207rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
266{ 208{
267 struct ib_wc wc; 209 struct ib_wc wc;
268 210
269 while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0) 211 while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
270 rpcrdma_recvcq_process_wc(&wc); 212 rpcrdma_receive_wc(NULL, &wc);
271 while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
272 rpcrdma_sendcq_process_wc(&wc);
273} 213}
274 214
275static int 215static int
@@ -330,6 +270,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
330connected: 270connected:
331 dprintk("RPC: %s: %sconnected\n", 271 dprintk("RPC: %s: %sconnected\n",
332 __func__, connstate > 0 ? "" : "dis"); 272 __func__, connstate > 0 ? "" : "dis");
273 atomic_set(&xprt->rx_buf.rb_credits, 1);
333 ep->rep_connected = connstate; 274 ep->rep_connected = connstate;
334 rpcrdma_conn_func(ep); 275 rpcrdma_conn_func(ep);
335 wake_up_all(&ep->rep_connect_wait); 276 wake_up_all(&ep->rep_connect_wait);
@@ -560,9 +501,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
560 struct rpcrdma_create_data_internal *cdata) 501 struct rpcrdma_create_data_internal *cdata)
561{ 502{
562 struct ib_cq *sendcq, *recvcq; 503 struct ib_cq *sendcq, *recvcq;
563 struct ib_cq_init_attr cq_attr = {};
564 unsigned int max_qp_wr; 504 unsigned int max_qp_wr;
565 int rc, err; 505 int rc;
566 506
567 if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) { 507 if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
568 dprintk("RPC: %s: insufficient sge's available\n", 508 dprintk("RPC: %s: insufficient sge's available\n",
@@ -614,9 +554,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
614 init_waitqueue_head(&ep->rep_connect_wait); 554 init_waitqueue_head(&ep->rep_connect_wait);
615 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); 555 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
616 556
617 cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; 557 sendcq = ib_alloc_cq(ia->ri_device, NULL,
618 sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall, 558 ep->rep_attr.cap.max_send_wr + 1,
619 rpcrdma_cq_async_error_upcall, NULL, &cq_attr); 559 0, IB_POLL_SOFTIRQ);
620 if (IS_ERR(sendcq)) { 560 if (IS_ERR(sendcq)) {
621 rc = PTR_ERR(sendcq); 561 rc = PTR_ERR(sendcq);
622 dprintk("RPC: %s: failed to create send CQ: %i\n", 562 dprintk("RPC: %s: failed to create send CQ: %i\n",
@@ -624,16 +564,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
624 goto out1; 564 goto out1;
625 } 565 }
626 566
627 rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP); 567 recvcq = ib_alloc_cq(ia->ri_device, NULL,
628 if (rc) { 568 ep->rep_attr.cap.max_recv_wr + 1,
629 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", 569 0, IB_POLL_SOFTIRQ);
630 __func__, rc);
631 goto out2;
632 }
633
634 cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1;
635 recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall,
636 rpcrdma_cq_async_error_upcall, NULL, &cq_attr);
637 if (IS_ERR(recvcq)) { 570 if (IS_ERR(recvcq)) {
638 rc = PTR_ERR(recvcq); 571 rc = PTR_ERR(recvcq);
639 dprintk("RPC: %s: failed to create recv CQ: %i\n", 572 dprintk("RPC: %s: failed to create recv CQ: %i\n",
@@ -641,14 +574,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
641 goto out2; 574 goto out2;
642 } 575 }
643 576
644 rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
645 if (rc) {
646 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
647 __func__, rc);
648 ib_destroy_cq(recvcq);
649 goto out2;
650 }
651
652 ep->rep_attr.send_cq = sendcq; 577 ep->rep_attr.send_cq = sendcq;
653 ep->rep_attr.recv_cq = recvcq; 578 ep->rep_attr.recv_cq = recvcq;
654 579
@@ -673,10 +598,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
673 return 0; 598 return 0;
674 599
675out2: 600out2:
676 err = ib_destroy_cq(sendcq); 601 ib_free_cq(sendcq);
677 if (err)
678 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
679 __func__, err);
680out1: 602out1:
681 if (ia->ri_dma_mr) 603 if (ia->ri_dma_mr)
682 ib_dereg_mr(ia->ri_dma_mr); 604 ib_dereg_mr(ia->ri_dma_mr);
@@ -711,15 +633,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
711 ia->ri_id->qp = NULL; 633 ia->ri_id->qp = NULL;
712 } 634 }
713 635
714 rc = ib_destroy_cq(ep->rep_attr.recv_cq); 636 ib_free_cq(ep->rep_attr.recv_cq);
715 if (rc) 637 ib_free_cq(ep->rep_attr.send_cq);
716 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
717 __func__, rc);
718
719 rc = ib_destroy_cq(ep->rep_attr.send_cq);
720 if (rc)
721 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
722 __func__, rc);
723 638
724 if (ia->ri_dma_mr) { 639 if (ia->ri_dma_mr) {
725 rc = ib_dereg_mr(ia->ri_dma_mr); 640 rc = ib_dereg_mr(ia->ri_dma_mr);
@@ -898,6 +813,7 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
898 spin_lock(&buffer->rb_reqslock); 813 spin_lock(&buffer->rb_reqslock);
899 list_add(&req->rl_all, &buffer->rb_allreqs); 814 list_add(&req->rl_all, &buffer->rb_allreqs);
900 spin_unlock(&buffer->rb_reqslock); 815 spin_unlock(&buffer->rb_reqslock);
816 req->rl_cqe.done = rpcrdma_wc_send;
901 req->rl_buffer = &r_xprt->rx_buf; 817 req->rl_buffer = &r_xprt->rx_buf;
902 return req; 818 return req;
903} 819}
@@ -923,6 +839,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
923 } 839 }
924 840
925 rep->rr_device = ia->ri_device; 841 rep->rr_device = ia->ri_device;
842 rep->rr_cqe.done = rpcrdma_receive_wc;
926 rep->rr_rxprt = r_xprt; 843 rep->rr_rxprt = r_xprt;
927 INIT_WORK(&rep->rr_work, rpcrdma_receive_worker); 844 INIT_WORK(&rep->rr_work, rpcrdma_receive_worker);
928 return rep; 845 return rep;
@@ -943,6 +860,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
943 buf->rb_max_requests = r_xprt->rx_data.max_requests; 860 buf->rb_max_requests = r_xprt->rx_data.max_requests;
944 buf->rb_bc_srv_max_requests = 0; 861 buf->rb_bc_srv_max_requests = 0;
945 spin_lock_init(&buf->rb_lock); 862 spin_lock_init(&buf->rb_lock);
863 atomic_set(&buf->rb_credits, 1);
946 864
947 rc = ia->ri_ops->ro_init(r_xprt); 865 rc = ia->ri_ops->ro_init(r_xprt);
948 if (rc) 866 if (rc)
@@ -1259,7 +1177,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
1259 } 1177 }
1260 1178
1261 send_wr.next = NULL; 1179 send_wr.next = NULL;
1262 send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION; 1180 send_wr.wr_cqe = &req->rl_cqe;
1263 send_wr.sg_list = iov; 1181 send_wr.sg_list = iov;
1264 send_wr.num_sge = req->rl_niovs; 1182 send_wr.num_sge = req->rl_niovs;
1265 send_wr.opcode = IB_WR_SEND; 1183 send_wr.opcode = IB_WR_SEND;
@@ -1297,7 +1215,7 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1297 int rc; 1215 int rc;
1298 1216
1299 recv_wr.next = NULL; 1217 recv_wr.next = NULL;
1300 recv_wr.wr_id = (u64) (unsigned long) rep; 1218 recv_wr.wr_cqe = &rep->rr_cqe;
1301 recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 1219 recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
1302 recv_wr.num_sge = 1; 1220 recv_wr.num_sge = 1;
1303 1221
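
The client-side verbs.c changes make the same conversion: the send and receive CQs are now allocated with ib_alloc_cq(), which arms the CQ and drives polling internally, so the explicit ib_req_notify_cq() calls and the hand-rolled ib_poll_cq() upcall loops disappear. A small sketch of the allocation/teardown pairing follows; example_alloc_cq() and example_release_cq() are hypothetical helpers, only ib_alloc_cq(), IB_POLL_SOFTIRQ and ib_free_cq() come from the patch.

#include <linux/err.h>
#include <rdma/ib_verbs.h>

static struct ib_cq *example_alloc_cq(struct ib_device *dev, void *context,
				      int depth)
{
	/* Completions are polled by the IB core in softirq context and
	 * dispatched to each work request's ib_cqe::done callback, so no
	 * explicit CQ arming is needed here. */
	return ib_alloc_cq(dev, context, depth, 0 /* comp_vector */,
			   IB_POLL_SOFTIRQ);
}

static void example_release_cq(struct ib_cq *cq)
{
	if (!IS_ERR_OR_NULL(cq))
		ib_free_cq(cq);		/* pairs with ib_alloc_cq() */
}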
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 38fe11b09875..2ebc743cb96f 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -95,10 +95,6 @@ struct rpcrdma_ep {
95#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) 95#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
96#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) 96#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
97 97
98/* Force completion handler to ignore the signal
99 */
100#define RPCRDMA_IGNORE_COMPLETION (0ULL)
101
102/* Pre-allocate extra Work Requests for handling backward receives 98/* Pre-allocate extra Work Requests for handling backward receives
103 * and sends. This is a fixed value because the Work Queues are 99 * and sends. This is a fixed value because the Work Queues are
104 * allocated when the forward channel is set up. 100 * allocated when the forward channel is set up.
@@ -171,6 +167,7 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
171struct rpcrdma_buffer; 167struct rpcrdma_buffer;
172 168
173struct rpcrdma_rep { 169struct rpcrdma_rep {
170 struct ib_cqe rr_cqe;
174 unsigned int rr_len; 171 unsigned int rr_len;
175 struct ib_device *rr_device; 172 struct ib_device *rr_device;
176 struct rpcrdma_xprt *rr_rxprt; 173 struct rpcrdma_xprt *rr_rxprt;
@@ -204,11 +201,11 @@ struct rpcrdma_frmr {
204 struct scatterlist *sg; 201 struct scatterlist *sg;
205 int sg_nents; 202 int sg_nents;
206 struct ib_mr *fr_mr; 203 struct ib_mr *fr_mr;
204 struct ib_cqe fr_cqe;
207 enum rpcrdma_frmr_state fr_state; 205 enum rpcrdma_frmr_state fr_state;
206 struct completion fr_linv_done;
208 struct work_struct fr_work; 207 struct work_struct fr_work;
209 struct rpcrdma_xprt *fr_xprt; 208 struct rpcrdma_xprt *fr_xprt;
210 bool fr_waiter;
211 struct completion fr_linv_done;;
212 union { 209 union {
213 struct ib_reg_wr fr_regwr; 210 struct ib_reg_wr fr_regwr;
214 struct ib_send_wr fr_invwr; 211 struct ib_send_wr fr_invwr;
@@ -224,8 +221,7 @@ struct rpcrdma_mw {
224 union { 221 union {
225 struct rpcrdma_fmr fmr; 222 struct rpcrdma_fmr fmr;
226 struct rpcrdma_frmr frmr; 223 struct rpcrdma_frmr frmr;
227 } r; 224 };
228 void (*mw_sendcompletion)(struct ib_wc *);
229 struct list_head mw_list; 225 struct list_head mw_list;
230 struct list_head mw_all; 226 struct list_head mw_all;
231}; 227};
@@ -281,6 +277,7 @@ struct rpcrdma_req {
281 struct rpcrdma_regbuf *rl_sendbuf; 277 struct rpcrdma_regbuf *rl_sendbuf;
282 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; 278 struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
283 279
280 struct ib_cqe rl_cqe;
284 struct list_head rl_all; 281 struct list_head rl_all;
285 bool rl_backchannel; 282 bool rl_backchannel;
286}; 283};
@@ -311,6 +308,7 @@ struct rpcrdma_buffer {
311 struct list_head rb_send_bufs; 308 struct list_head rb_send_bufs;
312 struct list_head rb_recv_bufs; 309 struct list_head rb_recv_bufs;
313 u32 rb_max_requests; 310 u32 rb_max_requests;
311 atomic_t rb_credits; /* most recent credit grant */
314 312
315 u32 rb_bc_srv_max_requests; 313 u32 rb_bc_srv_max_requests;
316 spinlock_t rb_reqslock; /* protect rb_allreqs */ 314 spinlock_t rb_reqslock; /* protect rb_allreqs */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fde2138b81e7..65e759569e48 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1844,9 +1844,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
1844 */ 1844 */
1845static void xs_local_rpcbind(struct rpc_task *task) 1845static void xs_local_rpcbind(struct rpc_task *task)
1846{ 1846{
1847 rcu_read_lock(); 1847 xprt_set_bound(task->tk_xprt);
1848 xprt_set_bound(rcu_dereference(task->tk_client->cl_xprt));
1849 rcu_read_unlock();
1850} 1848}
1851 1849
1852static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) 1850static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port)
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index ebc661d3b6e3..8b5833c1ff2e 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -20,6 +20,7 @@
20#include <linux/list.h> 20#include <linux/list.h>
21#include <linux/workqueue.h> 21#include <linux/workqueue.h>
22#include <linux/if_vlan.h> 22#include <linux/if_vlan.h>
23#include <linux/rtnetlink.h>
23#include <net/ip_fib.h> 24#include <net/ip_fib.h>
24#include <net/switchdev.h> 25#include <net/switchdev.h>
25 26
@@ -567,7 +568,6 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
567} 568}
568EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); 569EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
569 570
570static DEFINE_MUTEX(switchdev_mutex);
571static RAW_NOTIFIER_HEAD(switchdev_notif_chain); 571static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
572 572
573/** 573/**
@@ -582,9 +582,9 @@ int register_switchdev_notifier(struct notifier_block *nb)
582{ 582{
583 int err; 583 int err;
584 584
585 mutex_lock(&switchdev_mutex); 585 rtnl_lock();
586 err = raw_notifier_chain_register(&switchdev_notif_chain, nb); 586 err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
587 mutex_unlock(&switchdev_mutex); 587 rtnl_unlock();
588 return err; 588 return err;
589} 589}
590EXPORT_SYMBOL_GPL(register_switchdev_notifier); 590EXPORT_SYMBOL_GPL(register_switchdev_notifier);
@@ -600,9 +600,9 @@ int unregister_switchdev_notifier(struct notifier_block *nb)
600{ 600{
601 int err; 601 int err;
602 602
603 mutex_lock(&switchdev_mutex); 603 rtnl_lock();
604 err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); 604 err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
605 mutex_unlock(&switchdev_mutex); 605 rtnl_unlock();
606 return err; 606 return err;
607} 607}
608EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); 608EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
@@ -616,16 +616,17 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
616 * Call all network notifier blocks. This should be called by driver 616 * Call all network notifier blocks. This should be called by driver
617 * when it needs to propagate hardware event. 617 * when it needs to propagate hardware event.
618 * Return values are same as for atomic_notifier_call_chain(). 618 * Return values are same as for atomic_notifier_call_chain().
619 * rtnl_lock must be held.
619 */ 620 */
620int call_switchdev_notifiers(unsigned long val, struct net_device *dev, 621int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
621 struct switchdev_notifier_info *info) 622 struct switchdev_notifier_info *info)
622{ 623{
623 int err; 624 int err;
624 625
626 ASSERT_RTNL();
627
625 info->dev = dev; 628 info->dev = dev;
626 mutex_lock(&switchdev_mutex);
627 err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); 629 err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
628 mutex_unlock(&switchdev_mutex);
629 return err; 630 return err;
630} 631}
631EXPORT_SYMBOL_GPL(call_switchdev_notifiers); 632EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
@@ -1092,8 +1093,11 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
1092 .cb = cb, 1093 .cb = cb,
1093 .idx = idx, 1094 .idx = idx,
1094 }; 1095 };
1096 int err;
1095 1097
1096 switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb); 1098 err = switchdev_port_obj_dump(dev, &dump.fdb.obj,
1099 switchdev_port_fdb_dump_cb);
1100 cb->args[1] = err;
1097 return dump.idx; 1101 return dump.idx;
1098} 1102}
1099EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); 1103EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
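
The switchdev hunks above drop the private switchdev_mutex and serialize the raw notifier chain with the RTNL lock instead, with ASSERT_RTNL() now enforcing that in call_switchdev_notifiers(). The sketch below illustrates what that implies for a caller; example_fdb_notify() is hypothetical and uses a bare switchdev_notifier_info for brevity, whereas real drivers pass a type-specific info structure embedding it.

#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <net/switchdev.h>

/* Hypothetical caller: after this change, notifier registration and
 * call_switchdev_notifiers() must run under rtnl_lock(). */
static int example_fdb_notify(struct net_device *dev,
			      struct switchdev_notifier_info *info)
{
	int err;

	rtnl_lock();
	err = call_switchdev_notifiers(SWITCHDEV_FDB_ADD, dev, info);
	rtnl_unlock();

	return notifier_to_errno(err);
}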
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index e401108360a2..ae469b37d852 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -412,11 +412,6 @@ enomem:
412 return -ENOMEM; 412 return -ENOMEM;
413} 413}
414 414
415void tipc_bcast_reinit(struct net *net)
416{
417 tipc_link_reinit(tipc_bc_sndlink(net), tipc_own_addr(net));
418}
419
420void tipc_bcast_stop(struct net *net) 415void tipc_bcast_stop(struct net *net)
421{ 416{
422 struct tipc_net *tn = net_generic(net, tipc_net_id); 417 struct tipc_net *tn = net_generic(net, tipc_net_id);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 1944c6c00bb9..d5e79b3767fd 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -46,7 +46,6 @@ struct tipc_node_map;
46extern const char tipc_bclink_name[]; 46extern const char tipc_bclink_name[];
47 47
48int tipc_bcast_init(struct net *net); 48int tipc_bcast_init(struct net *net);
49void tipc_bcast_reinit(struct net *net);
50void tipc_bcast_stop(struct net *net); 49void tipc_bcast_stop(struct net *net);
51void tipc_bcast_add_peer(struct net *net, struct tipc_link *l, 50void tipc_bcast_add_peer(struct net *net, struct tipc_link *l,
52 struct sk_buff_head *xmitq); 51 struct sk_buff_head *xmitq);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 802ffad3200d..27a5406213c6 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -40,6 +40,7 @@
40#include "link.h" 40#include "link.h"
41#include "discover.h" 41#include "discover.h"
42#include "bcast.h" 42#include "bcast.h"
43#include "netlink.h"
43 44
44#define MAX_ADDR_STR 60 45#define MAX_ADDR_STR 60
45 46
@@ -54,23 +55,6 @@ static struct tipc_media * const media_info_array[] = {
54 NULL 55 NULL
55}; 56};
56 57
57static const struct nla_policy
58tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = {
59 [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC },
60 [TIPC_NLA_BEARER_NAME] = {
61 .type = NLA_STRING,
62 .len = TIPC_MAX_BEARER_NAME
63 },
64 [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED },
65 [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 }
66};
67
68static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = {
69 [TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC },
70 [TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING },
71 [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED }
72};
73
74static void bearer_disable(struct net *net, struct tipc_bearer *b); 58static void bearer_disable(struct net *net, struct tipc_bearer *b);
75 59
76/** 60/**
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 0c2944fb9ae0..7d2bb3e70baa 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * net/tipc/link.c: TIPC link code 2 * net/tipc/link.c: TIPC link code
3 * 3 *
4 * Copyright (c) 1996-2007, 2012-2015, Ericsson AB 4 * Copyright (c) 1996-2007, 2012-2016, Ericsson AB
5 * Copyright (c) 2004-2007, 2010-2013, Wind River Systems 5 * Copyright (c) 2004-2007, 2010-2013, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
@@ -123,11 +123,11 @@ struct tipc_stats {
123struct tipc_link { 123struct tipc_link {
124 u32 addr; 124 u32 addr;
125 char name[TIPC_MAX_LINK_NAME]; 125 char name[TIPC_MAX_LINK_NAME];
126 struct tipc_media_addr *media_addr;
127 struct net *net; 126 struct net *net;
128 127
129 /* Management and link supervision data */ 128 /* Management and link supervision data */
130 u32 peer_session; 129 u32 peer_session;
130 u32 session;
131 u32 peer_bearer_id; 131 u32 peer_bearer_id;
132 u32 bearer_id; 132 u32 bearer_id;
133 u32 tolerance; 133 u32 tolerance;
@@ -137,11 +137,7 @@ struct tipc_link {
137 u16 peer_caps; 137 u16 peer_caps;
138 bool active; 138 bool active;
139 u32 silent_intv_cnt; 139 u32 silent_intv_cnt;
140 struct { 140 char if_name[TIPC_MAX_IF_NAME];
141 unchar hdr[INT_H_SIZE];
142 unchar body[TIPC_MAX_IF_NAME];
143 } proto_msg;
144 struct tipc_msg *pmsg;
145 u32 priority; 141 u32 priority;
146 char net_plane; 142 char net_plane;
147 143
@@ -196,14 +192,6 @@ struct tipc_link {
196static const char *link_co_err = "Link tunneling error, "; 192static const char *link_co_err = "Link tunneling error, ";
197static const char *link_rst_msg = "Resetting link "; 193static const char *link_rst_msg = "Resetting link ";
198 194
199/* Properties valid for media, bearar and link */
200static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
201 [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC },
202 [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
203 [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
204 [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }
205};
206
207/* Send states for broadcast NACKs 195/* Send states for broadcast NACKs
208 */ 196 */
209enum { 197enum {
@@ -216,10 +204,11 @@ enum {
216 * Interval between NACKs when packets arrive out of order 204 * Interval between NACKs when packets arrive out of order
217 */ 205 */
218#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2) 206#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
219/* 207
220 * Out-of-range value for link session numbers 208/* Wildcard value for link session numbers. When it is known that
209 * peer endpoint is down, any session number must be accepted.
221 */ 210 */
222#define WILDCARD_SESSION 0x10000 211#define ANY_SESSION 0x10000
223 212
224/* Link FSM states: 213/* Link FSM states:
225 */ 214 */
@@ -399,16 +388,6 @@ char *tipc_link_name(struct tipc_link *l)
399 return l->name; 388 return l->name;
400} 389}
401 390
402static u32 link_own_addr(struct tipc_link *l)
403{
404 return msg_prevnode(l->pmsg);
405}
406
407void tipc_link_reinit(struct tipc_link *l, u32 addr)
408{
409 msg_set_prevnode(l->pmsg, addr);
410}
411
412/** 391/**
413 * tipc_link_create - create a new link 392 * tipc_link_create - create a new link
414 * @n: pointer to associated node 393 * @n: pointer to associated node
@@ -442,29 +421,22 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
442 struct tipc_link **link) 421 struct tipc_link **link)
443{ 422{
444 struct tipc_link *l; 423 struct tipc_link *l;
445 struct tipc_msg *hdr;
446 424
447 l = kzalloc(sizeof(*l), GFP_ATOMIC); 425 l = kzalloc(sizeof(*l), GFP_ATOMIC);
448 if (!l) 426 if (!l)
449 return false; 427 return false;
450 *link = l; 428 *link = l;
451 l->pmsg = (struct tipc_msg *)&l->proto_msg; 429 l->session = session;
452 hdr = l->pmsg;
453 tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer);
454 msg_set_size(hdr, sizeof(l->proto_msg));
455 msg_set_session(hdr, session);
456 msg_set_bearer_id(hdr, l->bearer_id);
457 430
458 /* Note: peer i/f name is completed by reset/activate message */ 431 /* Note: peer i/f name is completed by reset/activate message */
459 sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown", 432 sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
460 tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode), 433 tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode),
461 if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); 434 if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
462 strcpy((char *)msg_data(hdr), if_name); 435 strcpy(l->if_name, if_name);
463
464 l->addr = peer; 436 l->addr = peer;
465 l->peer_caps = peer_caps; 437 l->peer_caps = peer_caps;
466 l->net = net; 438 l->net = net;
467 l->peer_session = WILDCARD_SESSION; 439 l->peer_session = ANY_SESSION;
468 l->bearer_id = bearer_id; 440 l->bearer_id = bearer_id;
469 l->tolerance = tolerance; 441 l->tolerance = tolerance;
470 l->net_plane = net_plane; 442 l->net_plane = net_plane;
@@ -791,7 +763,7 @@ static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
791 struct tipc_msg *msg = buf_msg(skb_peek(list)); 763 struct tipc_msg *msg = buf_msg(skb_peek(list));
792 int imp = msg_importance(msg); 764 int imp = msg_importance(msg);
793 u32 oport = msg_origport(msg); 765 u32 oport = msg_origport(msg);
794 u32 addr = link_own_addr(link); 766 u32 addr = tipc_own_addr(link->net);
795 struct sk_buff *skb; 767 struct sk_buff *skb;
796 768
797 /* This really cannot happen... */ 769 /* This really cannot happen... */
@@ -840,16 +812,9 @@ void link_prepare_wakeup(struct tipc_link *l)
840 812
841void tipc_link_reset(struct tipc_link *l) 813void tipc_link_reset(struct tipc_link *l)
842{ 814{
843 /* Link is down, accept any session */ 815 l->peer_session = ANY_SESSION;
844 l->peer_session = WILDCARD_SESSION; 816 l->session++;
845
846 /* If peer is up, it only accepts an incremented session number */
847 msg_set_session(l->pmsg, msg_session(l->pmsg) + 1);
848
849 /* Prepare for renewed mtu size negotiation */
850 l->mtu = l->advertised_mtu; 817 l->mtu = l->advertised_mtu;
851
852 /* Clean up all queues and counters: */
853 __skb_queue_purge(&l->transmq); 818 __skb_queue_purge(&l->transmq);
854 __skb_queue_purge(&l->deferdq); 819 __skb_queue_purge(&l->deferdq);
855 skb_queue_splice_init(&l->wakeupq, l->inputq); 820 skb_queue_splice_init(&l->wakeupq, l->inputq);
@@ -904,8 +869,10 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
904 if (unlikely(l->backlog[i].len >= l->backlog[i].limit)) 869 if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
905 return link_schedule_user(l, list); 870 return link_schedule_user(l, list);
906 } 871 }
-    if (unlikely(msg_size(hdr) > mtu))
+    if (unlikely(msg_size(hdr) > mtu)) {
+        skb_queue_purge(list);
         return -EMSGSIZE;
+    }

     /* Prepare each packet for sending, and add to relevant queue: */
     while (skb_queue_len(list)) {
@@ -917,8 +884,10 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,

         if (likely(skb_queue_len(transmq) < maxwin)) {
             _skb = skb_clone(skb, GFP_ATOMIC);
-            if (!_skb)
+            if (!_skb) {
+                skb_queue_purge(list);
                 return -ENOBUFS;
+            }
             __skb_dequeue(list);
             __skb_queue_tail(transmq, skb);
             __skb_queue_tail(xmitq, _skb);
@@ -1153,7 +1122,7 @@ int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq)

     /* Broadcast ACK must be sent via a unicast link => defer to caller */
     if (link_is_bc_rcvlink(l)) {
-        if (((l->rcv_nxt ^ link_own_addr(l)) & 0xf) != 0xf)
+        if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf)
             return 0;
         l->rcv_unacked = 0;
         return TIPC_LINK_SND_BC_ACK;
@@ -1261,39 +1230,34 @@ drop:
     return rc;
 }

-/*
- * Send protocol message to the other endpoint.
- */
-static void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ,
-                                 int probe_msg, u32 gap, u32 tolerance,
-                                 u32 priority)
-{
-    struct sk_buff *skb = NULL;
-    struct sk_buff_head xmitq;
-
-    __skb_queue_head_init(&xmitq);
-    tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap,
-                              tolerance, priority, &xmitq);
-    skb = __skb_dequeue(&xmitq);
-    if (!skb)
-        return;
-    tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr);
-    l->rcv_unacked = 0;
-}
-
 static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
                                       u16 rcvgap, int tolerance, int priority,
                                       struct sk_buff_head *xmitq)
 {
-    struct sk_buff *skb = NULL;
-    struct tipc_msg *hdr = l->pmsg;
+    struct sk_buff *skb;
+    struct tipc_msg *hdr;
+    struct sk_buff_head *dfq = &l->deferdq;
     bool node_up = link_is_up(l->bc_rcvlink);

     /* Don't send protocol message during reset or link failover */
     if (tipc_link_is_blocked(l))
         return;

-    msg_set_type(hdr, mtyp);
+    if (!tipc_link_is_up(l) && (mtyp == STATE_MSG))
+        return;
+
+    if (!skb_queue_empty(dfq))
+        rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
+
+    skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
+                          TIPC_MAX_IF_NAME, l->addr,
+                          tipc_own_addr(l->net), 0, 0, 0);
+    if (!skb)
+        return;
+
+    hdr = buf_msg(skb);
+    msg_set_session(hdr, l->session);
+    msg_set_bearer_id(hdr, l->bearer_id);
     msg_set_net_plane(hdr, l->net_plane);
     msg_set_next_sent(hdr, l->snd_nxt);
     msg_set_ack(hdr, l->rcv_nxt - 1);
@@ -1303,36 +1267,23 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
     msg_set_linkprio(hdr, priority);
     msg_set_redundant_link(hdr, node_up);
     msg_set_seq_gap(hdr, 0);
-
-    /* Compatibility: created msg must not be in sequence with pkt flow */
     msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2);

     if (mtyp == STATE_MSG) {
-        if (!tipc_link_is_up(l))
-            return;
-
-        /* Override rcvgap if there are packets in deferred queue */
-        if (!skb_queue_empty(&l->deferdq))
-            rcvgap = buf_seqno(skb_peek(&l->deferdq)) - l->rcv_nxt;
-        if (rcvgap) {
-            msg_set_seq_gap(hdr, rcvgap);
-            l->stats.sent_nacks++;
-        }
+        msg_set_seq_gap(hdr, rcvgap);
+        msg_set_size(hdr, INT_H_SIZE);
         msg_set_probe(hdr, probe);
-        if (probe)
-            l->stats.sent_probes++;
         l->stats.sent_states++;
         l->rcv_unacked = 0;
     } else {
         /* RESET_MSG or ACTIVATE_MSG */
         msg_set_max_pkt(hdr, l->advertised_mtu);
-        msg_set_ack(hdr, l->rcv_nxt - 1);
-        msg_set_next_sent(hdr, 1);
+        strcpy(msg_data(hdr), l->if_name);
     }
-    skb = tipc_buf_acquire(msg_size(hdr));
-    if (!skb)
-        return;
-    skb_copy_to_linear_data(skb, hdr, msg_size(hdr));
+    if (probe)
+        l->stats.sent_probes++;
+    if (rcvgap)
+        l->stats.sent_nacks++;
     skb->priority = TC_PRIO_CONTROL;
     __skb_queue_tail(xmitq, skb);
 }
@@ -1357,7 +1308,7 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,

     /* At least one packet required for safe algorithm => add dummy */
     skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
-                          BASIC_H_SIZE, 0, l->addr, link_own_addr(l),
+                          BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net),
                           0, 0, TIPC_ERR_NO_PORT);
     if (!skb) {
         pr_warn("%sunable to create tunnel packet\n", link_co_err);
@@ -1368,7 +1319,7 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
     __skb_queue_purge(&tmpxq);

     /* Initialize reusable tunnel packet header */
-    tipc_msg_init(link_own_addr(l), &tnlhdr, TUNNEL_PROTOCOL,
+    tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
                   mtyp, INT_H_SIZE, l->addr);
     pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq);
     msg_set_msgcnt(&tnlhdr, pktcnt);
@@ -1427,7 +1378,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
     if (tipc_link_is_blocked(l) || !xmitq)
         goto exit;

-    if (link_own_addr(l) > msg_prevnode(hdr))
+    if (tipc_own_addr(l->net) > msg_prevnode(hdr))
         l->net_plane = msg_net_plane(hdr);

     switch (mtyp) {
@@ -1435,7 +1386,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,

         /* Ignore duplicate RESET with old session number */
         if ((less_eq(msg_session(hdr), l->peer_session)) &&
-            (l->peer_session != WILDCARD_SESSION))
+            (l->peer_session != ANY_SESSION))
             break;
         /* fall thru' */

@@ -1479,6 +1430,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
         if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
             l->tolerance = peers_tol;

+        if (peers_prio && in_range(peers_prio, TIPC_MIN_LINK_PRI,
+                                   TIPC_MAX_LINK_PRI)) {
+            l->priority = peers_prio;
+            rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+        }
+
         l->silent_intv_cnt = 0;
         l->stats.recv_states++;
         if (msg_probe(hdr))
@@ -1526,7 +1483,7 @@ static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast,
         u16 gap_to = peers_snd_nxt - 1;

         skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE,
-                              0, l->addr, link_own_addr(l), 0, 0, 0);
+                              0, l->addr, tipc_own_addr(l->net), 0, 0, 0);
         if (!skb)
             return false;
         hdr = buf_msg(skb);
@@ -1681,7 +1638,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
     if (mtyp != STATE_MSG)
         return 0;

-    if (dnode == link_own_addr(l)) {
+    if (dnode == tipc_own_addr(l->net)) {
         tipc_link_bc_ack_rcv(l, acked, xmitq);
         rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
         l->stats.recv_nacks++;
@@ -1973,8 +1930,10 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)

     hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
                       NLM_F_MULTI, TIPC_NL_LINK_GET);
-    if (!hdr)
+    if (!hdr) {
+        tipc_bcast_unlock(net);
         return -EMSGSIZE;
+    }

     attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK);
     if (!attrs)
@@ -2021,16 +1980,18 @@ msg_full:
     return -EMSGSIZE;
 }

-void tipc_link_set_tolerance(struct tipc_link *l, u32 tol)
+void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
+                             struct sk_buff_head *xmitq)
 {
     l->tolerance = tol;
-    tipc_link_proto_xmit(l, STATE_MSG, 0, 0, tol, 0);
+    tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq);
 }

-void tipc_link_set_prio(struct tipc_link *l, u32 prio)
+void tipc_link_set_prio(struct tipc_link *l, u32 prio,
+                        struct sk_buff_head *xmitq)
 {
     l->priority = prio;
-    tipc_link_proto_xmit(l, STATE_MSG, 0, 0, 0, prio);
+    tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq);
 }

 void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
diff --git a/net/tipc/link.h b/net/tipc/link.h
index b2ae0f4276af..6a94175ee20a 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -86,7 +86,6 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
                          struct sk_buff_head *namedq,
                          struct tipc_link *bc_sndlink,
                          struct tipc_link **link);
-void tipc_link_reinit(struct tipc_link *l, u32 addr);
 void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
                            int mtyp, struct sk_buff_head *xmitq);
 void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
@@ -112,8 +111,10 @@ char tipc_link_plane(struct tipc_link *l);
 int tipc_link_prio(struct tipc_link *l);
 int tipc_link_window(struct tipc_link *l);
 unsigned long tipc_link_tolerance(struct tipc_link *l);
-void tipc_link_set_tolerance(struct tipc_link *l, u32 tol);
-void tipc_link_set_prio(struct tipc_link *l, u32 prio);
+void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
+                             struct sk_buff_head *xmitq);
+void tipc_link_set_prio(struct tipc_link *l, u32 prio,
+                        struct sk_buff_head *xmitq);
 void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit);
 void tipc_link_set_queue_limits(struct tipc_link *l, u32 window);
 int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 91fce70291a8..e190460fe0d3 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -47,12 +47,6 @@

 #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */

-static const struct nla_policy
-tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = {
-    [TIPC_NLA_NAME_TABLE_UNSPEC] = { .type = NLA_UNSPEC },
-    [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED }
-};
-
 /**
  * struct name_info - name sequence publication info
  * @node_list: circular list of publications made by own node
@@ -418,6 +412,9 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
                                    struct tipc_subscription *s)
 {
     struct sub_seq *sseq = nseq->sseqs;
+    struct tipc_name_seq ns;
+
+    tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);

     list_add(&s->nameseq_list, &nseq->subscriptions);

@@ -425,7 +422,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
         return;

     while (sseq != &nseq->sseqs[nseq->first_free]) {
-        if (tipc_subscrp_check_overlap(s, sseq->lower, sseq->upper)) {
+        if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) {
             struct publication *crs;
             struct name_info *info = sseq->info;
             int must_report = 1;
@@ -722,9 +719,10 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref,
 void tipc_nametbl_subscribe(struct tipc_subscription *s)
 {
     struct tipc_net *tn = net_generic(s->net, tipc_net_id);
-    u32 type = s->seq.type;
+    u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);
     int index = hash(type);
     struct name_seq *seq;
+    struct tipc_name_seq ns;

     spin_lock_bh(&tn->nametbl_lock);
     seq = nametbl_find_seq(s->net, type);
@@ -735,8 +733,9 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s)
         tipc_nameseq_subscribe(seq, s);
         spin_unlock_bh(&seq->lock);
     } else {
+        tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
         pr_warn("Failed to create subscription for {%u,%u,%u}\n",
-                s->seq.type, s->seq.lower, s->seq.upper);
+                ns.type, ns.lower, ns.upper);
     }
     spin_unlock_bh(&tn->nametbl_lock);
 }
@@ -748,9 +747,10 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
 {
     struct tipc_net *tn = net_generic(s->net, tipc_net_id);
     struct name_seq *seq;
+    u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap);

     spin_lock_bh(&tn->nametbl_lock);
-    seq = nametbl_find_seq(s->net, s->seq.type);
+    seq = nametbl_find_seq(s->net, type);
     if (seq != NULL) {
         spin_lock_bh(&seq->lock);
         list_del_init(&s->nameseq_list);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 77bf9113c7a7..28bf4feeb81c 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -41,11 +41,7 @@
 #include "socket.h"
 #include "node.h"
 #include "bcast.h"
-
-static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
-    [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC },
-    [TIPC_NLA_NET_ID] = { .type = NLA_U32 }
-};
+#include "netlink.h"

 /*
  * The TIPC locking policy is designed to ensure a very fine locking
@@ -116,7 +112,6 @@ int tipc_net_start(struct net *net, u32 addr)
     tn->own_addr = addr;
     tipc_named_reinit(net);
     tipc_sk_reinit(net);
-    tipc_bcast_reinit(net);

     tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
                          TIPC_ZONE_SCOPE, 0, tn->own_addr);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 8975b0135b76..56935df2167a 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -55,6 +55,75 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = {
     [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, }
 };

+const struct nla_policy
+tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = {
+    [TIPC_NLA_NAME_TABLE_UNSPEC] = { .type = NLA_UNSPEC },
+    [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED }
+};
+
+const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
+    [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC },
+    [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 },
+    [TIPC_NLA_SOCK_REF] = { .type = NLA_U32 },
+    [TIPC_NLA_SOCK_CON] = { .type = NLA_NESTED },
+    [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG }
+};
+
+const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
+    [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC },
+    [TIPC_NLA_NET_ID] = { .type = NLA_U32 }
+};
+
+const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
+    [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
+    [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING,
+                             .len = TIPC_MAX_LINK_NAME },
+    [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
+    [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
+    [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG },
+    [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG },
+    [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED },
+    [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED },
+    [TIPC_NLA_LINK_RX] = { .type = NLA_U32 },
+    [TIPC_NLA_LINK_TX] = { .type = NLA_U32 }
+};
+
+const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
+    [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
+    [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
+    [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }
+};
+
+/* Properties valid for media, bearer and link */
+const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
+    [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC },
+    [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
+    [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
+    [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }
+};
+
+const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = {
+    [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC },
+    [TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING,
+                               .len = TIPC_MAX_BEARER_NAME },
+    [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED },
+    [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 }
+};
+
+const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = {
+    [TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC },
+    [TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING },
+    [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED }
+};
+
+const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
+    [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
+    [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
+                            .len = sizeof(struct sockaddr_storage)},
+    [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY,
+                             .len = sizeof(struct sockaddr_storage)},
+};
+
 /* Users of the legacy API (tipc-config) can't handle that we add operations,
  * so we have a separate genl handling for the new API.
  */
diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h
index 08a1db67b927..ed1dbcb4afbd 100644
--- a/net/tipc/netlink.h
+++ b/net/tipc/netlink.h
@@ -35,6 +35,7 @@

 #ifndef _TIPC_NETLINK_H
 #define _TIPC_NETLINK_H
+#include <net/netlink.h>

 extern struct genl_family tipc_genl_family;
 int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf);
@@ -45,6 +46,16 @@ struct tipc_nl_msg {
     u32 seq;
 };

+extern const struct nla_policy tipc_nl_name_table_policy[];
+extern const struct nla_policy tipc_nl_sock_policy[];
+extern const struct nla_policy tipc_nl_net_policy[];
+extern const struct nla_policy tipc_nl_link_policy[];
+extern const struct nla_policy tipc_nl_node_policy[];
+extern const struct nla_policy tipc_nl_prop_policy[];
+extern const struct nla_policy tipc_nl_bearer_policy[];
+extern const struct nla_policy tipc_nl_media_policy[];
+extern const struct nla_policy tipc_nl_udp_policy[];
+
 int tipc_netlink_start(void);
 int tipc_netlink_compat_start(void);
 void tipc_netlink_stop(void);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 2c016fdefe97..d7d050f44fc1 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1104,8 +1104,8 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
     req_nlh = (struct nlmsghdr *)skb->data;
     msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN;
     msg.cmd = req_userhdr->cmd;
-    msg.dst_sk = info->dst_sk;
     msg.net = genl_info_net(info);
+    msg.dst_sk = skb->sk;

     if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) {
         msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index fa97d9649a28..ace178fd3850 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -41,6 +41,7 @@
 #include "socket.h"
 #include "bcast.h"
 #include "discover.h"
+#include "netlink.h"

 #define INVALID_NODE_SIG 0x10000

@@ -164,28 +165,6 @@ struct tipc_sock_conn {
     struct list_head list;
 };

-static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
-    [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
-    [TIPC_NLA_LINK_NAME] = {
-        .type = NLA_STRING,
-        .len = TIPC_MAX_LINK_NAME
-    },
-    [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
-    [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
-    [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG },
-    [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG },
-    [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED },
-    [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED },
-    [TIPC_NLA_LINK_RX] = { .type = NLA_U32 },
-    [TIPC_NLA_LINK_TX] = { .type = NLA_U32 }
-};
-
-static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
-    [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
-    [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
-    [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }
-};
-
 static struct tipc_link *node_active_link(struct tipc_node *n, int sel)
 {
     int bearer_id = n->active_links[sel & 1];
@@ -225,9 +204,10 @@ static unsigned int tipc_hashfn(u32 addr)

 static void tipc_node_kref_release(struct kref *kref)
 {
-    struct tipc_node *node = container_of(kref, struct tipc_node, kref);
+    struct tipc_node *n = container_of(kref, struct tipc_node, kref);

-    tipc_node_delete(node);
+    kfree(n->bc_entry.link);
+    kfree_rcu(n, rcu);
 }

 static void tipc_node_put(struct tipc_node *node)
@@ -245,23 +225,23 @@
  */
 static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
 {
-    struct tipc_net *tn = net_generic(net, tipc_net_id);
+    struct tipc_net *tn = tipc_net(net);
     struct tipc_node *node;
+    unsigned int thash = tipc_hashfn(addr);

     if (unlikely(!in_own_cluster_exact(net, addr)))
         return NULL;

     rcu_read_lock();
-    hlist_for_each_entry_rcu(node, &tn->node_htable[tipc_hashfn(addr)],
-                             hash) {
-        if (node->addr == addr) {
-            tipc_node_get(node);
-            rcu_read_unlock();
-            return node;
-        }
+    hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
+        if (node->addr != addr)
+            continue;
+        if (!kref_get_unless_zero(&node->kref))
+            node = NULL;
+        break;
     }
     rcu_read_unlock();
-    return NULL;
+    return node;
 }

 static void tipc_node_read_lock(struct tipc_node *n)
@@ -346,12 +326,6 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
     skb_queue_head_init(&n->bc_entry.inputq2);
     for (i = 0; i < MAX_BEARERS; i++)
         spin_lock_init(&n->links[i].lock);
-    hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
-    list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
-        if (n->addr < temp_node->addr)
-            break;
-    }
-    list_add_tail_rcu(&n->list, &temp_node->list);
     n->state = SELF_DOWN_PEER_LEAVING;
     n->signature = INVALID_NODE_SIG;
     n->active_links[0] = INVALID_BEARER_ID;
@@ -372,6 +346,12 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
     tipc_node_get(n);
     setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n);
     n->keepalive_intv = U32_MAX;
+    hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
+    list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+        if (n->addr < temp_node->addr)
+            break;
+    }
+    list_add_tail_rcu(&n->list, &temp_node->list);
 exit:
     spin_unlock_bh(&tn->node_list_lock);
     return n;
@@ -395,21 +375,20 @@ static void tipc_node_delete(struct tipc_node *node)
 {
     list_del_rcu(&node->list);
     hlist_del_rcu(&node->hash);
-    kfree(node->bc_entry.link);
-    kfree_rcu(node, rcu);
+    tipc_node_put(node);
+
+    del_timer_sync(&node->timer);
+    tipc_node_put(node);
 }

 void tipc_node_stop(struct net *net)
 {
-    struct tipc_net *tn = net_generic(net, tipc_net_id);
+    struct tipc_net *tn = tipc_net(net);
     struct tipc_node *node, *t_node;

     spin_lock_bh(&tn->node_list_lock);
-    list_for_each_entry_safe(node, t_node, &tn->node_list, list) {
-        if (del_timer(&node->timer))
-            tipc_node_put(node);
-        tipc_node_put(node);
-    }
+    list_for_each_entry_safe(node, t_node, &tn->node_list, list)
+        tipc_node_delete(node);
     spin_unlock_bh(&tn->node_list_lock);
 }

@@ -530,9 +509,7 @@ static void tipc_node_timeout(unsigned long data)
         if (rc & TIPC_LINK_DOWN_EVT)
             tipc_node_link_down(n, bearer_id, false);
     }
-    if (!mod_timer(&n->timer, jiffies + n->keepalive_intv))
-        tipc_node_get(n);
-    tipc_node_put(n);
+    mod_timer(&n->timer, jiffies + n->keepalive_intv);
 }

 /**
@@ -845,7 +822,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
     memcpy(&le->maddr, maddr, sizeof(*maddr));
 exit:
     tipc_node_write_unlock(n);
-    if (reset && !tipc_link_is_reset(l))
+    if (reset && l && !tipc_link_is_reset(l))
         tipc_node_link_down(n, b->identity, false);
     tipc_node_put(n);
 }
@@ -1166,7 +1143,7 @@ msg_full:
  * @dnode: address of destination node
  * @selector: a number used for deterministic link selection
  * Consumes the buffer chain, except when returning -ELINKCONG
- * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
+ * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF
  */
 int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
                    u32 dnode, int selector)
@@ -1174,33 +1151,43 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
     struct tipc_link_entry *le = NULL;
     struct tipc_node *n;
     struct sk_buff_head xmitq;
-    int bearer_id = -1;
-    int rc = -EHOSTUNREACH;
+    int bearer_id;
+    int rc;
+
+    if (in_own_node(net, dnode)) {
+        tipc_sk_rcv(net, list);
+        return 0;
+    }

-    __skb_queue_head_init(&xmitq);
     n = tipc_node_find(net, dnode);
-    if (likely(n)) {
-        tipc_node_read_lock(n);
-        bearer_id = n->active_links[selector & 1];
-        if (bearer_id >= 0) {
-            le = &n->links[bearer_id];
-            spin_lock_bh(&le->lock);
-            rc = tipc_link_xmit(le->link, list, &xmitq);
-            spin_unlock_bh(&le->lock);
-        }
+    if (unlikely(!n)) {
+        skb_queue_purge(list);
+        return -EHOSTUNREACH;
+    }
+
+    tipc_node_read_lock(n);
+    bearer_id = n->active_links[selector & 1];
+    if (unlikely(bearer_id == INVALID_BEARER_ID)) {
         tipc_node_read_unlock(n);
-        if (likely(!rc))
-            tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
-        else if (rc == -ENOBUFS)
-            tipc_node_link_down(n, bearer_id, false);
         tipc_node_put(n);
-        return rc;
+        skb_queue_purge(list);
+        return -EHOSTUNREACH;
     }

-    if (likely(in_own_node(net, dnode))) {
-        tipc_sk_rcv(net, list);
-        return 0;
-    }
+    __skb_queue_head_init(&xmitq);
+    le = &n->links[bearer_id];
+    spin_lock_bh(&le->lock);
+    rc = tipc_link_xmit(le->link, list, &xmitq);
+    spin_unlock_bh(&le->lock);
+    tipc_node_read_unlock(n);
+
+    if (likely(rc == 0))
+        tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+    else if (rc == -ENOBUFS)
+        tipc_node_link_down(n, bearer_id, false);
+
+    tipc_node_put(n);
+
     return rc;
 }

@@ -1637,9 +1624,12 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
     char *name;
     struct tipc_link *link;
     struct tipc_node *node;
+    struct sk_buff_head xmitq;
     struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
     struct net *net = sock_net(skb->sk);

+    __skb_queue_head_init(&xmitq);
+
     if (!info->attrs[TIPC_NLA_LINK])
         return -EINVAL;

@@ -1683,13 +1673,13 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
             u32 tol;

             tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
-            tipc_link_set_tolerance(link, tol);
+            tipc_link_set_tolerance(link, tol, &xmitq);
         }
         if (props[TIPC_NLA_PROP_PRIO]) {
             u32 prio;

             prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
-            tipc_link_set_prio(link, prio);
+            tipc_link_set_prio(link, prio, &xmitq);
         }
         if (props[TIPC_NLA_PROP_WIN]) {
             u32 win;
@@ -1701,7 +1691,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)

 out:
     tipc_node_read_unlock(node);
-
+    tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr);
     return res;
 }

diff --git a/net/tipc/server.c b/net/tipc/server.c
index 922e04a43396..2446bfbaa309 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -571,13 +571,13 @@ static void tipc_work_stop(struct tipc_server *s)

 static int tipc_work_start(struct tipc_server *s)
 {
-    s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1);
+    s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0);
     if (!s->rcv_wq) {
         pr_err("can't start tipc receive workqueue\n");
         return -ENOMEM;
     }

-    s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1);
+    s->send_wq = alloc_ordered_workqueue("tipc_send", 0);
     if (!s->send_wq) {
         pr_err("can't start tipc send workqueue\n");
         destroy_workqueue(s->rcv_wq);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 69c29050f14a..3eeb50a27b89 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -42,6 +42,7 @@
 #include "name_distr.h"
 #include "socket.h"
 #include "bcast.h"
+#include "netlink.h"

 #define SS_LISTENING -1 /* socket is listening */
 #define SS_READY -2 /* socket is connectionless */
@@ -126,14 +127,6 @@ static const struct proto_ops stream_ops;
 static const struct proto_ops msg_ops;
 static struct proto tipc_proto;

-static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
-    [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC },
-    [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 },
-    [TIPC_NLA_SOCK_REF] = { .type = NLA_U32 },
-    [TIPC_NLA_SOCK_CON] = { .type = NLA_NESTED },
-    [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG }
-};
-
 static const struct rhashtable_params tsk_rht_params;

 /*
@@ -673,7 +666,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
     struct tipc_sock *tsk = tipc_sk(sk);
     struct net *net = sock_net(sk);
     struct tipc_msg *mhdr = &tsk->phdr;
-    struct sk_buff_head *pktchain = &sk->sk_write_queue;
+    struct sk_buff_head pktchain;
     struct iov_iter save = msg->msg_iter;
     uint mtu;
     int rc;
@@ -687,14 +680,16 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
     msg_set_nameupper(mhdr, seq->upper);
     msg_set_hdr_sz(mhdr, MCAST_H_SIZE);

+    skb_queue_head_init(&pktchain);
+
 new_mtu:
     mtu = tipc_bcast_get_mtu(net);
-    rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
+    rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
     if (unlikely(rc < 0))
         return rc;

     do {
-        rc = tipc_bcast_xmit(net, pktchain);
+        rc = tipc_bcast_xmit(net, &pktchain);
         if (likely(!rc))
             return dsz;

@@ -704,7 +699,7 @@ new_mtu:
             if (!rc)
                 continue;
         }
-        __skb_queue_purge(pktchain);
+        __skb_queue_purge(&pktchain);
         if (rc == -EMSGSIZE) {
             msg->msg_iter = save;
             goto new_mtu;
@@ -863,7 +858,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
     struct net *net = sock_net(sk);
     struct tipc_msg *mhdr = &tsk->phdr;
     u32 dnode, dport;
-    struct sk_buff_head *pktchain = &sk->sk_write_queue;
+    struct sk_buff_head pktchain;
     struct sk_buff *skb;
     struct tipc_name_seq *seq;
     struct iov_iter save;
@@ -924,17 +919,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
         msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
     }

+    skb_queue_head_init(&pktchain);
     save = m->msg_iter;
 new_mtu:
     mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
-    rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
+    rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
     if (rc < 0)
         return rc;

     do {
-        skb = skb_peek(pktchain);
+        skb = skb_peek(&pktchain);
         TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
-        rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+        rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
         if (likely(!rc)) {
             if (sock->state != SS_READY)
                 sock->state = SS_CONNECTING;
@@ -946,7 +942,7 @@ new_mtu:
             if (!rc)
                 continue;
         }
-        __skb_queue_purge(pktchain);
+        __skb_queue_purge(&pktchain);
         if (rc == -EMSGSIZE) {
             m->msg_iter = save;
             goto new_mtu;
@@ -1016,7 +1012,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
     struct net *net = sock_net(sk);
     struct tipc_sock *tsk = tipc_sk(sk);
     struct tipc_msg *mhdr = &tsk->phdr;
-    struct sk_buff_head *pktchain = &sk->sk_write_queue;
+    struct sk_buff_head pktchain;
     DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
     u32 portid = tsk->portid;
     int rc = -EINVAL;
@@ -1044,17 +1040,19 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)

     timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
     dnode = tsk_peer_node(tsk);
+    skb_queue_head_init(&pktchain);

 next:
     save = m->msg_iter;
     mtu = tsk->max_pkt;
     send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
-    rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
+    rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
     if (unlikely(rc < 0))
         return rc;
+
     do {
         if (likely(!tsk_conn_cong(tsk))) {
-            rc = tipc_node_xmit(net, pktchain, dnode, portid);
+            rc = tipc_node_xmit(net, &pktchain, dnode, portid);
             if (likely(!rc)) {
                 tsk->sent_unacked++;
                 sent += send;
@@ -1063,7 +1061,7 @@ next:
                 goto next;
             }
             if (rc == -EMSGSIZE) {
-                __skb_queue_purge(pktchain);
+                __skb_queue_purge(&pktchain);
                 tsk->max_pkt = tipc_node_get_mtu(net, dnode,
                                                  portid);
                 m->msg_iter = save;
@@ -1077,7 +1075,7 @@ next:
             rc = tipc_wait_for_sndpkt(sock, &timeo);
     } while (!rc);

-    __skb_queue_purge(pktchain);
+    __skb_queue_purge(&pktchain);
     return sent ? sent : rc;
 }

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 350cca33ee0a..e6cb386fbf34 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -92,25 +92,42 @@ static void tipc_subscrp_send_event(struct tipc_subscription *sub,
  *
  * Returns 1 if there is overlap, otherwise 0.
  */
-int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower,
+int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
                                u32 found_upper)
 {
-    if (found_lower < sub->seq.lower)
-        found_lower = sub->seq.lower;
-    if (found_upper > sub->seq.upper)
-        found_upper = sub->seq.upper;
+    if (found_lower < seq->lower)
+        found_lower = seq->lower;
+    if (found_upper > seq->upper)
+        found_upper = seq->upper;
     if (found_lower > found_upper)
         return 0;
     return 1;
 }

+u32 tipc_subscrp_convert_seq_type(u32 type, int swap)
+{
+    return htohl(type, swap);
+}
+
+void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
+                              struct tipc_name_seq *out)
+{
+    out->type = htohl(in->type, swap);
+    out->lower = htohl(in->lower, swap);
+    out->upper = htohl(in->upper, swap);
+}
+
 void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
                                  u32 found_upper, u32 event, u32 port_ref,
                                  u32 node, int must)
 {
-    if (!tipc_subscrp_check_overlap(sub, found_lower, found_upper))
+    struct tipc_name_seq seq;
+
+    tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
+    if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
         return;
-    if (!must && !(sub->filter & TIPC_SUB_PORTS))
+    if (!must &&
+        !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS))
         return;

     tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
@@ -171,12 +188,14 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid)
 static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
 {
     struct tipc_subscription *sub, *temp;
+    u32 timeout;

     spin_lock_bh(&subscriber->lock);
     /* Destroy any existing subscriptions for subscriber */
     list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
                              subscrp_list) {
-        if (del_timer(&sub->timer)) {
+        timeout = htohl(sub->evt.s.timeout, sub->swap);
+        if ((timeout == TIPC_WAIT_FOREVER) || del_timer(&sub->timer)) {
             tipc_subscrp_delete(sub);
             tipc_subscrb_put(subscriber);
         }
@@ -200,13 +219,16 @@ static void tipc_subscrp_cancel(struct tipc_subscr *s,
                                 struct tipc_subscriber *subscriber)
 {
     struct tipc_subscription *sub, *temp;
+    u32 timeout;

     spin_lock_bh(&subscriber->lock);
     /* Find first matching subscription, exit if not found */
     list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
                              subscrp_list) {
         if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
-            if (del_timer(&sub->timer)) {
+            timeout = htohl(sub->evt.s.timeout, sub->swap);
+            if ((timeout == TIPC_WAIT_FOREVER) ||
+                del_timer(&sub->timer)) {
                 tipc_subscrp_delete(sub);
                 tipc_subscrb_put(subscriber);
             }
@@ -216,66 +238,67 @@ static void tipc_subscrp_cancel(struct tipc_subscr *s,
     spin_unlock_bh(&subscriber->lock);
 }

-static int tipc_subscrp_create(struct net *net, struct tipc_subscr *s,
-                               struct tipc_subscriber *subscriber,
-                               struct tipc_subscription **sub_p)
+static struct tipc_subscription *tipc_subscrp_create(struct net *net,
+                                                     struct tipc_subscr *s,
+                                                     int swap)
 {
     struct tipc_net *tn = net_generic(net, tipc_net_id);
     struct tipc_subscription *sub;
-    int swap;
-
-    /* Determine subscriber's endianness */
-    swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE));
-
-    /* Detect & process a subscription cancellation request */
-    if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
-        s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
-        tipc_subscrp_cancel(s, subscriber);
-        return 0;
-    }
+    u32 filter = htohl(s->filter, swap);

     /* Refuse subscription if global limit exceeded */
     if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
         pr_warn("Subscription rejected, limit reached (%u)\n",
                 TIPC_MAX_SUBSCRIPTIONS);
-        return -EINVAL;
+        return NULL;
     }

     /* Allocate subscription object */
     sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
     if (!sub) {
         pr_warn("Subscription rejected, no memory\n");
-        return -ENOMEM;
+        return NULL;
     }

     /* Initialize subscription object */
     sub->net = net;
-    sub->seq.type = htohl(s->seq.type, swap);
-    sub->seq.lower = htohl(s->seq.lower, swap);
-    sub->seq.upper = htohl(s->seq.upper, swap);
-    sub->timeout = msecs_to_jiffies(htohl(s->timeout, swap));
-    sub->filter = htohl(s->filter, swap);
-    if ((!(sub->filter & TIPC_SUB_PORTS) ==
-         !(sub->filter & TIPC_SUB_SERVICE)) ||
-        (sub->seq.lower > sub->seq.upper)) {
+    if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) ||
+        (htohl(s->seq.lower, swap) > htohl(s->seq.upper, swap))) {
         pr_warn("Subscription rejected, illegal request\n");
         kfree(sub);
-        return -EINVAL;
+        return NULL;
     }
-    spin_lock_bh(&subscriber->lock);
-    list_add(&sub->subscrp_list, &subscriber->subscrp_list);
-    spin_unlock_bh(&subscriber->lock);
-    sub->subscriber = subscriber;
+
     sub->swap = swap;
     memcpy(&sub->evt.s, s, sizeof(*s));
     atomic_inc(&tn->subscription_count);
+    return sub;
+}
+
+static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
+                                   struct tipc_subscriber *subscriber, int swap)
+{
+    struct tipc_net *tn = net_generic(net, tipc_net_id);
+    struct tipc_subscription *sub = NULL;
+    u32 timeout;
+
+    sub = tipc_subscrp_create(net, s, swap);
+    if (!sub)
+        return tipc_conn_terminate(tn->topsrv, subscriber->conid);
+
+    spin_lock_bh(&subscriber->lock);
+    list_add(&sub->subscrp_list, &subscriber->subscrp_list);
+    tipc_subscrb_get(subscriber);
+    sub->subscriber = subscriber;
+    tipc_nametbl_subscribe(sub);
+    spin_unlock_bh(&subscriber->lock);
+
+    timeout = htohl(sub->evt.s.timeout, swap);
+    if (timeout == TIPC_WAIT_FOREVER)
+        return;
+
     setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
-    if (sub->timeout != TIPC_WAIT_FOREVER)
-        sub->timeout += jiffies;
-    if (!mod_timer(&sub->timer, sub->timeout))
-        tipc_subscrb_get(subscriber);
-    *sub_p = sub;
-    return 0;
+    mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
 }

 /* Handle one termination request for the subscriber */
@@ -290,14 +313,21 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
                                 void *buf, size_t len)
 {
     struct tipc_subscriber *subscriber = usr_data;
-    struct tipc_subscription *sub = NULL;
-    struct tipc_net *tn = net_generic(net, tipc_net_id);
+    struct tipc_subscr *s = (struct tipc_subscr *)buf;
+    int swap;
+
+    /* Determine subscriber's endianness */
+    swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE |
+                          TIPC_SUB_CANCEL));
+
+    /* Detect & process a subscription cancellation request */
+    if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
+        s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
+        return tipc_subscrp_cancel(s, subscriber);
+    }

-    tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub);
-    if (sub)
-        tipc_nametbl_subscribe(sub);
-    else
-        tipc_conn_terminate(tn->topsrv, subscriber->conid);
+    if (s)
+        tipc_subscrp_subscribe(net, s, subscriber, swap);
 }

 /* Handle one request to establish a new subscriber */
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 92ee18cc5fe6..be60103082c9 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -50,21 +50,15 @@ struct tipc_subscriber;
  * @subscriber: pointer to its subscriber
  * @seq: name sequence associated with subscription
  * @net: point to network namespace
- * @timeout: duration of subscription (in ms)
- * @filter: event filtering to be done for subscription
  * @timer: timer governing subscription duration (optional)
  * @nameseq_list: adjacent subscriptions in name sequence's subscription list
  * @subscrp_list: adjacent subscriptions in subscriber's subscription list
- * @server_ref: object reference of server port associated with subscription
  * @swap: indicates if subscriber uses opposite endianness in its messages
  * @evt: template for events generated by subscription
  */
 struct tipc_subscription {
     struct tipc_subscriber *subscriber;
-    struct tipc_name_seq seq;
     struct net *net;
-    unsigned long timeout;
-    u32 filter;
     struct timer_list timer;
     struct list_head nameseq_list;
     struct list_head subscrp_list;
@@ -72,11 +66,14 @@ struct tipc_subscription {
     struct tipc_event evt;
 };

-int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower,
+int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower,
                                u32 found_upper);
 void tipc_subscrp_report_overlap(struct tipc_subscription *sub,
                                  u32 found_lower, u32 found_upper, u32 event,
                                  u32 port_ref, u32 node, int must);
+void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap,
+                              struct tipc_name_seq *out);
+u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
 int tipc_topsrv_start(struct net *net);
 void tipc_topsrv_stop(struct net *net);

diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index d63a911e7fe2..c9cf2be3674a 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -48,19 +48,12 @@
48#include <linux/tipc_netlink.h> 48#include <linux/tipc_netlink.h>
49#include "core.h" 49#include "core.h"
50#include "bearer.h" 50#include "bearer.h"
51#include "netlink.h"
51 52
52/* IANA assigned UDP port */ 53/* IANA assigned UDP port */
53#define UDP_PORT_DEFAULT 6118 54#define UDP_PORT_DEFAULT 6118
54 55
55#define UDP_MIN_HEADROOM 28 56#define UDP_MIN_HEADROOM 48
56
57static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
58 [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
59 [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
60 .len = sizeof(struct sockaddr_storage)},
61 [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY,
62 .len = sizeof(struct sockaddr_storage)},
63};
64 57
65/** 58/**
66 * struct udp_media_addr - IP/UDP addressing information 59 * struct udp_media_addr - IP/UDP addressing information
@@ -181,6 +174,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
181 err = PTR_ERR(rt); 174 err = PTR_ERR(rt);
182 goto tx_error; 175 goto tx_error;
183 } 176 }
177
178 skb->dev = rt->dst.dev;
184 ttl = ip4_dst_hoplimit(&rt->dst); 179 ttl = ip4_dst_hoplimit(&rt->dst);
185 udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, 180 udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
186 dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, 181 dst->ipv4.s_addr, 0, ttl, 0, src->udp_port,
@@ -201,7 +196,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
201 ttl = ip6_dst_hoplimit(ndst); 196 ttl = ip6_dst_hoplimit(ndst);
202 err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, 197 err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
203 ndst->dev, &src->ipv6, 198 ndst->dev, &src->ipv6,
204 &dst->ipv6, 0, ttl, src->udp_port, 199 &dst->ipv6, 0, ttl, 0, src->udp_port,
205 dst->udp_port, false); 200 dst->udp_port, false);
206#endif 201#endif
207 } 202 }
@@ -274,7 +269,7 @@ static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
274 struct udp_media_addr *remote) 269 struct udp_media_addr *remote)
275{ 270{
276 struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; 271 struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
277 struct sockaddr_storage *sa_local, *sa_remote; 272 struct sockaddr_storage sa_local, sa_remote;
278 273
279 if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) 274 if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
280 goto err; 275 goto err;
@@ -283,41 +278,48 @@ static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
283 tipc_nl_udp_policy)) 278 tipc_nl_udp_policy))
284 goto err; 279 goto err;
285 if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) { 280 if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) {
286 sa_local = nla_data(opts[TIPC_NLA_UDP_LOCAL]); 281 nla_memcpy(&sa_local, opts[TIPC_NLA_UDP_LOCAL],
287 sa_remote = nla_data(opts[TIPC_NLA_UDP_REMOTE]); 282 sizeof(sa_local));
283 nla_memcpy(&sa_remote, opts[TIPC_NLA_UDP_REMOTE],
284 sizeof(sa_remote));
288 } else { 285 } else {
289err: 286err:
290 pr_err("Invalid UDP bearer configuration"); 287 pr_err("Invalid UDP bearer configuration");
291 return -EINVAL; 288 return -EINVAL;
292 } 289 }
293 if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET) { 290 if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET) {
294 struct sockaddr_in *ip4; 291 struct sockaddr_in *ip4;
295 292
296 ip4 = (struct sockaddr_in *)sa_local; 293 ip4 = (struct sockaddr_in *)&sa_local;
297 local->proto = htons(ETH_P_IP); 294 local->proto = htons(ETH_P_IP);
298 local->udp_port = ip4->sin_port; 295 local->udp_port = ip4->sin_port;
299 local->ipv4.s_addr = ip4->sin_addr.s_addr; 296 local->ipv4.s_addr = ip4->sin_addr.s_addr;
300 297
301 ip4 = (struct sockaddr_in *)sa_remote; 298 ip4 = (struct sockaddr_in *)&sa_remote;
302 remote->proto = htons(ETH_P_IP); 299 remote->proto = htons(ETH_P_IP);
303 remote->udp_port = ip4->sin_port; 300 remote->udp_port = ip4->sin_port;
304 remote->ipv4.s_addr = ip4->sin_addr.s_addr; 301 remote->ipv4.s_addr = ip4->sin_addr.s_addr;
305 return 0; 302 return 0;
306 303
307#if IS_ENABLED(CONFIG_IPV6) 304#if IS_ENABLED(CONFIG_IPV6)
308 } else if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET6) { 305 } else if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET6) {
306 int atype;
309 struct sockaddr_in6 *ip6; 307 struct sockaddr_in6 *ip6;
310 308
311 ip6 = (struct sockaddr_in6 *)sa_local; 309 ip6 = (struct sockaddr_in6 *)&sa_local;
310 atype = ipv6_addr_type(&ip6->sin6_addr);
311 if (__ipv6_addr_needs_scope_id(atype) && !ip6->sin6_scope_id)
312 return -EINVAL;
313
312 local->proto = htons(ETH_P_IPV6); 314 local->proto = htons(ETH_P_IPV6);
313 local->udp_port = ip6->sin6_port; 315 local->udp_port = ip6->sin6_port;
314 local->ipv6 = ip6->sin6_addr; 316 memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
315 ub->ifindex = ip6->sin6_scope_id; 317 ub->ifindex = ip6->sin6_scope_id;
316 318
317 ip6 = (struct sockaddr_in6 *)sa_remote; 319 ip6 = (struct sockaddr_in6 *)&sa_remote;
318 remote->proto = htons(ETH_P_IPV6); 320 remote->proto = htons(ETH_P_IPV6);
319 remote->udp_port = ip6->sin6_port; 321 remote->udp_port = ip6->sin6_port;
320 remote->ipv6 = ip6->sin6_addr; 322 memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
321 return 0; 323 return 0;
322#endif 324#endif
323 } 325 }
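
The parse_options() hunk above stops aliasing the netlink attribute payload and instead copies it into stack sockaddr_storage with nla_memcpy(), so the copy is bounded by both the attribute length and the destination size. A minimal sketch of that pattern using the same generic netlink helper (the function and attribute handling here are illustrative, not the real TIPC code):

#include <net/netlink.h>
#include <linux/socket.h>

/* Sketch only: copy a sockaddr-carrying attribute onto the stack so
 * later parsing never dereferences the skb payload in place. */
static int copy_sockaddr_attr(const struct nlattr *attr,
                              struct sockaddr_storage *out)
{
        if (!attr)
                return -EINVAL;

        /* nla_memcpy() copies at most min(nla_len(attr), sizeof(*out))
         * bytes, so a short or oversized attribute cannot overrun. */
        nla_memcpy(out, attr, sizeof(*out));
        return 0;
}
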
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c5bf5ef2bf89..8269da73e9e5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1496 UNIXCB(skb).fp = NULL; 1496 UNIXCB(skb).fp = NULL;
1497 1497
1498 for (i = scm->fp->count-1; i >= 0; i--) 1498 for (i = scm->fp->count-1; i >= 0; i--)
1499 unix_notinflight(scm->fp->fp[i]); 1499 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1500} 1500}
1501 1501
1502static void unix_destruct_scm(struct sk_buff *skb) 1502static void unix_destruct_scm(struct sk_buff *skb)
@@ -1534,7 +1534,6 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1534{ 1534{
1535 int i; 1535 int i;
1536 unsigned char max_level = 0; 1536 unsigned char max_level = 0;
1537 int unix_sock_count = 0;
1538 1537
1539 if (too_many_unix_fds(current)) 1538 if (too_many_unix_fds(current))
1540 return -ETOOMANYREFS; 1539 return -ETOOMANYREFS;
@@ -1542,11 +1541,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1542 for (i = scm->fp->count - 1; i >= 0; i--) { 1541 for (i = scm->fp->count - 1; i >= 0; i--) {
1543 struct sock *sk = unix_get_socket(scm->fp->fp[i]); 1542 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1544 1543
1545 if (sk) { 1544 if (sk)
1546 unix_sock_count++;
1547 max_level = max(max_level, 1545 max_level = max(max_level,
1548 unix_sk(sk)->recursion_level); 1546 unix_sk(sk)->recursion_level);
1549 }
1550 } 1547 }
1551 if (unlikely(max_level > MAX_RECURSION_LEVEL)) 1548 if (unlikely(max_level > MAX_RECURSION_LEVEL))
1552 return -ETOOMANYREFS; 1549 return -ETOOMANYREFS;
@@ -1561,7 +1558,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1561 return -ENOMEM; 1558 return -ENOMEM;
1562 1559
1563 for (i = scm->fp->count - 1; i >= 0; i--) 1560 for (i = scm->fp->count - 1; i >= 0; i--)
1564 unix_inflight(scm->fp->fp[i]); 1561 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1565 return max_level; 1562 return max_level;
1566} 1563}
1567 1564
@@ -1781,7 +1778,12 @@ restart_locked:
1781 goto out_unlock; 1778 goto out_unlock;
1782 } 1779 }
1783 1780
1784 if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { 1781 /* other == sk && unix_peer(other) != sk if
1782 * - unix_peer(sk) == NULL, destination address bound to sk
1783 * - unix_peer(sk) == sk by time of get but disconnected before lock
1784 */
1785 if (other != sk &&
1786 unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1785 if (timeo) { 1787 if (timeo) {
1786 timeo = unix_wait_for_peer(other, timeo); 1788 timeo = unix_wait_for_peer(other, timeo);
1787 1789
@@ -2277,13 +2279,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
2277 size_t size = state->size; 2279 size_t size = state->size;
2278 unsigned int last_len; 2280 unsigned int last_len;
2279 2281
2280 err = -EINVAL; 2282 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2281 if (sk->sk_state != TCP_ESTABLISHED) 2283 err = -EINVAL;
2282 goto out; 2284 goto out;
2285 }
2283 2286
2284 err = -EOPNOTSUPP; 2287 if (unlikely(flags & MSG_OOB)) {
2285 if (flags & MSG_OOB) 2288 err = -EOPNOTSUPP;
2286 goto out; 2289 goto out;
2290 }
2287 2291
2288 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2292 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2289 timeo = sock_rcvtimeo(sk, noblock); 2293 timeo = sock_rcvtimeo(sk, noblock);
@@ -2305,6 +2309,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
2305 bool drop_skb; 2309 bool drop_skb;
2306 struct sk_buff *skb, *last; 2310 struct sk_buff *skb, *last;
2307 2311
2312redo:
2308 unix_state_lock(sk); 2313 unix_state_lock(sk);
2309 if (sock_flag(sk, SOCK_DEAD)) { 2314 if (sock_flag(sk, SOCK_DEAD)) {
2310 err = -ECONNRESET; 2315 err = -ECONNRESET;
@@ -2329,9 +2334,11 @@ again:
2329 goto unlock; 2334 goto unlock;
2330 2335
2331 unix_state_unlock(sk); 2336 unix_state_unlock(sk);
2332 err = -EAGAIN; 2337 if (!timeo) {
2333 if (!timeo) 2338 err = -EAGAIN;
2334 break; 2339 break;
2340 }
2341
2335 mutex_unlock(&u->readlock); 2342 mutex_unlock(&u->readlock);
2336 2343
2337 timeo = unix_stream_data_wait(sk, timeo, last, 2344 timeo = unix_stream_data_wait(sk, timeo, last,
@@ -2339,11 +2346,12 @@ again:
2339 2346
2340 if (signal_pending(current)) { 2347 if (signal_pending(current)) {
2341 err = sock_intr_errno(timeo); 2348 err = sock_intr_errno(timeo);
2349 scm_destroy(&scm);
2342 goto out; 2350 goto out;
2343 } 2351 }
2344 2352
2345 mutex_lock(&u->readlock); 2353 mutex_lock(&u->readlock);
2346 continue; 2354 goto redo;
2347unlock: 2355unlock:
2348 unix_state_unlock(sk); 2356 unix_state_unlock(sk);
2349 break; 2357 break;
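
For context on what unix_inflight()/unix_notinflight() count in the hunks above: an AF_UNIX message can carry open file descriptors as SCM_RIGHTS ancillary data, and those descriptors stay "in flight" (charged to a per-user unix_inflight counter, per the garbage.c hunk further down) until the receiver claims them. A self-contained userspace sender showing the mechanism; it is independent of the kernel patch and the names are illustrative:

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Send one data byte plus an open descriptor over a connected AF_UNIX
 * socket; the kernel accounts the descriptor as in flight until the
 * peer calls recvmsg() and claims it. */
static int send_fd(int sock, int fd_to_pass)
{
        char data = 'x';
        char cbuf[CMSG_SPACE(sizeof(int))] = { 0 };
        struct iovec iov = { .iov_base = &data, .iov_len = 1 };
        struct msghdr msg = {
                .msg_iov = &iov,
                .msg_iovlen = 1,
                .msg_control = cbuf,
                .msg_controllen = sizeof(cbuf),
        };
        struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));

        return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
}
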
diff --git a/net/unix/diag.c b/net/unix/diag.c
index c512f64d5287..4d9679701a6d 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -220,7 +220,7 @@ done:
220 return skb->len; 220 return skb->len;
221} 221}
222 222
223static struct sock *unix_lookup_by_ino(int ino) 223static struct sock *unix_lookup_by_ino(unsigned int ino)
224{ 224{
225 int i; 225 int i;
226 struct sock *sk; 226 struct sock *sk;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 8fcdc2283af5..6a0d48525fcf 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file *filp)
116 * descriptor if it is for an AF_UNIX socket. 116 * descriptor if it is for an AF_UNIX socket.
117 */ 117 */
118 118
119void unix_inflight(struct file *fp) 119void unix_inflight(struct user_struct *user, struct file *fp)
120{ 120{
121 struct sock *s = unix_get_socket(fp); 121 struct sock *s = unix_get_socket(fp);
122 122
@@ -133,11 +133,11 @@ void unix_inflight(struct file *fp)
133 } 133 }
134 unix_tot_inflight++; 134 unix_tot_inflight++;
135 } 135 }
136 fp->f_cred->user->unix_inflight++; 136 user->unix_inflight++;
137 spin_unlock(&unix_gc_lock); 137 spin_unlock(&unix_gc_lock);
138} 138}
139 139
140void unix_notinflight(struct file *fp) 140void unix_notinflight(struct user_struct *user, struct file *fp)
141{ 141{
142 struct sock *s = unix_get_socket(fp); 142 struct sock *s = unix_get_socket(fp);
143 143
@@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp)
152 list_del_init(&u->link); 152 list_del_init(&u->link);
153 unix_tot_inflight--; 153 unix_tot_inflight--;
154 } 154 }
155 fp->f_cred->user->unix_inflight--; 155 user->unix_inflight--;
156 spin_unlock(&unix_gc_lock); 156 spin_unlock(&unix_gc_lock);
157} 157}
158 158
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7fd1220fbfa0..3dce53ebea92 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1209,10 +1209,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1209 1209
1210 if (signal_pending(current)) { 1210 if (signal_pending(current)) {
1211 err = sock_intr_errno(timeout); 1211 err = sock_intr_errno(timeout);
1212 goto out_wait_error; 1212 sk->sk_state = SS_UNCONNECTED;
1213 sock->state = SS_UNCONNECTED;
1214 goto out_wait;
1213 } else if (timeout == 0) { 1215 } else if (timeout == 0) {
1214 err = -ETIMEDOUT; 1216 err = -ETIMEDOUT;
1215 goto out_wait_error; 1217 sk->sk_state = SS_UNCONNECTED;
1218 sock->state = SS_UNCONNECTED;
1219 goto out_wait;
1216 } 1220 }
1217 1221
1218 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1222 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
@@ -1220,20 +1224,17 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
1220 1224
1221 if (sk->sk_err) { 1225 if (sk->sk_err) {
1222 err = -sk->sk_err; 1226 err = -sk->sk_err;
1223 goto out_wait_error; 1227 sk->sk_state = SS_UNCONNECTED;
1224 } else 1228 sock->state = SS_UNCONNECTED;
1229 } else {
1225 err = 0; 1230 err = 0;
1231 }
1226 1232
1227out_wait: 1233out_wait:
1228 finish_wait(sk_sleep(sk), &wait); 1234 finish_wait(sk_sleep(sk), &wait);
1229out: 1235out:
1230 release_sock(sk); 1236 release_sock(sk);
1231 return err; 1237 return err;
1232
1233out_wait_error:
1234 sk->sk_state = SS_UNCONNECTED;
1235 sock->state = SS_UNCONNECTED;
1236 goto out_wait;
1237} 1238}
1238 1239
1239static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) 1240static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
@@ -1270,18 +1271,20 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
1270 listener->sk_err == 0) { 1271 listener->sk_err == 0) {
1271 release_sock(listener); 1272 release_sock(listener);
1272 timeout = schedule_timeout(timeout); 1273 timeout = schedule_timeout(timeout);
1274 finish_wait(sk_sleep(listener), &wait);
1273 lock_sock(listener); 1275 lock_sock(listener);
1274 1276
1275 if (signal_pending(current)) { 1277 if (signal_pending(current)) {
1276 err = sock_intr_errno(timeout); 1278 err = sock_intr_errno(timeout);
1277 goto out_wait; 1279 goto out;
1278 } else if (timeout == 0) { 1280 } else if (timeout == 0) {
1279 err = -EAGAIN; 1281 err = -EAGAIN;
1280 goto out_wait; 1282 goto out;
1281 } 1283 }
1282 1284
1283 prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); 1285 prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
1284 } 1286 }
1287 finish_wait(sk_sleep(listener), &wait);
1285 1288
1286 if (listener->sk_err) 1289 if (listener->sk_err)
1287 err = -listener->sk_err; 1290 err = -listener->sk_err;
@@ -1301,19 +1304,15 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
1301 */ 1304 */
1302 if (err) { 1305 if (err) {
1303 vconnected->rejected = true; 1306 vconnected->rejected = true;
1304 release_sock(connected); 1307 } else {
1305 sock_put(connected); 1308 newsock->state = SS_CONNECTED;
1306 goto out_wait; 1309 sock_graft(connected, newsock);
1307 } 1310 }
1308 1311
1309 newsock->state = SS_CONNECTED;
1310 sock_graft(connected, newsock);
1311 release_sock(connected); 1312 release_sock(connected);
1312 sock_put(connected); 1313 sock_put(connected);
1313 } 1314 }
1314 1315
1315out_wait:
1316 finish_wait(sk_sleep(listener), &wait);
1317out: 1316out:
1318 release_sock(listener); 1317 release_sock(listener);
1319 return err; 1318 return err;
@@ -1557,11 +1556,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1557 if (err < 0) 1556 if (err < 0)
1558 goto out; 1557 goto out;
1559 1558
1560 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1561 1559
1562 while (total_written < len) { 1560 while (total_written < len) {
1563 ssize_t written; 1561 ssize_t written;
1564 1562
1563 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1565 while (vsock_stream_has_space(vsk) == 0 && 1564 while (vsock_stream_has_space(vsk) == 0 &&
1566 sk->sk_err == 0 && 1565 sk->sk_err == 0 &&
1567 !(sk->sk_shutdown & SEND_SHUTDOWN) && 1566 !(sk->sk_shutdown & SEND_SHUTDOWN) &&
@@ -1570,27 +1569,33 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1570 /* Don't wait for non-blocking sockets. */ 1569 /* Don't wait for non-blocking sockets. */
1571 if (timeout == 0) { 1570 if (timeout == 0) {
1572 err = -EAGAIN; 1571 err = -EAGAIN;
1573 goto out_wait; 1572 finish_wait(sk_sleep(sk), &wait);
1573 goto out_err;
1574 } 1574 }
1575 1575
1576 err = transport->notify_send_pre_block(vsk, &send_data); 1576 err = transport->notify_send_pre_block(vsk, &send_data);
1577 if (err < 0) 1577 if (err < 0) {
1578 goto out_wait; 1578 finish_wait(sk_sleep(sk), &wait);
1579 goto out_err;
1580 }
1579 1581
1580 release_sock(sk); 1582 release_sock(sk);
1581 timeout = schedule_timeout(timeout); 1583 timeout = schedule_timeout(timeout);
1582 lock_sock(sk); 1584 lock_sock(sk);
1583 if (signal_pending(current)) { 1585 if (signal_pending(current)) {
1584 err = sock_intr_errno(timeout); 1586 err = sock_intr_errno(timeout);
1585 goto out_wait; 1587 finish_wait(sk_sleep(sk), &wait);
1588 goto out_err;
1586 } else if (timeout == 0) { 1589 } else if (timeout == 0) {
1587 err = -EAGAIN; 1590 err = -EAGAIN;
1588 goto out_wait; 1591 finish_wait(sk_sleep(sk), &wait);
1592 goto out_err;
1589 } 1593 }
1590 1594
1591 prepare_to_wait(sk_sleep(sk), &wait, 1595 prepare_to_wait(sk_sleep(sk), &wait,
1592 TASK_INTERRUPTIBLE); 1596 TASK_INTERRUPTIBLE);
1593 } 1597 }
1598 finish_wait(sk_sleep(sk), &wait);
1594 1599
1595 /* These checks occur both as part of and after the loop 1600 /* These checks occur both as part of and after the loop
1596 * conditional since we need to check before and after 1601 * conditional since we need to check before and after
@@ -1598,16 +1603,16 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1598 */ 1603 */
1599 if (sk->sk_err) { 1604 if (sk->sk_err) {
1600 err = -sk->sk_err; 1605 err = -sk->sk_err;
1601 goto out_wait; 1606 goto out_err;
1602 } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || 1607 } else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
1603 (vsk->peer_shutdown & RCV_SHUTDOWN)) { 1608 (vsk->peer_shutdown & RCV_SHUTDOWN)) {
1604 err = -EPIPE; 1609 err = -EPIPE;
1605 goto out_wait; 1610 goto out_err;
1606 } 1611 }
1607 1612
1608 err = transport->notify_send_pre_enqueue(vsk, &send_data); 1613 err = transport->notify_send_pre_enqueue(vsk, &send_data);
1609 if (err < 0) 1614 if (err < 0)
1610 goto out_wait; 1615 goto out_err;
1611 1616
1612 /* Note that enqueue will only write as many bytes as are free 1617 /* Note that enqueue will only write as many bytes as are free
1613 * in the produce queue, so we don't need to ensure len is 1618 * in the produce queue, so we don't need to ensure len is
@@ -1620,7 +1625,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1620 len - total_written); 1625 len - total_written);
1621 if (written < 0) { 1626 if (written < 0) {
1622 err = -ENOMEM; 1627 err = -ENOMEM;
1623 goto out_wait; 1628 goto out_err;
1624 } 1629 }
1625 1630
1626 total_written += written; 1631 total_written += written;
@@ -1628,14 +1633,13 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1628 err = transport->notify_send_post_enqueue( 1633 err = transport->notify_send_post_enqueue(
1629 vsk, written, &send_data); 1634 vsk, written, &send_data);
1630 if (err < 0) 1635 if (err < 0)
1631 goto out_wait; 1636 goto out_err;
1632 1637
1633 } 1638 }
1634 1639
1635out_wait: 1640out_err:
1636 if (total_written > 0) 1641 if (total_written > 0)
1637 err = total_written; 1642 err = total_written;
1638 finish_wait(sk_sleep(sk), &wait);
1639out: 1643out:
1640 release_sock(sk); 1644 release_sock(sk);
1641 return err; 1645 return err;
@@ -1716,21 +1720,61 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1716 if (err < 0) 1720 if (err < 0)
1717 goto out; 1721 goto out;
1718 1722
1719 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1720 1723
1721 while (1) { 1724 while (1) {
1722 s64 ready = vsock_stream_has_data(vsk); 1725 s64 ready;
1723 1726
1724 if (ready < 0) { 1727 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1725 /* Invalid queue pair content. XXX This should be 1728 ready = vsock_stream_has_data(vsk);
1726 * changed to a connection reset in a later change.
1727 */
1728 1729
1729 err = -ENOMEM; 1730 if (ready == 0) {
1730 goto out_wait; 1731 if (sk->sk_err != 0 ||
1731 } else if (ready > 0) { 1732 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1733 (vsk->peer_shutdown & SEND_SHUTDOWN)) {
1734 finish_wait(sk_sleep(sk), &wait);
1735 break;
1736 }
1737 /* Don't wait for non-blocking sockets. */
1738 if (timeout == 0) {
1739 err = -EAGAIN;
1740 finish_wait(sk_sleep(sk), &wait);
1741 break;
1742 }
1743
1744 err = transport->notify_recv_pre_block(
1745 vsk, target, &recv_data);
1746 if (err < 0) {
1747 finish_wait(sk_sleep(sk), &wait);
1748 break;
1749 }
1750 release_sock(sk);
1751 timeout = schedule_timeout(timeout);
1752 lock_sock(sk);
1753
1754 if (signal_pending(current)) {
1755 err = sock_intr_errno(timeout);
1756 finish_wait(sk_sleep(sk), &wait);
1757 break;
1758 } else if (timeout == 0) {
1759 err = -EAGAIN;
1760 finish_wait(sk_sleep(sk), &wait);
1761 break;
1762 }
1763 } else {
1732 ssize_t read; 1764 ssize_t read;
1733 1765
1766 finish_wait(sk_sleep(sk), &wait);
1767
1768 if (ready < 0) {
1769 /* Invalid queue pair content. XXX This should
1770 * be changed to a connection reset in a later
1771 * change.
1772 */
1773
1774 err = -ENOMEM;
1775 goto out;
1776 }
1777
1734 err = transport->notify_recv_pre_dequeue( 1778 err = transport->notify_recv_pre_dequeue(
1735 vsk, target, &recv_data); 1779 vsk, target, &recv_data);
1736 if (err < 0) 1780 if (err < 0)
@@ -1750,42 +1794,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1750 vsk, target, read, 1794 vsk, target, read,
1751 !(flags & MSG_PEEK), &recv_data); 1795 !(flags & MSG_PEEK), &recv_data);
1752 if (err < 0) 1796 if (err < 0)
1753 goto out_wait; 1797 goto out;
1754 1798
1755 if (read >= target || flags & MSG_PEEK) 1799 if (read >= target || flags & MSG_PEEK)
1756 break; 1800 break;
1757 1801
1758 target -= read; 1802 target -= read;
1759 } else {
1760 if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN)
1761 || (vsk->peer_shutdown & SEND_SHUTDOWN)) {
1762 break;
1763 }
1764 /* Don't wait for non-blocking sockets. */
1765 if (timeout == 0) {
1766 err = -EAGAIN;
1767 break;
1768 }
1769
1770 err = transport->notify_recv_pre_block(
1771 vsk, target, &recv_data);
1772 if (err < 0)
1773 break;
1774
1775 release_sock(sk);
1776 timeout = schedule_timeout(timeout);
1777 lock_sock(sk);
1778
1779 if (signal_pending(current)) {
1780 err = sock_intr_errno(timeout);
1781 break;
1782 } else if (timeout == 0) {
1783 err = -EAGAIN;
1784 break;
1785 }
1786
1787 prepare_to_wait(sk_sleep(sk), &wait,
1788 TASK_INTERRUPTIBLE);
1789 } 1803 }
1790 } 1804 }
1791 1805
@@ -1816,8 +1830,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1816 err = copied; 1830 err = copied;
1817 } 1831 }
1818 1832
1819out_wait:
1820 finish_wait(sk_sleep(sk), &wait);
1821out: 1833out:
1822 release_sock(sk); 1834 release_sock(sk);
1823 return err; 1835 return err;
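
A recurring theme in the af_vsock.c changes above is wait-queue hygiene: prepare_to_wait() moves inside the loop body and every exit path calls finish_wait() before leaving, so the task is never left registered on sk_sleep(sk) in a stale state. A condensed sketch of that loop shape using the same primitives as the hunks; have_space() is a placeholder for whatever condition the caller is waiting on:

#include <linux/sched.h>
#include <linux/wait.h>
#include <net/sock.h>

static bool have_space(struct sock *sk);        /* placeholder condition */

/* Sketch of the loop shape used above: (re)register on the wait queue,
 * re-check the condition, sleep, and always unregister via finish_wait()
 * on every exit path, including error returns. */
static int wait_for_space(struct sock *sk, long timeout)
{
        DEFINE_WAIT(wait);
        int err = 0;

        for (;;) {
                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

                if (have_space(sk) || sk->sk_err ||
                    (sk->sk_shutdown & SEND_SHUTDOWN))
                        break;                  /* caller re-checks these */

                if (!timeout) {
                        err = -EAGAIN;          /* non-blocking socket */
                        break;
                }

                release_sock(sk);
                timeout = schedule_timeout(timeout);
                lock_sock(sk);

                if (signal_pending(current)) {
                        err = sock_intr_errno(timeout);
                        break;
                }
                if (!timeout) {
                        err = -EAGAIN;
                        break;
                }
        }
        finish_wait(sk_sleep(sk), &wait);
        return err;
}
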
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index da72ed32f143..6c606120abfe 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -50,8 +50,8 @@ config CFG80211_DEVELOPER_WARNINGS
50 default n 50 default n
51 help 51 help
52 This option enables some additional warnings that help 52 This option enables some additional warnings that help
53 cfg80211 developers and driver developers, but that can 53 cfg80211 developers and driver developers, but beware that
54 trigger due to races with userspace. 54 they can also trigger due to races with userspace.
55 55
56 For example, when a driver reports that it was disconnected 56 For example, when a driver reports that it was disconnected
57 from the AP, but the user disconnects manually at the same 57 from the AP, but the user disconnects manually at the same
@@ -61,19 +61,6 @@ config CFG80211_DEVELOPER_WARNINGS
61 on it (or mac80211). 61 on it (or mac80211).
62 62
63 63
64config CFG80211_REG_DEBUG
65 bool "cfg80211 regulatory debugging"
66 depends on CFG80211
67 default n
68 ---help---
69 You can enable this if you want to debug regulatory changes.
70 For more information on cfg80211 regulatory refer to the wireless
71 wiki:
72
73 http://wireless.kernel.org/en/developers/Regulatory
74
75 If unsure, say N.
76
77config CFG80211_CERTIFICATION_ONUS 64config CFG80211_CERTIFICATION_ONUS
78 bool "cfg80211 certification onus" 65 bool "cfg80211 certification onus"
79 depends on CFG80211 && EXPERT 66 depends on CFG80211 && EXPERT
@@ -123,7 +110,7 @@ config CFG80211_REG_RELAX_NO_IR
123 interface which associated to an AP which userspace assumes or confirms 110 interface which associated to an AP which userspace assumes or confirms
124 to be an authorized master, i.e., with radar detection support and DFS 111 to be an authorized master, i.e., with radar detection support and DFS
125 capabilities. However, note that in order to not create daisy chain 112 capabilities. However, note that in order to not create daisy chain
126 scenarios, this relaxation is not allowed in cases that the BSS client 113 scenarios, this relaxation is not allowed in cases where the BSS client
127 is associated to P2P GO and in addition the P2P GO instantiated on 114 is associated to P2P GO and in addition the P2P GO instantiated on
128 a channel due to this relaxation should not allow connection from 115 a channel due to this relaxation should not allow connection from
129 non P2P clients. 116 non P2P clients.
@@ -148,7 +135,7 @@ config CFG80211_DEBUGFS
148 depends on CFG80211 135 depends on CFG80211
149 depends on DEBUG_FS 136 depends on DEBUG_FS
150 ---help--- 137 ---help---
151 You can enable this if you want to debugfs entries for cfg80211. 138 You can enable this if you want debugfs entries for cfg80211.
152 139
153 If unsure, say N. 140 If unsure, say N.
154 141
@@ -159,7 +146,7 @@ config CFG80211_INTERNAL_REGDB
159 ---help--- 146 ---help---
160 This option generates an internal data structure representing 147 This option generates an internal data structure representing
161 the wireless regulatory rules described in net/wireless/db.txt 148 the wireless regulatory rules described in net/wireless/db.txt
162 and includes code to query that database. This is an alternative 149 and includes code to query that database. This is an alternative
163 to using CRDA for defining regulatory rules for the kernel. 150 to using CRDA for defining regulatory rules for the kernel.
164 151
165 Using this option requires some parsing of the db.txt at build time, 152 Using this option requires some parsing of the db.txt at build time,
@@ -172,7 +159,7 @@ config CFG80211_INTERNAL_REGDB
172 159
173 http://wireless.kernel.org/en/developers/Regulatory 160 http://wireless.kernel.org/en/developers/Regulatory
174 161
175 Most distributions have a CRDA package. So if unsure, say N. 162 Most distributions have a CRDA package. So if unsure, say N.
176 163
177config CFG80211_CRDA_SUPPORT 164config CFG80211_CRDA_SUPPORT
178 bool "support CRDA" if CFG80211_INTERNAL_REGDB 165 bool "support CRDA" if CFG80211_INTERNAL_REGDB
diff --git a/net/wireless/core.c b/net/wireless/core.c
index b0915515640e..9f1c4aa851ef 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -352,6 +352,16 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
352 WARN_ON(ops->add_station && !ops->del_station); 352 WARN_ON(ops->add_station && !ops->del_station);
353 WARN_ON(ops->add_mpath && !ops->del_mpath); 353 WARN_ON(ops->add_mpath && !ops->del_mpath);
354 WARN_ON(ops->join_mesh && !ops->leave_mesh); 354 WARN_ON(ops->join_mesh && !ops->leave_mesh);
355 WARN_ON(ops->start_p2p_device && !ops->stop_p2p_device);
356 WARN_ON(ops->start_ap && !ops->stop_ap);
357 WARN_ON(ops->join_ocb && !ops->leave_ocb);
358 WARN_ON(ops->suspend && !ops->resume);
359 WARN_ON(ops->sched_scan_start && !ops->sched_scan_stop);
360 WARN_ON(ops->remain_on_channel && !ops->cancel_remain_on_channel);
361 WARN_ON(ops->tdls_channel_switch && !ops->tdls_cancel_channel_switch);
362 WARN_ON(ops->add_tx_ts && !ops->del_tx_ts);
363 WARN_ON(ops->set_tx_power && !ops->get_tx_power);
364 WARN_ON(ops->set_antenna && !ops->get_antenna);
355 365
356 alloc_size = sizeof(*rdev) + sizeof_priv; 366 alloc_size = sizeof(*rdev) + sizeof_priv;
357 367
@@ -1147,6 +1157,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
1147 return NOTIFY_DONE; 1157 return NOTIFY_DONE;
1148 } 1158 }
1149 1159
1160 wireless_nlevent_flush();
1161
1150 return NOTIFY_OK; 1162 return NOTIFY_OK;
1151} 1163}
1152 1164
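
The block of WARN_ON()s added to wiphy_new_nm() above extends an existing sanity check: a driver that provides one half of a symmetric callback pair (start/stop, suspend/resume, set/get) must provide the other. The idea in isolation, with an invented ops structure purely for illustration:

#include <linux/bug.h>

/* Illustrative only: an ops table with a symmetric start/stop pair and
 * the allocation-time check the hunk above performs for cfg80211_ops. */
struct demo_ops {
        int  (*start_thing)(void *priv);
        void (*stop_thing)(void *priv);
};

static void check_paired_ops(const struct demo_ops *ops)
{
        /* Warn once at registration; wiring up "start" without "stop"
         * is almost always a driver bug. */
        WARN_ON(ops->start_thing && !ops->stop_thing);
}
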
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 3cd819539241..71447cf86306 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -29,7 +29,8 @@
29#include <linux/ieee80211.h> 29#include <linux/ieee80211.h>
30#include <net/iw_handler.h> 30#include <net/iw_handler.h>
31 31
32#include <linux/crypto.h> 32#include <crypto/hash.h>
33#include <crypto/skcipher.h>
33#include <linux/crc32.h> 34#include <linux/crc32.h>
34 35
35#include <net/lib80211.h> 36#include <net/lib80211.h>
@@ -63,10 +64,10 @@ struct lib80211_tkip_data {
63 64
64 int key_idx; 65 int key_idx;
65 66
66 struct crypto_blkcipher *rx_tfm_arc4; 67 struct crypto_skcipher *rx_tfm_arc4;
67 struct crypto_hash *rx_tfm_michael; 68 struct crypto_ahash *rx_tfm_michael;
68 struct crypto_blkcipher *tx_tfm_arc4; 69 struct crypto_skcipher *tx_tfm_arc4;
69 struct crypto_hash *tx_tfm_michael; 70 struct crypto_ahash *tx_tfm_michael;
70 71
71 /* scratch buffers for virt_to_page() (crypto API) */ 72 /* scratch buffers for virt_to_page() (crypto API) */
72 u8 rx_hdr[16], tx_hdr[16]; 73 u8 rx_hdr[16], tx_hdr[16];
@@ -98,29 +99,29 @@ static void *lib80211_tkip_init(int key_idx)
98 99
99 priv->key_idx = key_idx; 100 priv->key_idx = key_idx;
100 101
101 priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, 102 priv->tx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
102 CRYPTO_ALG_ASYNC); 103 CRYPTO_ALG_ASYNC);
103 if (IS_ERR(priv->tx_tfm_arc4)) { 104 if (IS_ERR(priv->tx_tfm_arc4)) {
104 priv->tx_tfm_arc4 = NULL; 105 priv->tx_tfm_arc4 = NULL;
105 goto fail; 106 goto fail;
106 } 107 }
107 108
108 priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0, 109 priv->tx_tfm_michael = crypto_alloc_ahash("michael_mic", 0,
109 CRYPTO_ALG_ASYNC); 110 CRYPTO_ALG_ASYNC);
110 if (IS_ERR(priv->tx_tfm_michael)) { 111 if (IS_ERR(priv->tx_tfm_michael)) {
111 priv->tx_tfm_michael = NULL; 112 priv->tx_tfm_michael = NULL;
112 goto fail; 113 goto fail;
113 } 114 }
114 115
115 priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, 116 priv->rx_tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0,
116 CRYPTO_ALG_ASYNC); 117 CRYPTO_ALG_ASYNC);
117 if (IS_ERR(priv->rx_tfm_arc4)) { 118 if (IS_ERR(priv->rx_tfm_arc4)) {
118 priv->rx_tfm_arc4 = NULL; 119 priv->rx_tfm_arc4 = NULL;
119 goto fail; 120 goto fail;
120 } 121 }
121 122
122 priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0, 123 priv->rx_tfm_michael = crypto_alloc_ahash("michael_mic", 0,
123 CRYPTO_ALG_ASYNC); 124 CRYPTO_ALG_ASYNC);
124 if (IS_ERR(priv->rx_tfm_michael)) { 125 if (IS_ERR(priv->rx_tfm_michael)) {
125 priv->rx_tfm_michael = NULL; 126 priv->rx_tfm_michael = NULL;
126 goto fail; 127 goto fail;
@@ -130,14 +131,10 @@ static void *lib80211_tkip_init(int key_idx)
130 131
131 fail: 132 fail:
132 if (priv) { 133 if (priv) {
133 if (priv->tx_tfm_michael) 134 crypto_free_ahash(priv->tx_tfm_michael);
134 crypto_free_hash(priv->tx_tfm_michael); 135 crypto_free_skcipher(priv->tx_tfm_arc4);
135 if (priv->tx_tfm_arc4) 136 crypto_free_ahash(priv->rx_tfm_michael);
136 crypto_free_blkcipher(priv->tx_tfm_arc4); 137 crypto_free_skcipher(priv->rx_tfm_arc4);
137 if (priv->rx_tfm_michael)
138 crypto_free_hash(priv->rx_tfm_michael);
139 if (priv->rx_tfm_arc4)
140 crypto_free_blkcipher(priv->rx_tfm_arc4);
141 kfree(priv); 138 kfree(priv);
142 } 139 }
143 140
@@ -148,14 +145,10 @@ static void lib80211_tkip_deinit(void *priv)
148{ 145{
149 struct lib80211_tkip_data *_priv = priv; 146 struct lib80211_tkip_data *_priv = priv;
150 if (_priv) { 147 if (_priv) {
151 if (_priv->tx_tfm_michael) 148 crypto_free_ahash(_priv->tx_tfm_michael);
152 crypto_free_hash(_priv->tx_tfm_michael); 149 crypto_free_skcipher(_priv->tx_tfm_arc4);
153 if (_priv->tx_tfm_arc4) 150 crypto_free_ahash(_priv->rx_tfm_michael);
154 crypto_free_blkcipher(_priv->tx_tfm_arc4); 151 crypto_free_skcipher(_priv->rx_tfm_arc4);
155 if (_priv->rx_tfm_michael)
156 crypto_free_hash(_priv->rx_tfm_michael);
157 if (_priv->rx_tfm_arc4)
158 crypto_free_blkcipher(_priv->rx_tfm_arc4);
159 } 152 }
160 kfree(priv); 153 kfree(priv);
161} 154}
@@ -353,11 +346,12 @@ static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len,
353static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) 346static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
354{ 347{
355 struct lib80211_tkip_data *tkey = priv; 348 struct lib80211_tkip_data *tkey = priv;
356 struct blkcipher_desc desc = { .tfm = tkey->tx_tfm_arc4 }; 349 SKCIPHER_REQUEST_ON_STACK(req, tkey->tx_tfm_arc4);
357 int len; 350 int len;
358 u8 rc4key[16], *pos, *icv; 351 u8 rc4key[16], *pos, *icv;
359 u32 crc; 352 u32 crc;
360 struct scatterlist sg; 353 struct scatterlist sg;
354 int err;
361 355
362 if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { 356 if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) {
363 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; 357 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
@@ -382,9 +376,14 @@ static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
382 icv[2] = crc >> 16; 376 icv[2] = crc >> 16;
383 icv[3] = crc >> 24; 377 icv[3] = crc >> 24;
384 378
385 crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16); 379 crypto_skcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16);
386 sg_init_one(&sg, pos, len + 4); 380 sg_init_one(&sg, pos, len + 4);
387 return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); 381 skcipher_request_set_tfm(req, tkey->tx_tfm_arc4);
382 skcipher_request_set_callback(req, 0, NULL, NULL);
383 skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
384 err = crypto_skcipher_encrypt(req);
385 skcipher_request_zero(req);
386 return err;
388} 387}
389 388
390/* 389/*
@@ -403,7 +402,7 @@ static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n,
403static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) 402static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
404{ 403{
405 struct lib80211_tkip_data *tkey = priv; 404 struct lib80211_tkip_data *tkey = priv;
406 struct blkcipher_desc desc = { .tfm = tkey->rx_tfm_arc4 }; 405 SKCIPHER_REQUEST_ON_STACK(req, tkey->rx_tfm_arc4);
407 u8 rc4key[16]; 406 u8 rc4key[16];
408 u8 keyidx, *pos; 407 u8 keyidx, *pos;
409 u32 iv32; 408 u32 iv32;
@@ -413,6 +412,7 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
413 u32 crc; 412 u32 crc;
414 struct scatterlist sg; 413 struct scatterlist sg;
415 int plen; 414 int plen;
415 int err;
416 416
417 hdr = (struct ieee80211_hdr *)skb->data; 417 hdr = (struct ieee80211_hdr *)skb->data;
418 418
@@ -465,9 +465,14 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
465 465
466 plen = skb->len - hdr_len - 12; 466 plen = skb->len - hdr_len - 12;
467 467
468 crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16); 468 crypto_skcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16);
469 sg_init_one(&sg, pos, plen + 4); 469 sg_init_one(&sg, pos, plen + 4);
470 if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) { 470 skcipher_request_set_tfm(req, tkey->rx_tfm_arc4);
471 skcipher_request_set_callback(req, 0, NULL, NULL);
472 skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
473 err = crypto_skcipher_decrypt(req);
474 skcipher_request_zero(req);
475 if (err) {
471 net_dbg_ratelimited("TKIP: failed to decrypt received packet from %pM\n", 476 net_dbg_ratelimited("TKIP: failed to decrypt received packet from %pM\n",
472 hdr->addr2); 477 hdr->addr2);
473 return -7; 478 return -7;
@@ -505,11 +510,12 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
505 return keyidx; 510 return keyidx;
506} 511}
507 512
508static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, 513static int michael_mic(struct crypto_ahash *tfm_michael, u8 * key, u8 * hdr,
509 u8 * data, size_t data_len, u8 * mic) 514 u8 * data, size_t data_len, u8 * mic)
510{ 515{
511 struct hash_desc desc; 516 AHASH_REQUEST_ON_STACK(req, tfm_michael);
512 struct scatterlist sg[2]; 517 struct scatterlist sg[2];
518 int err;
513 519
514 if (tfm_michael == NULL) { 520 if (tfm_michael == NULL) {
515 pr_warn("%s(): tfm_michael == NULL\n", __func__); 521 pr_warn("%s(): tfm_michael == NULL\n", __func__);
@@ -519,12 +525,15 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr,
519 sg_set_buf(&sg[0], hdr, 16); 525 sg_set_buf(&sg[0], hdr, 16);
520 sg_set_buf(&sg[1], data, data_len); 526 sg_set_buf(&sg[1], data, data_len);
521 527
522 if (crypto_hash_setkey(tfm_michael, key, 8)) 528 if (crypto_ahash_setkey(tfm_michael, key, 8))
523 return -1; 529 return -1;
524 530
525 desc.tfm = tfm_michael; 531 ahash_request_set_tfm(req, tfm_michael);
526 desc.flags = 0; 532 ahash_request_set_callback(req, 0, NULL, NULL);
527 return crypto_hash_digest(&desc, sg, data_len + 16, mic); 533 ahash_request_set_crypt(req, sg, mic, data_len + 16);
534 err = crypto_ahash_digest(req);
535 ahash_request_zero(req);
536 return err;
528} 537}
529 538
530static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) 539static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
@@ -645,10 +654,10 @@ static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv)
645{ 654{
646 struct lib80211_tkip_data *tkey = priv; 655 struct lib80211_tkip_data *tkey = priv;
647 int keyidx; 656 int keyidx;
648 struct crypto_hash *tfm = tkey->tx_tfm_michael; 657 struct crypto_ahash *tfm = tkey->tx_tfm_michael;
649 struct crypto_blkcipher *tfm2 = tkey->tx_tfm_arc4; 658 struct crypto_skcipher *tfm2 = tkey->tx_tfm_arc4;
650 struct crypto_hash *tfm3 = tkey->rx_tfm_michael; 659 struct crypto_ahash *tfm3 = tkey->rx_tfm_michael;
651 struct crypto_blkcipher *tfm4 = tkey->rx_tfm_arc4; 660 struct crypto_skcipher *tfm4 = tkey->rx_tfm_arc4;
652 661
653 keyidx = tkey->key_idx; 662 keyidx = tkey->key_idx;
654 memset(tkey, 0, sizeof(*tkey)); 663 memset(tkey, 0, sizeof(*tkey));
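
The lib80211 TKIP conversion above replaces the old hash_desc/crypto_hash interface with the ahash API driven synchronously through an on-stack request. Pulled together into one place, the calling sequence looks roughly like this (a sketch assembled from the same kernel crypto calls as the hunks; the tfm is assumed to come from crypto_alloc_ahash("michael_mic", 0, CRYPTO_ALG_ASYNC) as in lib80211_tkip_init()):

#include <crypto/hash.h>
#include <linux/scatterlist.h>

/* Sketch: compute a michael_mic digest over hdr||data with the ahash
 * API, using a request allocated on the stack (no async completion). */
static int michael_mic_sketch(struct crypto_ahash *tfm, const u8 *key,
                              u8 *hdr, u8 *data, size_t data_len, u8 *mic)
{
        AHASH_REQUEST_ON_STACK(req, tfm);
        struct scatterlist sg[2];
        int err;

        if (crypto_ahash_setkey(tfm, key, 8))
                return -1;

        sg_init_table(sg, 2);
        sg_set_buf(&sg[0], hdr, 16);
        sg_set_buf(&sg[1], data, data_len);

        ahash_request_set_tfm(req, tfm);
        ahash_request_set_callback(req, 0, NULL, NULL);
        ahash_request_set_crypt(req, sg, mic, data_len + 16);
        err = crypto_ahash_digest(req);
        ahash_request_zero(req);        /* wipe the request off the stack */
        return err;
}
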
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index 1c292e4ea7b6..d05f58b0fd04 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -22,7 +22,7 @@
22 22
23#include <net/lib80211.h> 23#include <net/lib80211.h>
24 24
25#include <linux/crypto.h> 25#include <crypto/skcipher.h>
26#include <linux/crc32.h> 26#include <linux/crc32.h>
27 27
28MODULE_AUTHOR("Jouni Malinen"); 28MODULE_AUTHOR("Jouni Malinen");
@@ -35,8 +35,8 @@ struct lib80211_wep_data {
35 u8 key[WEP_KEY_LEN + 1]; 35 u8 key[WEP_KEY_LEN + 1];
36 u8 key_len; 36 u8 key_len;
37 u8 key_idx; 37 u8 key_idx;
38 struct crypto_blkcipher *tx_tfm; 38 struct crypto_skcipher *tx_tfm;
39 struct crypto_blkcipher *rx_tfm; 39 struct crypto_skcipher *rx_tfm;
40}; 40};
41 41
42static void *lib80211_wep_init(int keyidx) 42static void *lib80211_wep_init(int keyidx)
@@ -48,13 +48,13 @@ static void *lib80211_wep_init(int keyidx)
48 goto fail; 48 goto fail;
49 priv->key_idx = keyidx; 49 priv->key_idx = keyidx;
50 50
51 priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); 51 priv->tx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
52 if (IS_ERR(priv->tx_tfm)) { 52 if (IS_ERR(priv->tx_tfm)) {
53 priv->tx_tfm = NULL; 53 priv->tx_tfm = NULL;
54 goto fail; 54 goto fail;
55 } 55 }
56 56
57 priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); 57 priv->rx_tfm = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
58 if (IS_ERR(priv->rx_tfm)) { 58 if (IS_ERR(priv->rx_tfm)) {
59 priv->rx_tfm = NULL; 59 priv->rx_tfm = NULL;
60 goto fail; 60 goto fail;
@@ -66,10 +66,8 @@ static void *lib80211_wep_init(int keyidx)
66 66
67 fail: 67 fail:
68 if (priv) { 68 if (priv) {
69 if (priv->tx_tfm) 69 crypto_free_skcipher(priv->tx_tfm);
70 crypto_free_blkcipher(priv->tx_tfm); 70 crypto_free_skcipher(priv->rx_tfm);
71 if (priv->rx_tfm)
72 crypto_free_blkcipher(priv->rx_tfm);
73 kfree(priv); 71 kfree(priv);
74 } 72 }
75 return NULL; 73 return NULL;
@@ -79,10 +77,8 @@ static void lib80211_wep_deinit(void *priv)
79{ 77{
80 struct lib80211_wep_data *_priv = priv; 78 struct lib80211_wep_data *_priv = priv;
81 if (_priv) { 79 if (_priv) {
82 if (_priv->tx_tfm) 80 crypto_free_skcipher(_priv->tx_tfm);
83 crypto_free_blkcipher(_priv->tx_tfm); 81 crypto_free_skcipher(_priv->rx_tfm);
84 if (_priv->rx_tfm)
85 crypto_free_blkcipher(_priv->rx_tfm);
86 } 82 }
87 kfree(priv); 83 kfree(priv);
88} 84}
@@ -133,11 +129,12 @@ static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len,
133static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) 129static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
134{ 130{
135 struct lib80211_wep_data *wep = priv; 131 struct lib80211_wep_data *wep = priv;
136 struct blkcipher_desc desc = { .tfm = wep->tx_tfm }; 132 SKCIPHER_REQUEST_ON_STACK(req, wep->tx_tfm);
137 u32 crc, klen, len; 133 u32 crc, klen, len;
138 u8 *pos, *icv; 134 u8 *pos, *icv;
139 struct scatterlist sg; 135 struct scatterlist sg;
140 u8 key[WEP_KEY_LEN + 3]; 136 u8 key[WEP_KEY_LEN + 3];
137 int err;
141 138
142 /* other checks are in lib80211_wep_build_iv */ 139 /* other checks are in lib80211_wep_build_iv */
143 if (skb_tailroom(skb) < 4) 140 if (skb_tailroom(skb) < 4)
@@ -165,9 +162,14 @@ static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
165 icv[2] = crc >> 16; 162 icv[2] = crc >> 16;
166 icv[3] = crc >> 24; 163 icv[3] = crc >> 24;
167 164
168 crypto_blkcipher_setkey(wep->tx_tfm, key, klen); 165 crypto_skcipher_setkey(wep->tx_tfm, key, klen);
169 sg_init_one(&sg, pos, len + 4); 166 sg_init_one(&sg, pos, len + 4);
170 return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); 167 skcipher_request_set_tfm(req, wep->tx_tfm);
168 skcipher_request_set_callback(req, 0, NULL, NULL);
169 skcipher_request_set_crypt(req, &sg, &sg, len + 4, NULL);
170 err = crypto_skcipher_encrypt(req);
171 skcipher_request_zero(req);
172 return err;
171} 173}
172 174
173/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of 175/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of
@@ -180,11 +182,12 @@ static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
180static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) 182static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
181{ 183{
182 struct lib80211_wep_data *wep = priv; 184 struct lib80211_wep_data *wep = priv;
183 struct blkcipher_desc desc = { .tfm = wep->rx_tfm }; 185 SKCIPHER_REQUEST_ON_STACK(req, wep->rx_tfm);
184 u32 crc, klen, plen; 186 u32 crc, klen, plen;
185 u8 key[WEP_KEY_LEN + 3]; 187 u8 key[WEP_KEY_LEN + 3];
186 u8 keyidx, *pos, icv[4]; 188 u8 keyidx, *pos, icv[4];
187 struct scatterlist sg; 189 struct scatterlist sg;
190 int err;
188 191
189 if (skb->len < hdr_len + 8) 192 if (skb->len < hdr_len + 8)
190 return -1; 193 return -1;
@@ -205,9 +208,14 @@ static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
205 /* Apply RC4 to data and compute CRC32 over decrypted data */ 208 /* Apply RC4 to data and compute CRC32 over decrypted data */
206 plen = skb->len - hdr_len - 8; 209 plen = skb->len - hdr_len - 8;
207 210
208 crypto_blkcipher_setkey(wep->rx_tfm, key, klen); 211 crypto_skcipher_setkey(wep->rx_tfm, key, klen);
209 sg_init_one(&sg, pos, plen + 4); 212 sg_init_one(&sg, pos, plen + 4);
210 if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) 213 skcipher_request_set_tfm(req, wep->rx_tfm);
214 skcipher_request_set_callback(req, 0, NULL, NULL);
215 skcipher_request_set_crypt(req, &sg, &sg, plen + 4, NULL);
216 err = crypto_skcipher_decrypt(req);
217 skcipher_request_zero(req);
218 if (err)
211 return -7; 219 return -7;
212 220
213 crc = ~crc32_le(~0, pos, plen); 221 crc = ~crc32_le(~0, pos, plen);
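
The WEP half of the conversion follows the same pattern with the skcipher API. As a single sketch, in-place ARC4 encryption of a buffer with an on-stack request (same calls as the hunks; the tfm is assumed to come from crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC) as in lib80211_wep_init()):

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>

/* Sketch: encrypt a buffer in place with ecb(arc4) using the skcipher
 * API and an on-stack request, mirroring the WEP/TKIP hunks above. */
static int arc4_crypt_sketch(struct crypto_skcipher *tfm,
                             const u8 *key, unsigned int klen,
                             u8 *buf, unsigned int len)
{
        SKCIPHER_REQUEST_ON_STACK(req, tfm);
        struct scatterlist sg;
        int err;

        crypto_skcipher_setkey(tfm, key, klen);
        sg_init_one(&sg, buf, len);

        skcipher_request_set_tfm(req, tfm);
        skcipher_request_set_callback(req, 0, NULL, NULL);
        skcipher_request_set_crypt(req, &sg, &sg, len, NULL);
        err = crypto_skcipher_encrypt(req);
        skcipher_request_zero(req);     /* clear the expanded request */
        return err;
}
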
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index fb44fa3bf4ef..ff328250bc44 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -711,7 +711,7 @@ EXPORT_SYMBOL(cfg80211_rx_mgmt);
711 711
712void cfg80211_dfs_channels_update_work(struct work_struct *work) 712void cfg80211_dfs_channels_update_work(struct work_struct *work)
713{ 713{
714 struct delayed_work *delayed_work; 714 struct delayed_work *delayed_work = to_delayed_work(work);
715 struct cfg80211_registered_device *rdev; 715 struct cfg80211_registered_device *rdev;
716 struct cfg80211_chan_def chandef; 716 struct cfg80211_chan_def chandef;
717 struct ieee80211_supported_band *sband; 717 struct ieee80211_supported_band *sband;
@@ -721,7 +721,6 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work)
721 unsigned long timeout, next_time = 0; 721 unsigned long timeout, next_time = 0;
722 int bandid, i; 722 int bandid, i;
723 723
724 delayed_work = container_of(work, struct delayed_work, work);
725 rdev = container_of(delayed_work, struct cfg80211_registered_device, 724 rdev = container_of(delayed_work, struct cfg80211_registered_device,
726 dfs_update_channels_wk); 725 dfs_update_channels_wk);
727 wiphy = &rdev->wiphy; 726 wiphy = &rdev->wiphy;
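
The mlme.c hunk swaps an open-coded container_of(work, struct delayed_work, work) for the to_delayed_work() helper. A minimal delayed-work handler in that style; the surrounding structure here is invented for illustration:

#include <linux/workqueue.h>

struct demo_device {
        struct delayed_work update_wk;
        /* ... driver state ... */
};

static void demo_update_work(struct work_struct *work)
{
        /* to_delayed_work() hides the container_of() from the work item
         * to its enclosing delayed_work; a second container_of() then
         * reaches the private structure that embeds it. */
        struct delayed_work *dwork = to_delayed_work(work);
        struct demo_device *dev = container_of(dwork, struct demo_device,
                                               update_wk);

        (void)dev;      /* do the periodic update here */
}
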
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d4786f2802aa..98c924260b3d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> 4 * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
5 * Copyright 2013-2014 Intel Mobile Communications GmbH 5 * Copyright 2013-2014 Intel Mobile Communications GmbH
6 * Copyright 2015 Intel Deutschland GmbH 6 * Copyright 2015-2016 Intel Deutschland GmbH
7 */ 7 */
8 8
9#include <linux/if.h> 9#include <linux/if.h>
@@ -401,6 +401,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
401 [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, 401 [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 },
402 [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, 402 [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 },
403 [NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG }, 403 [NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG },
404 [NL80211_ATTR_PBSS] = { .type = NLA_FLAG },
404}; 405};
405 406
406/* policy for the key attributes */ 407/* policy for the key attributes */
@@ -3461,6 +3462,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
3461 return PTR_ERR(params.acl); 3462 return PTR_ERR(params.acl);
3462 } 3463 }
3463 3464
3465 params.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
3466 if (params.pbss && !rdev->wiphy.bands[IEEE80211_BAND_60GHZ])
3467 return -EOPNOTSUPP;
3468
3464 wdev_lock(wdev); 3469 wdev_lock(wdev);
3465 err = rdev_start_ap(rdev, dev, &params); 3470 err = rdev_start_ap(rdev, dev, &params);
3466 if (!err) { 3471 if (!err) {
@@ -7281,9 +7286,11 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
7281 } 7286 }
7282 7287
7283 if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) { 7288 if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
7284 if (!(rdev->wiphy.features & 7289 if (!((rdev->wiphy.features &
7285 NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) || 7290 NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) &&
7286 !(rdev->wiphy.features & NL80211_FEATURE_QUIET)) 7291 (rdev->wiphy.features & NL80211_FEATURE_QUIET)) &&
7292 !wiphy_ext_feature_isset(&rdev->wiphy,
7293 NL80211_EXT_FEATURE_RRM))
7287 return -EINVAL; 7294 return -EINVAL;
7288 req.flags |= ASSOC_REQ_USE_RRM; 7295 req.flags |= ASSOC_REQ_USE_RRM;
7289 } 7296 }
@@ -7547,7 +7554,7 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
7547 7554
7548 if ((ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) && 7555 if ((ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) &&
7549 no_ht) { 7556 no_ht) {
7550 kfree(connkeys); 7557 kzfree(connkeys);
7551 return -EINVAL; 7558 return -EINVAL;
7552 } 7559 }
7553 } 7560 }
@@ -7971,15 +7978,23 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
7971 } 7978 }
7972 7979
7973 if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) { 7980 if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
7974 if (!(rdev->wiphy.features & 7981 if (!((rdev->wiphy.features &
7975 NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) || 7982 NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) &&
7976 !(rdev->wiphy.features & NL80211_FEATURE_QUIET)) { 7983 (rdev->wiphy.features & NL80211_FEATURE_QUIET)) &&
7984 !wiphy_ext_feature_isset(&rdev->wiphy,
7985 NL80211_EXT_FEATURE_RRM)) {
7977 kzfree(connkeys); 7986 kzfree(connkeys);
7978 return -EINVAL; 7987 return -EINVAL;
7979 } 7988 }
7980 connect.flags |= ASSOC_REQ_USE_RRM; 7989 connect.flags |= ASSOC_REQ_USE_RRM;
7981 } 7990 }
7982 7991
7992 connect.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
7993 if (connect.pbss && !rdev->wiphy.bands[IEEE80211_BAND_60GHZ]) {
7994 kzfree(connkeys);
7995 return -EOPNOTSUPP;
7996 }
7997
7983 wdev_lock(dev->ieee80211_ptr); 7998 wdev_lock(dev->ieee80211_ptr);
7984 err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL); 7999 err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL);
7985 wdev_unlock(dev->ieee80211_ptr); 8000 wdev_unlock(dev->ieee80211_ptr);
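
The two RRM hunks above tighten the gate from "either legacy feature flag" to "both legacy flags, or the new NL80211_EXT_FEATURE_RRM extended feature". Written out as a standalone predicate (the helper name is invented; the flag names are the ones used in the hunks):

#include <net/cfg80211.h>

/* Sketch: the connect/associate paths above only honour
 * NL80211_ATTR_USE_RRM when this evaluates true, otherwise they
 * return -EINVAL. */
static bool rrm_capable(struct wiphy *wiphy)
{
        u32 legacy = NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES |
                     NL80211_FEATURE_QUIET;

        return (wiphy->features & legacy) == legacy ||
               wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_RRM);
}
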
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 722da616438c..6582d155e2fc 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -43,6 +43,7 @@ static const struct radiotap_align_size rtap_namespace_sizes[] = {
43 [IEEE80211_RADIOTAP_DATA_RETRIES] = { .align = 1, .size = 1, }, 43 [IEEE80211_RADIOTAP_DATA_RETRIES] = { .align = 1, .size = 1, },
44 [IEEE80211_RADIOTAP_MCS] = { .align = 1, .size = 3, }, 44 [IEEE80211_RADIOTAP_MCS] = { .align = 1, .size = 3, },
45 [IEEE80211_RADIOTAP_AMPDU_STATUS] = { .align = 4, .size = 8, }, 45 [IEEE80211_RADIOTAP_AMPDU_STATUS] = { .align = 4, .size = 8, },
46 [IEEE80211_RADIOTAP_VHT] = { .align = 2, .size = 12, },
46 /* 47 /*
47 * add more here as they are defined in radiotap.h 48 * add more here as they are defined in radiotap.h
48 */ 49 */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 3b0ce1c484a3..c5fb317eee68 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -60,13 +60,6 @@
60#include "regdb.h" 60#include "regdb.h"
61#include "nl80211.h" 61#include "nl80211.h"
62 62
63#ifdef CONFIG_CFG80211_REG_DEBUG
64#define REG_DBG_PRINT(format, args...) \
65 printk(KERN_DEBUG pr_fmt(format), ##args)
66#else
67#define REG_DBG_PRINT(args...)
68#endif
69
70/* 63/*
71 * Grace period we give before making sure all current interfaces reside on 64 * Grace period we give before making sure all current interfaces reside on
72 * channels allowed by the current regulatory domain. 65 * channels allowed by the current regulatory domain.
@@ -178,12 +171,10 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
178 if (wiphy_regd->dfs_region == regd->dfs_region) 171 if (wiphy_regd->dfs_region == regd->dfs_region)
179 goto out; 172 goto out;
180 173
181 REG_DBG_PRINT("%s: device specific dfs_region " 174 pr_debug("%s: device specific dfs_region (%s) disagrees with cfg80211's central dfs_region (%s)\n",
182 "(%s) disagrees with cfg80211's " 175 dev_name(&wiphy->dev),
183 "central dfs_region (%s)\n", 176 reg_dfs_region_str(wiphy_regd->dfs_region),
184 dev_name(&wiphy->dev), 177 reg_dfs_region_str(regd->dfs_region));
185 reg_dfs_region_str(wiphy_regd->dfs_region),
186 reg_dfs_region_str(regd->dfs_region));
187 178
188out: 179out:
189 return regd->dfs_region; 180 return regd->dfs_region;
@@ -231,20 +222,22 @@ static const struct ieee80211_regdomain world_regdom = {
231 /* IEEE 802.11b/g, channels 1..11 */ 222 /* IEEE 802.11b/g, channels 1..11 */
232 REG_RULE(2412-10, 2462+10, 40, 6, 20, 0), 223 REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
233 /* IEEE 802.11b/g, channels 12..13. */ 224 /* IEEE 802.11b/g, channels 12..13. */
234 REG_RULE(2467-10, 2472+10, 40, 6, 20, 225 REG_RULE(2467-10, 2472+10, 20, 6, 20,
235 NL80211_RRF_NO_IR), 226 NL80211_RRF_NO_IR | NL80211_RRF_AUTO_BW),
236 /* IEEE 802.11 channel 14 - Only JP enables 227 /* IEEE 802.11 channel 14 - Only JP enables
237 * this and for 802.11b only */ 228 * this and for 802.11b only */
238 REG_RULE(2484-10, 2484+10, 20, 6, 20, 229 REG_RULE(2484-10, 2484+10, 20, 6, 20,
239 NL80211_RRF_NO_IR | 230 NL80211_RRF_NO_IR |
240 NL80211_RRF_NO_OFDM), 231 NL80211_RRF_NO_OFDM),
241 /* IEEE 802.11a, channel 36..48 */ 232 /* IEEE 802.11a, channel 36..48 */
242 REG_RULE(5180-10, 5240+10, 160, 6, 20, 233 REG_RULE(5180-10, 5240+10, 80, 6, 20,
243 NL80211_RRF_NO_IR), 234 NL80211_RRF_NO_IR |
235 NL80211_RRF_AUTO_BW),
244 236
245 /* IEEE 802.11a, channel 52..64 - DFS required */ 237 /* IEEE 802.11a, channel 52..64 - DFS required */
246 REG_RULE(5260-10, 5320+10, 160, 6, 20, 238 REG_RULE(5260-10, 5320+10, 80, 6, 20,
247 NL80211_RRF_NO_IR | 239 NL80211_RRF_NO_IR |
240 NL80211_RRF_AUTO_BW |
248 NL80211_RRF_DFS), 241 NL80211_RRF_DFS),
249 242
250 /* IEEE 802.11a, channel 100..144 - DFS required */ 243 /* IEEE 802.11a, channel 100..144 - DFS required */
@@ -541,7 +534,7 @@ static DECLARE_DELAYED_WORK(crda_timeout, crda_timeout_work);
541 534
542static void crda_timeout_work(struct work_struct *work) 535static void crda_timeout_work(struct work_struct *work)
543{ 536{
544 REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); 537 pr_debug("Timeout while waiting for CRDA to reply, restoring regulatory settings\n");
545 rtnl_lock(); 538 rtnl_lock();
546 reg_crda_timeouts++; 539 reg_crda_timeouts++;
547 restore_regulatory_settings(true); 540 restore_regulatory_settings(true);
@@ -583,7 +576,7 @@ static int call_crda(const char *alpha2)
583 576
584 if (!is_world_regdom((char *) alpha2)) 577 if (!is_world_regdom((char *) alpha2))
585 pr_debug("Calling CRDA for country: %c%c\n", 578 pr_debug("Calling CRDA for country: %c%c\n",
586 alpha2[0], alpha2[1]); 579 alpha2[0], alpha2[1]);
587 else 580 else
588 pr_debug("Calling CRDA to update world regulatory domain\n"); 581 pr_debug("Calling CRDA to update world regulatory domain\n");
589 582
@@ -1130,42 +1123,6 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator)
1130} 1123}
1131EXPORT_SYMBOL(reg_initiator_name); 1124EXPORT_SYMBOL(reg_initiator_name);
1132 1125
1133static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd,
1134 struct ieee80211_channel *chan,
1135 const struct ieee80211_reg_rule *reg_rule)
1136{
1137#ifdef CONFIG_CFG80211_REG_DEBUG
1138 const struct ieee80211_power_rule *power_rule;
1139 const struct ieee80211_freq_range *freq_range;
1140 char max_antenna_gain[32], bw[32];
1141
1142 power_rule = &reg_rule->power_rule;
1143 freq_range = &reg_rule->freq_range;
1144
1145 if (!power_rule->max_antenna_gain)
1146 snprintf(max_antenna_gain, sizeof(max_antenna_gain), "N/A");
1147 else
1148 snprintf(max_antenna_gain, sizeof(max_antenna_gain), "%d mBi",
1149 power_rule->max_antenna_gain);
1150
1151 if (reg_rule->flags & NL80211_RRF_AUTO_BW)
1152 snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO",
1153 freq_range->max_bandwidth_khz,
1154 reg_get_max_bandwidth(regd, reg_rule));
1155 else
1156 snprintf(bw, sizeof(bw), "%d KHz",
1157 freq_range->max_bandwidth_khz);
1158
1159 REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n",
1160 chan->center_freq);
1161
1162 REG_DBG_PRINT("(%d KHz - %d KHz @ %s), (%s, %d mBm)\n",
1163 freq_range->start_freq_khz, freq_range->end_freq_khz,
1164 bw, max_antenna_gain,
1165 power_rule->max_eirp);
1166#endif
1167}
1168
1169static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd, 1126static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd,
1170 const struct ieee80211_reg_rule *reg_rule, 1127 const struct ieee80211_reg_rule *reg_rule,
1171 const struct ieee80211_channel *chan) 1128 const struct ieee80211_channel *chan)
@@ -1240,20 +1197,19 @@ static void handle_channel(struct wiphy *wiphy,
1240 if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && 1197 if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
1241 request_wiphy && request_wiphy == wiphy && 1198 request_wiphy && request_wiphy == wiphy &&
1242 request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) { 1199 request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
1243 REG_DBG_PRINT("Disabling freq %d MHz for good\n", 1200 pr_debug("Disabling freq %d MHz for good\n",
1244 chan->center_freq); 1201 chan->center_freq);
1245 chan->orig_flags |= IEEE80211_CHAN_DISABLED; 1202 chan->orig_flags |= IEEE80211_CHAN_DISABLED;
1246 chan->flags = chan->orig_flags; 1203 chan->flags = chan->orig_flags;
1247 } else { 1204 } else {
1248 REG_DBG_PRINT("Disabling freq %d MHz\n", 1205 pr_debug("Disabling freq %d MHz\n",
1249 chan->center_freq); 1206 chan->center_freq);
1250 chan->flags |= IEEE80211_CHAN_DISABLED; 1207 chan->flags |= IEEE80211_CHAN_DISABLED;
1251 } 1208 }
1252 return; 1209 return;
1253 } 1210 }
1254 1211
1255 regd = reg_get_regdomain(wiphy); 1212 regd = reg_get_regdomain(wiphy);
1256 chan_reg_rule_print_dbg(regd, chan, reg_rule);
1257 1213
1258 power_rule = &reg_rule->power_rule; 1214 power_rule = &reg_rule->power_rule;
1259 bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan); 1215 bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan);
@@ -1391,18 +1347,15 @@ static bool ignore_reg_update(struct wiphy *wiphy,
1391 return true; 1347 return true;
1392 1348
1393 if (!lr) { 1349 if (!lr) {
1394 REG_DBG_PRINT("Ignoring regulatory request set by %s " 1350 pr_debug("Ignoring regulatory request set by %s since last_request is not set\n",
1395 "since last_request is not set\n", 1351 reg_initiator_name(initiator));
1396 reg_initiator_name(initiator));
1397 return true; 1352 return true;
1398 } 1353 }
1399 1354
1400 if (initiator == NL80211_REGDOM_SET_BY_CORE && 1355 if (initiator == NL80211_REGDOM_SET_BY_CORE &&
1401 wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) { 1356 wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) {
1402 REG_DBG_PRINT("Ignoring regulatory request set by %s " 1357 pr_debug("Ignoring regulatory request set by %s since the driver uses its own custom regulatory domain\n",
1403 "since the driver uses its own custom " 1358 reg_initiator_name(initiator));
1404 "regulatory domain\n",
1405 reg_initiator_name(initiator));
1406 return true; 1359 return true;
1407 } 1360 }
1408 1361
@@ -1413,10 +1366,8 @@ static bool ignore_reg_update(struct wiphy *wiphy,
1413 if (wiphy_strict_alpha2_regd(wiphy) && !wiphy->regd && 1366 if (wiphy_strict_alpha2_regd(wiphy) && !wiphy->regd &&
1414 initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && 1367 initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
1415 !is_world_regdom(lr->alpha2)) { 1368 !is_world_regdom(lr->alpha2)) {
1416 REG_DBG_PRINT("Ignoring regulatory request set by %s " 1369 pr_debug("Ignoring regulatory request set by %s since the driver requires its own regulatory domain to be set first\n",
1417 "since the driver requires its own regulatory " 1370 reg_initiator_name(initiator));
1418 "domain to be set first\n",
1419 reg_initiator_name(initiator));
1420 return true; 1371 return true;
1421 } 1372 }
1422 1373
@@ -1697,7 +1648,7 @@ static void reg_check_chans_work(struct work_struct *work)
1697{ 1648{
1698 struct cfg80211_registered_device *rdev; 1649 struct cfg80211_registered_device *rdev;
1699 1650
1700 REG_DBG_PRINT("Verifying active interfaces after reg change\n"); 1651 pr_debug("Verifying active interfaces after reg change\n");
1701 rtnl_lock(); 1652 rtnl_lock();
1702 1653
1703 list_for_each_entry(rdev, &cfg80211_rdev_list, list) 1654 list_for_each_entry(rdev, &cfg80211_rdev_list, list)
@@ -1779,8 +1730,8 @@ static void handle_channel_custom(struct wiphy *wiphy,
1779 } 1730 }
1780 1731
1781 if (IS_ERR(reg_rule)) { 1732 if (IS_ERR(reg_rule)) {
1782 REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n", 1733 pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n",
1783 chan->center_freq); 1734 chan->center_freq);
1784 if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { 1735 if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
1785 chan->flags |= IEEE80211_CHAN_DISABLED; 1736 chan->flags |= IEEE80211_CHAN_DISABLED;
1786 } else { 1737 } else {
@@ -1790,8 +1741,6 @@ static void handle_channel_custom(struct wiphy *wiphy,
1790 return; 1741 return;
1791 } 1742 }
1792 1743
1793 chan_reg_rule_print_dbg(regd, chan, reg_rule);
1794
1795 power_rule = &reg_rule->power_rule; 1744 power_rule = &reg_rule->power_rule;
1796 bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan); 1745 bw_flags = reg_rule_to_chan_bw_flags(regd, reg_rule, chan);
1797 1746
@@ -2522,7 +2471,7 @@ static void restore_alpha2(char *alpha2, bool reset_user)
2522 if (is_user_regdom_saved()) { 2471 if (is_user_regdom_saved()) {
2523 /* Unless we're asked to ignore it and reset it */ 2472 /* Unless we're asked to ignore it and reset it */
2524 if (reset_user) { 2473 if (reset_user) {
2525 REG_DBG_PRINT("Restoring regulatory settings including user preference\n"); 2474 pr_debug("Restoring regulatory settings including user preference\n");
2526 user_alpha2[0] = '9'; 2475 user_alpha2[0] = '9';
2527 user_alpha2[1] = '7'; 2476 user_alpha2[1] = '7';
2528 2477
@@ -2532,24 +2481,24 @@ static void restore_alpha2(char *alpha2, bool reset_user)
2532 * back as they were for a full restore. 2481 * back as they were for a full restore.
2533 */ 2482 */
2534 if (!is_world_regdom(ieee80211_regdom)) { 2483 if (!is_world_regdom(ieee80211_regdom)) {
2535 REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", 2484 pr_debug("Keeping preference on module parameter ieee80211_regdom: %c%c\n",
2536 ieee80211_regdom[0], ieee80211_regdom[1]); 2485 ieee80211_regdom[0], ieee80211_regdom[1]);
2537 alpha2[0] = ieee80211_regdom[0]; 2486 alpha2[0] = ieee80211_regdom[0];
2538 alpha2[1] = ieee80211_regdom[1]; 2487 alpha2[1] = ieee80211_regdom[1];
2539 } 2488 }
2540 } else { 2489 } else {
2541 REG_DBG_PRINT("Restoring regulatory settings while preserving user preference for: %c%c\n", 2490 pr_debug("Restoring regulatory settings while preserving user preference for: %c%c\n",
2542 user_alpha2[0], user_alpha2[1]); 2491 user_alpha2[0], user_alpha2[1]);
2543 alpha2[0] = user_alpha2[0]; 2492 alpha2[0] = user_alpha2[0];
2544 alpha2[1] = user_alpha2[1]; 2493 alpha2[1] = user_alpha2[1];
2545 } 2494 }
2546 } else if (!is_world_regdom(ieee80211_regdom)) { 2495 } else if (!is_world_regdom(ieee80211_regdom)) {
2547 REG_DBG_PRINT("Keeping preference on module parameter ieee80211_regdom: %c%c\n", 2496 pr_debug("Keeping preference on module parameter ieee80211_regdom: %c%c\n",
2548 ieee80211_regdom[0], ieee80211_regdom[1]); 2497 ieee80211_regdom[0], ieee80211_regdom[1]);
2549 alpha2[0] = ieee80211_regdom[0]; 2498 alpha2[0] = ieee80211_regdom[0];
2550 alpha2[1] = ieee80211_regdom[1]; 2499 alpha2[1] = ieee80211_regdom[1];
2551 } else 2500 } else
2552 REG_DBG_PRINT("Restoring regulatory settings\n"); 2501 pr_debug("Restoring regulatory settings\n");
2553} 2502}
2554 2503
2555static void restore_custom_reg_settings(struct wiphy *wiphy) 2504static void restore_custom_reg_settings(struct wiphy *wiphy)
@@ -2661,14 +2610,14 @@ static void restore_regulatory_settings(bool reset_user)
2661 list_splice_tail_init(&tmp_reg_req_list, &reg_requests_list); 2610 list_splice_tail_init(&tmp_reg_req_list, &reg_requests_list);
2662 spin_unlock(&reg_requests_lock); 2611 spin_unlock(&reg_requests_lock);
2663 2612
2664 REG_DBG_PRINT("Kicking the queue\n"); 2613 pr_debug("Kicking the queue\n");
2665 2614
2666 schedule_work(&reg_work); 2615 schedule_work(&reg_work);
2667} 2616}
2668 2617
2669void regulatory_hint_disconnect(void) 2618void regulatory_hint_disconnect(void)
2670{ 2619{
2671 REG_DBG_PRINT("All devices are disconnected, going to restore regulatory settings\n"); 2620 pr_debug("All devices are disconnected, going to restore regulatory settings\n");
2672 restore_regulatory_settings(false); 2621 restore_regulatory_settings(false);
2673} 2622}
2674 2623
@@ -2716,10 +2665,10 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
2716 if (!reg_beacon) 2665 if (!reg_beacon)
2717 return -ENOMEM; 2666 return -ENOMEM;
2718 2667
2719 REG_DBG_PRINT("Found new beacon on frequency: %d MHz (Ch %d) on %s\n", 2668 pr_debug("Found new beacon on frequency: %d MHz (Ch %d) on %s\n",
2720 beacon_chan->center_freq, 2669 beacon_chan->center_freq,
2721 ieee80211_frequency_to_channel(beacon_chan->center_freq), 2670 ieee80211_frequency_to_channel(beacon_chan->center_freq),
2722 wiphy_name(wiphy)); 2671 wiphy_name(wiphy));
2723 2672
2724 memcpy(&reg_beacon->chan, beacon_chan, 2673 memcpy(&reg_beacon->chan, beacon_chan,
2725 sizeof(struct ieee80211_channel)); 2674 sizeof(struct ieee80211_channel));
@@ -2745,7 +2694,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
2745 const struct ieee80211_power_rule *power_rule = NULL; 2694 const struct ieee80211_power_rule *power_rule = NULL;
2746 char bw[32], cac_time[32]; 2695 char bw[32], cac_time[32];
2747 2696
2748 pr_info(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n"); 2697 pr_debug(" (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp), (dfs_cac_time)\n");
2749 2698
2750 for (i = 0; i < rd->n_reg_rules; i++) { 2699 for (i = 0; i < rd->n_reg_rules; i++) {
2751 reg_rule = &rd->reg_rules[i]; 2700 reg_rule = &rd->reg_rules[i];
@@ -2772,7 +2721,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
2772 * in certain regions 2721 * in certain regions
2773 */ 2722 */
2774 if (power_rule->max_antenna_gain) 2723 if (power_rule->max_antenna_gain)
2775 pr_info(" (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n", 2724 pr_debug(" (%d KHz - %d KHz @ %s), (%d mBi, %d mBm), (%s)\n",
2776 freq_range->start_freq_khz, 2725 freq_range->start_freq_khz,
2777 freq_range->end_freq_khz, 2726 freq_range->end_freq_khz,
2778 bw, 2727 bw,
@@ -2780,7 +2729,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
2780 power_rule->max_eirp, 2729 power_rule->max_eirp,
2781 cac_time); 2730 cac_time);
2782 else 2731 else
2783 pr_info(" (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n", 2732 pr_debug(" (%d KHz - %d KHz @ %s), (N/A, %d mBm), (%s)\n",
2784 freq_range->start_freq_khz, 2733 freq_range->start_freq_khz,
2785 freq_range->end_freq_khz, 2734 freq_range->end_freq_khz,
2786 bw, 2735 bw,
@@ -2798,8 +2747,7 @@ bool reg_supported_dfs_region(enum nl80211_dfs_regions dfs_region)
2798 case NL80211_DFS_JP: 2747 case NL80211_DFS_JP:
2799 return true; 2748 return true;
2800 default: 2749 default:
2801 REG_DBG_PRINT("Ignoring uknown DFS master region: %d\n", 2750 pr_debug("Ignoring uknown DFS master region: %d\n", dfs_region);
2802 dfs_region);
2803 return false; 2751 return false;
2804 } 2752 }
2805} 2753}
@@ -2813,35 +2761,35 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
2813 struct cfg80211_registered_device *rdev; 2761 struct cfg80211_registered_device *rdev;
2814 rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx); 2762 rdev = cfg80211_rdev_by_wiphy_idx(lr->wiphy_idx);
2815 if (rdev) { 2763 if (rdev) {
2816 pr_info("Current regulatory domain updated by AP to: %c%c\n", 2764 pr_debug("Current regulatory domain updated by AP to: %c%c\n",
2817 rdev->country_ie_alpha2[0], 2765 rdev->country_ie_alpha2[0],
2818 rdev->country_ie_alpha2[1]); 2766 rdev->country_ie_alpha2[1]);
2819 } else 2767 } else
2820 pr_info("Current regulatory domain intersected:\n"); 2768 pr_debug("Current regulatory domain intersected:\n");
2821 } else 2769 } else
2822 pr_info("Current regulatory domain intersected:\n"); 2770 pr_debug("Current regulatory domain intersected:\n");
2823 } else if (is_world_regdom(rd->alpha2)) { 2771 } else if (is_world_regdom(rd->alpha2)) {
2824 pr_info("World regulatory domain updated:\n"); 2772 pr_debug("World regulatory domain updated:\n");
2825 } else { 2773 } else {
2826 if (is_unknown_alpha2(rd->alpha2)) 2774 if (is_unknown_alpha2(rd->alpha2))
2827 pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n"); 2775 pr_debug("Regulatory domain changed to driver built-in settings (unknown country)\n");
2828 else { 2776 else {
2829 if (reg_request_cell_base(lr)) 2777 if (reg_request_cell_base(lr))
2830 pr_info("Regulatory domain changed to country: %c%c by Cell Station\n", 2778 pr_debug("Regulatory domain changed to country: %c%c by Cell Station\n",
2831 rd->alpha2[0], rd->alpha2[1]); 2779 rd->alpha2[0], rd->alpha2[1]);
2832 else 2780 else
2833 pr_info("Regulatory domain changed to country: %c%c\n", 2781 pr_debug("Regulatory domain changed to country: %c%c\n",
2834 rd->alpha2[0], rd->alpha2[1]); 2782 rd->alpha2[0], rd->alpha2[1]);
2835 } 2783 }
2836 } 2784 }
2837 2785
2838 pr_info(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region)); 2786 pr_debug(" DFS Master region: %s", reg_dfs_region_str(rd->dfs_region));
2839 print_rd_rules(rd); 2787 print_rd_rules(rd);
2840} 2788}
2841 2789
2842static void print_regdomain_info(const struct ieee80211_regdomain *rd) 2790static void print_regdomain_info(const struct ieee80211_regdomain *rd)
2843{ 2791{
2844 pr_info("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]); 2792 pr_debug("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
2845 print_rd_rules(rd); 2793 print_rd_rules(rd);
2846} 2794}
2847 2795
@@ -2862,7 +2810,8 @@ static int reg_set_rd_user(const struct ieee80211_regdomain *rd,
2862 return -EALREADY; 2810 return -EALREADY;
2863 2811
2864 if (!is_valid_rd(rd)) { 2812 if (!is_valid_rd(rd)) {
2865 pr_err("Invalid regulatory domain detected:\n"); 2813 pr_err("Invalid regulatory domain detected: %c%c\n",
2814 rd->alpha2[0], rd->alpha2[1]);
2866 print_regdomain_info(rd); 2815 print_regdomain_info(rd);
2867 return -EINVAL; 2816 return -EINVAL;
2868 } 2817 }
@@ -2898,7 +2847,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
2898 return -EALREADY; 2847 return -EALREADY;
2899 2848
2900 if (!is_valid_rd(rd)) { 2849 if (!is_valid_rd(rd)) {
2901 pr_err("Invalid regulatory domain detected:\n"); 2850 pr_err("Invalid regulatory domain detected: %c%c\n",
2851 rd->alpha2[0], rd->alpha2[1]);
2902 print_regdomain_info(rd); 2852 print_regdomain_info(rd);
2903 return -EINVAL; 2853 return -EINVAL;
2904 } 2854 }
@@ -2956,7 +2906,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd,
2956 */ 2906 */
2957 2907
2958 if (!is_valid_rd(rd)) { 2908 if (!is_valid_rd(rd)) {
2959 pr_err("Invalid regulatory domain detected:\n"); 2909 pr_err("Invalid regulatory domain detected: %c%c\n",
2910 rd->alpha2[0], rd->alpha2[1]);
2960 print_regdomain_info(rd); 2911 print_regdomain_info(rd);
2961 return -EINVAL; 2912 return -EINVAL;
2962 } 2913 }
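Throughout the reg.c hunks the driver-private REG_DBG_PRINT() macro is replaced by plain pr_debug() calls (with the previously wrapped format strings folded back into single lines), the pr_info() messages about regulatory domain changes are demoted to pr_debug(), and the pr_err() for an invalid regulatory domain now names the offending alpha2 country code. As a rough sketch of what the conversion means for logging, assuming the removed macro looked approximately like the definition below (reconstructed for illustration, not quoted from the patch):

/* Hedged sketch of old vs. new debug-print style; REG_DBG_PRINT below is an
 * approximation of the removed macro, not a verbatim copy of it.
 */
#include <linux/kernel.h>
#include <linux/printk.h>

#ifdef CONFIG_CFG80211_REG_DEBUG
#define REG_DBG_PRINT(fmt, args...)	printk(KERN_DEBUG pr_fmt(fmt), ##args)
#else
#define REG_DBG_PRINT(fmt, args...)	do {} while (0)
#endif

static void debug_style_example(const char *initiator_name)
{
	/* Old style: compiled out entirely unless CONFIG_CFG80211_REG_DEBUG=y */
	REG_DBG_PRINT("Ignoring regulatory request set by %s\n", initiator_name);

	/* New style: an ordinary pr_debug() site */
	pr_debug("Ignoring regulatory request set by %s\n", initiator_name);
}

With CONFIG_DYNAMIC_DEBUG the pr_debug() sites can be toggled per file or per call site through /sys/kernel/debug/dynamic_debug/control instead of requiring a rebuild with the cfg80211-specific debug option.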
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 8020b5b094d4..544558171787 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -264,7 +264,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
264 wdev->conn->params.bssid, 264 wdev->conn->params.bssid,
265 wdev->conn->params.ssid, 265 wdev->conn->params.ssid,
266 wdev->conn->params.ssid_len, 266 wdev->conn->params.ssid_len,
267 IEEE80211_BSS_TYPE_ESS, 267 wdev->conn_bss_type,
268 IEEE80211_PRIVACY(wdev->conn->params.privacy)); 268 IEEE80211_PRIVACY(wdev->conn->params.privacy));
269 if (!bss) 269 if (!bss)
270 return NULL; 270 return NULL;
@@ -687,7 +687,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
687 WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect); 687 WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect);
688 bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, 688 bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid,
689 wdev->ssid, wdev->ssid_len, 689 wdev->ssid, wdev->ssid_len,
690 IEEE80211_BSS_TYPE_ESS, 690 wdev->conn_bss_type,
691 IEEE80211_PRIVACY_ANY); 691 IEEE80211_PRIVACY_ANY);
692 if (bss) 692 if (bss)
693 cfg80211_hold_bss(bss_from_pub(bss)); 693 cfg80211_hold_bss(bss_from_pub(bss));
@@ -846,7 +846,7 @@ void cfg80211_roamed(struct net_device *dev,
846 846
847 bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid, 847 bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid,
848 wdev->ssid_len, 848 wdev->ssid_len,
849 IEEE80211_BSS_TYPE_ESS, IEEE80211_PRIVACY_ANY); 849 wdev->conn_bss_type, IEEE80211_PRIVACY_ANY);
850 if (WARN_ON(!bss)) 850 if (WARN_ON(!bss))
851 return; 851 return;
852 852
@@ -917,6 +917,12 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
917 917
918 nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); 918 nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap);
919 919
920 /* stop critical protocol if supported */
921 if (rdev->ops->crit_proto_stop && rdev->crit_proto_nlportid) {
922 rdev->crit_proto_nlportid = 0;
923 rdev_crit_proto_stop(rdev, wdev);
924 }
925
920 /* 926 /*
921 * Delete all the keys ... pairwise keys can't really 927 * Delete all the keys ... pairwise keys can't really
922 * exist any more anyway, but default keys might. 928 * exist any more anyway, but default keys might.
@@ -1017,6 +1023,9 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
1017 memcpy(wdev->ssid, connect->ssid, connect->ssid_len); 1023 memcpy(wdev->ssid, connect->ssid, connect->ssid_len);
1018 wdev->ssid_len = connect->ssid_len; 1024 wdev->ssid_len = connect->ssid_len;
1019 1025
1026 wdev->conn_bss_type = connect->pbss ? IEEE80211_BSS_TYPE_PBSS :
1027 IEEE80211_BSS_TYPE_ESS;
1028
1020 if (!rdev->ops->connect) 1029 if (!rdev->ops->connect)
1021 err = cfg80211_sme_connect(wdev, connect, prev_bssid); 1030 err = cfg80211_sme_connect(wdev, connect, prev_bssid);
1022 else 1031 else
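The sme.c hunks make the SME look up the BSS with the connection's actual BSS type instead of hard-coding IEEE80211_BSS_TYPE_ESS: cfg80211_connect() records the type in the new wdev->conn_bss_type field, and the lookups in cfg80211_get_conn_bss(), __cfg80211_connect_result() and cfg80211_roamed() use it. PBSS is the 60 GHz (802.11ad) counterpart of an infrastructure BSS, so scan entries for such networks are stored under a different type and would never match an ESS-only lookup. A minimal sketch of the selection, using the pbss flag from the connect parameters as in the hunk:

/* Minimal sketch of the BSS-type selection added above; it assumes the
 * enum ieee80211_bss_type values and the 'pbss' connect parameter from
 * <net/cfg80211.h>, as shown in the hunk.
 */
#include <net/cfg80211.h>

static enum ieee80211_bss_type
conn_bss_type(const struct cfg80211_connect_params *connect)
{
	/* PBSS is the 60 GHz analogue of an infrastructure BSS;
	 * everything else keeps using the ESS bucket.
	 */
	return connect->pbss ? IEEE80211_BSS_TYPE_PBSS : IEEE80211_BSS_TYPE_ESS;
}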
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 92770427b211..9f440a9de63b 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -393,9 +393,9 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb)
393} 393}
394EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); 394EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb);
395 395
396unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) 396static unsigned int __ieee80211_get_mesh_hdrlen(u8 flags)
397{ 397{
398 int ae = meshhdr->flags & MESH_FLAGS_AE; 398 int ae = flags & MESH_FLAGS_AE;
399 /* 802.11-2012, 8.2.4.7.3 */ 399 /* 802.11-2012, 8.2.4.7.3 */
400 switch (ae) { 400 switch (ae) {
401 default: 401 default:
@@ -407,21 +407,31 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
407 return 18; 407 return 18;
408 } 408 }
409} 409}
410
411unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
412{
413 return __ieee80211_get_mesh_hdrlen(meshhdr->flags);
414}
410EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen); 415EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
411 416
412int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, 417static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr,
413 enum nl80211_iftype iftype) 418 const u8 *addr, enum nl80211_iftype iftype)
414{ 419{
415 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 420 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
416 u16 hdrlen, ethertype; 421 struct {
417 u8 *payload; 422 u8 hdr[ETH_ALEN] __aligned(2);
418 u8 dst[ETH_ALEN]; 423 __be16 proto;
419 u8 src[ETH_ALEN] __aligned(2); 424 } payload;
425 struct ethhdr tmp;
426 u16 hdrlen;
427 u8 mesh_flags = 0;
420 428
421 if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) 429 if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
422 return -1; 430 return -1;
423 431
424 hdrlen = ieee80211_hdrlen(hdr->frame_control); 432 hdrlen = ieee80211_hdrlen(hdr->frame_control);
433 if (skb->len < hdrlen + 8)
434 return -1;
425 435
426 /* convert IEEE 802.11 header + possible LLC headers into Ethernet 436 /* convert IEEE 802.11 header + possible LLC headers into Ethernet
427 * header 437 * header
@@ -432,8 +442,11 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
432 * 1 0 BSSID SA DA n/a 442 * 1 0 BSSID SA DA n/a
433 * 1 1 RA TA DA SA 443 * 1 1 RA TA DA SA
434 */ 444 */
435 memcpy(dst, ieee80211_get_DA(hdr), ETH_ALEN); 445 memcpy(tmp.h_dest, ieee80211_get_DA(hdr), ETH_ALEN);
436 memcpy(src, ieee80211_get_SA(hdr), ETH_ALEN); 446 memcpy(tmp.h_source, ieee80211_get_SA(hdr), ETH_ALEN);
447
448 if (iftype == NL80211_IFTYPE_MESH_POINT)
449 skb_copy_bits(skb, hdrlen, &mesh_flags, 1);
437 450
438 switch (hdr->frame_control & 451 switch (hdr->frame_control &
439 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { 452 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
@@ -450,44 +463,31 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
450 iftype != NL80211_IFTYPE_STATION)) 463 iftype != NL80211_IFTYPE_STATION))
451 return -1; 464 return -1;
452 if (iftype == NL80211_IFTYPE_MESH_POINT) { 465 if (iftype == NL80211_IFTYPE_MESH_POINT) {
453 struct ieee80211s_hdr *meshdr = 466 if (mesh_flags & MESH_FLAGS_AE_A4)
454 (struct ieee80211s_hdr *) (skb->data + hdrlen);
455 /* make sure meshdr->flags is on the linear part */
456 if (!pskb_may_pull(skb, hdrlen + 1))
457 return -1;
458 if (meshdr->flags & MESH_FLAGS_AE_A4)
459 return -1; 467 return -1;
460 if (meshdr->flags & MESH_FLAGS_AE_A5_A6) { 468 if (mesh_flags & MESH_FLAGS_AE_A5_A6) {
461 skb_copy_bits(skb, hdrlen + 469 skb_copy_bits(skb, hdrlen +
462 offsetof(struct ieee80211s_hdr, eaddr1), 470 offsetof(struct ieee80211s_hdr, eaddr1),
463 dst, ETH_ALEN); 471 tmp.h_dest, 2 * ETH_ALEN);
464 skb_copy_bits(skb, hdrlen +
465 offsetof(struct ieee80211s_hdr, eaddr2),
466 src, ETH_ALEN);
467 } 472 }
468 hdrlen += ieee80211_get_mesh_hdrlen(meshdr); 473 hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags);
469 } 474 }
470 break; 475 break;
471 case cpu_to_le16(IEEE80211_FCTL_FROMDS): 476 case cpu_to_le16(IEEE80211_FCTL_FROMDS):
472 if ((iftype != NL80211_IFTYPE_STATION && 477 if ((iftype != NL80211_IFTYPE_STATION &&
473 iftype != NL80211_IFTYPE_P2P_CLIENT && 478 iftype != NL80211_IFTYPE_P2P_CLIENT &&
474 iftype != NL80211_IFTYPE_MESH_POINT) || 479 iftype != NL80211_IFTYPE_MESH_POINT) ||
475 (is_multicast_ether_addr(dst) && 480 (is_multicast_ether_addr(tmp.h_dest) &&
476 ether_addr_equal(src, addr))) 481 ether_addr_equal(tmp.h_source, addr)))
477 return -1; 482 return -1;
478 if (iftype == NL80211_IFTYPE_MESH_POINT) { 483 if (iftype == NL80211_IFTYPE_MESH_POINT) {
479 struct ieee80211s_hdr *meshdr = 484 if (mesh_flags & MESH_FLAGS_AE_A5_A6)
480 (struct ieee80211s_hdr *) (skb->data + hdrlen);
481 /* make sure meshdr->flags is on the linear part */
482 if (!pskb_may_pull(skb, hdrlen + 1))
483 return -1;
484 if (meshdr->flags & MESH_FLAGS_AE_A5_A6)
485 return -1; 485 return -1;
486 if (meshdr->flags & MESH_FLAGS_AE_A4) 486 if (mesh_flags & MESH_FLAGS_AE_A4)
487 skb_copy_bits(skb, hdrlen + 487 skb_copy_bits(skb, hdrlen +
488 offsetof(struct ieee80211s_hdr, eaddr1), 488 offsetof(struct ieee80211s_hdr, eaddr1),
489 src, ETH_ALEN); 489 tmp.h_source, ETH_ALEN);
490 hdrlen += ieee80211_get_mesh_hdrlen(meshdr); 490 hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags);
491 } 491 }
492 break; 492 break;
493 case cpu_to_le16(0): 493 case cpu_to_le16(0):
@@ -498,33 +498,33 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
498 break; 498 break;
499 } 499 }
500 500
501 if (!pskb_may_pull(skb, hdrlen + 8)) 501 skb_copy_bits(skb, hdrlen, &payload, sizeof(payload));
502 return -1; 502 tmp.h_proto = payload.proto;
503
504 payload = skb->data + hdrlen;
505 ethertype = (payload[6] << 8) | payload[7];
506 503
507 if (likely((ether_addr_equal(payload, rfc1042_header) && 504 if (likely((ether_addr_equal(payload.hdr, rfc1042_header) &&
508 ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || 505 tmp.h_proto != htons(ETH_P_AARP) &&
509 ether_addr_equal(payload, bridge_tunnel_header))) { 506 tmp.h_proto != htons(ETH_P_IPX)) ||
507 ether_addr_equal(payload.hdr, bridge_tunnel_header)))
510 /* remove RFC1042 or Bridge-Tunnel encapsulation and 508 /* remove RFC1042 or Bridge-Tunnel encapsulation and
511 * replace EtherType */ 509 * replace EtherType */
512 skb_pull(skb, hdrlen + 6); 510 hdrlen += ETH_ALEN + 2;
513 memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); 511 else
514 memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); 512 tmp.h_proto = htons(skb->len);
515 } else {
516 struct ethhdr *ehdr;
517 __be16 len;
518 513
519 skb_pull(skb, hdrlen); 514 pskb_pull(skb, hdrlen);
520 len = htons(skb->len); 515
516 if (!ehdr)
521 ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); 517 ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr));
522 memcpy(ehdr->h_dest, dst, ETH_ALEN); 518 memcpy(ehdr, &tmp, sizeof(tmp));
523 memcpy(ehdr->h_source, src, ETH_ALEN); 519
524 ehdr->h_proto = len;
525 }
526 return 0; 520 return 0;
527} 521}
522
523int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
524 enum nl80211_iftype iftype)
525{
526 return __ieee80211_data_to_8023(skb, NULL, addr, iftype);
527}
528EXPORT_SYMBOL(ieee80211_data_to_8023); 528EXPORT_SYMBOL(ieee80211_data_to_8023);
529 529
530int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, 530int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
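The rewrite of ieee80211_data_to_8023() (now a thin wrapper around __ieee80211_data_to_8023()) removes the requirement that the 802.11 header, mesh header and LLC/SNAP snippet sit in the skb's linear area: the addresses, mesh flags and the 8 bytes following the header are gathered with skb_copy_bits() into stack variables, the Ethernet header is assembled in a temporary struct ethhdr, and only then is the skb pulled and the header pushed (or written into a caller-supplied ehdr). A self-contained sketch of the RFC 1042 / bridge-tunnel decision made on those 8 bytes (constants per RFC 1042 and 802.1H; the helper name is illustrative, not from the patch):

/* Illustrative sketch, not kernel code as-is: the check performed on the
 * 8 bytes after the 802.11 header, i.e. 6 bytes of LLC/SNAP plus a
 * 2-byte EtherType.
 */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

static const uint8_t rfc1042_header[6]       = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 };
static const uint8_t bridge_tunnel_header[6] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };

/* Returns true when the SNAP header should simply be stripped and the
 * EtherType that follows it reused for the rebuilt Ethernet header.
 */
static bool snap_strips_to_ethertype(const uint8_t payload[8])
{
	uint16_t ethertype = (payload[6] << 8) | payload[7];

	if (memcmp(payload, bridge_tunnel_header, 6) == 0)
		return true;
	/* AARP and IPX over RFC 1042 keep their SNAP header (802.1H rule) */
	if (memcmp(payload, rfc1042_header, 6) == 0 &&
	    ethertype != 0x80F3 /* ETH_P_AARP */ &&
	    ethertype != 0x8137 /* ETH_P_IPX */)
		return true;
	return false;
}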
@@ -636,7 +636,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
636 /* Update skb pointers to various headers since this modified frame 636 /* Update skb pointers to various headers since this modified frame
637 * is going to go through Linux networking code that may potentially 637 * is going to go through Linux networking code that may potentially
638 * need things like pointer to IP header. */ 638 * need things like pointer to IP header. */
639 skb_set_mac_header(skb, 0); 639 skb_reset_mac_header(skb);
640 skb_set_network_header(skb, nh_pos); 640 skb_set_network_header(skb, nh_pos);
641 skb_set_transport_header(skb, h_pos); 641 skb_set_transport_header(skb, h_pos);
642 642
@@ -644,70 +644,147 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
644} 644}
645EXPORT_SYMBOL(ieee80211_data_from_8023); 645EXPORT_SYMBOL(ieee80211_data_from_8023);
646 646
647static void
648__frame_add_frag(struct sk_buff *skb, struct page *page,
649 void *ptr, int len, int size)
650{
651 struct skb_shared_info *sh = skb_shinfo(skb);
652 int page_offset;
653
654 atomic_inc(&page->_count);
655 page_offset = ptr - page_address(page);
656 skb_add_rx_frag(skb, sh->nr_frags, page, page_offset, len, size);
657}
658
659static void
660__ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame,
661 int offset, int len)
662{
663 struct skb_shared_info *sh = skb_shinfo(skb);
664 const skb_frag_t *frag = &sh->frags[-1];
665 struct page *frag_page;
666 void *frag_ptr;
667 int frag_len, frag_size;
668 int head_size = skb->len - skb->data_len;
669 int cur_len;
670
671 frag_page = virt_to_head_page(skb->head);
672 frag_ptr = skb->data;
673 frag_size = head_size;
674
675 while (offset >= frag_size) {
676 offset -= frag_size;
677 frag++;
678 frag_page = skb_frag_page(frag);
679 frag_ptr = skb_frag_address(frag);
680 frag_size = skb_frag_size(frag);
681 }
682
683 frag_ptr += offset;
684 frag_len = frag_size - offset;
685
686 cur_len = min(len, frag_len);
687
688 __frame_add_frag(frame, frag_page, frag_ptr, cur_len, frag_size);
689 len -= cur_len;
690
691 while (len > 0) {
692 frag++;
693 frag_len = skb_frag_size(frag);
694 cur_len = min(len, frag_len);
695 __frame_add_frag(frame, skb_frag_page(frag),
696 skb_frag_address(frag), cur_len, frag_len);
697 len -= cur_len;
698 }
699}
700
701static struct sk_buff *
702__ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
703 int offset, int len, bool reuse_frag)
704{
705 struct sk_buff *frame;
706 int cur_len = len;
707
708 if (skb->len - offset < len)
709 return NULL;
710
711 /*
712 * When reusing framents, copy some data to the head to simplify
713 * ethernet header handling and speed up protocol header processing
714 * in the stack later.
715 */
716 if (reuse_frag)
717 cur_len = min_t(int, len, 32);
718
719 /*
720 * Allocate and reserve two bytes more for payload
721 * alignment since sizeof(struct ethhdr) is 14.
722 */
723 frame = dev_alloc_skb(hlen + sizeof(struct ethhdr) + 2 + cur_len);
724
725 skb_reserve(frame, hlen + sizeof(struct ethhdr) + 2);
726 skb_copy_bits(skb, offset, skb_put(frame, cur_len), cur_len);
727
728 len -= cur_len;
729 if (!len)
730 return frame;
731
732 offset += cur_len;
733 __ieee80211_amsdu_copy_frag(skb, frame, offset, len);
734
735 return frame;
736}
647 737
648void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, 738void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
649 const u8 *addr, enum nl80211_iftype iftype, 739 const u8 *addr, enum nl80211_iftype iftype,
650 const unsigned int extra_headroom, 740 const unsigned int extra_headroom,
651 bool has_80211_header) 741 bool has_80211_header)
652{ 742{
743 unsigned int hlen = ALIGN(extra_headroom, 4);
653 struct sk_buff *frame = NULL; 744 struct sk_buff *frame = NULL;
654 u16 ethertype; 745 u16 ethertype;
655 u8 *payload; 746 u8 *payload;
656 const struct ethhdr *eth; 747 int offset = 0, remaining, err;
657 int remaining, err; 748 struct ethhdr eth;
658 u8 dst[ETH_ALEN], src[ETH_ALEN]; 749 bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb);
750 bool reuse_skb = false;
751 bool last = false;
659 752
660 if (has_80211_header) { 753 if (has_80211_header) {
661 err = ieee80211_data_to_8023(skb, addr, iftype); 754 err = __ieee80211_data_to_8023(skb, &eth, addr, iftype);
662 if (err) 755 if (err)
663 goto out; 756 goto out;
664
665 /* skip the wrapping header */
666 eth = (struct ethhdr *) skb_pull(skb, sizeof(struct ethhdr));
667 if (!eth)
668 goto out;
669 } else {
670 eth = (struct ethhdr *) skb->data;
671 } 757 }
672 758
673 while (skb != frame) { 759 while (!last) {
760 unsigned int subframe_len;
761 int len;
674 u8 padding; 762 u8 padding;
675 __be16 len = eth->h_proto;
676 unsigned int subframe_len = sizeof(struct ethhdr) + ntohs(len);
677
678 remaining = skb->len;
679 memcpy(dst, eth->h_dest, ETH_ALEN);
680 memcpy(src, eth->h_source, ETH_ALEN);
681 763
764 skb_copy_bits(skb, offset, &eth, sizeof(eth));
765 len = ntohs(eth.h_proto);
766 subframe_len = sizeof(struct ethhdr) + len;
682 padding = (4 - subframe_len) & 0x3; 767 padding = (4 - subframe_len) & 0x3;
768
683 /* the last MSDU has no padding */ 769 /* the last MSDU has no padding */
770 remaining = skb->len - offset;
684 if (subframe_len > remaining) 771 if (subframe_len > remaining)
685 goto purge; 772 goto purge;
686 773
687 skb_pull(skb, sizeof(struct ethhdr)); 774 offset += sizeof(struct ethhdr);
688 /* reuse skb for the last subframe */ 775 /* reuse skb for the last subframe */
689 if (remaining <= subframe_len + padding) 776 last = remaining <= subframe_len + padding;
777 if (!skb_is_nonlinear(skb) && !reuse_frag && last) {
778 skb_pull(skb, offset);
690 frame = skb; 779 frame = skb;
691 else { 780 reuse_skb = true;
692 unsigned int hlen = ALIGN(extra_headroom, 4); 781 } else {
693 /* 782 frame = __ieee80211_amsdu_copy(skb, hlen, offset, len,
694 * Allocate and reserve two bytes more for payload 783 reuse_frag);
695 * alignment since sizeof(struct ethhdr) is 14.
696 */
697 frame = dev_alloc_skb(hlen + subframe_len + 2);
698 if (!frame) 784 if (!frame)
699 goto purge; 785 goto purge;
700 786
701 skb_reserve(frame, hlen + sizeof(struct ethhdr) + 2); 787 offset += len + padding;
702 memcpy(skb_put(frame, ntohs(len)), skb->data,
703 ntohs(len));
704
705 eth = (struct ethhdr *)skb_pull(skb, ntohs(len) +
706 padding);
707 if (!eth) {
708 dev_kfree_skb(frame);
709 goto purge;
710 }
711 } 788 }
712 789
713 skb_reset_network_header(frame); 790 skb_reset_network_header(frame);
@@ -716,24 +793,20 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
716 793
717 payload = frame->data; 794 payload = frame->data;
718 ethertype = (payload[6] << 8) | payload[7]; 795 ethertype = (payload[6] << 8) | payload[7];
719
720 if (likely((ether_addr_equal(payload, rfc1042_header) && 796 if (likely((ether_addr_equal(payload, rfc1042_header) &&
721 ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || 797 ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
722 ether_addr_equal(payload, bridge_tunnel_header))) { 798 ether_addr_equal(payload, bridge_tunnel_header))) {
723 /* remove RFC1042 or Bridge-Tunnel 799 eth.h_proto = htons(ethertype);
724 * encapsulation and replace EtherType */ 800 skb_pull(frame, ETH_ALEN + 2);
725 skb_pull(frame, 6);
726 memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
727 memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
728 } else {
729 memcpy(skb_push(frame, sizeof(__be16)), &len,
730 sizeof(__be16));
731 memcpy(skb_push(frame, ETH_ALEN), src, ETH_ALEN);
732 memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN);
733 } 801 }
802
803 memcpy(skb_push(frame, sizeof(eth)), &eth, sizeof(eth));
734 __skb_queue_tail(list, frame); 804 __skb_queue_tail(list, frame);
735 } 805 }
736 806
807 if (!reuse_skb)
808 dev_kfree_skb(skb);
809
737 return; 810 return;
738 811
 739 purge: 812 purge:
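ieee80211_amsdu_to_8023s() now walks the A-MSDU by offset rather than repeatedly pulling the original skb, reuses the skb only for the final subframe of a linear buffer, and otherwise builds each subframe from a small copied head (at most 32 bytes when page fragments can be reused) plus references to the original pages via __ieee80211_amsdu_copy_frag(). Each subframe is a 14-byte header (DA, SA, big-endian MSDU length) followed by the MSDU, padded so the next subframe starts on a 4-byte boundary; the last subframe carries no padding. A hedged, userspace-style sketch of just that length/padding walk, with invented names:

/* Illustrative walk over A-MSDU subframes; the callback and names are
 * assumptions for the sketch, not part of the patch.
 */
#include <stddef.h>
#include <stdint.h>

typedef void (*subframe_cb)(const uint8_t *msdu, size_t len, void *ctx);

static int walk_amsdu(const uint8_t *buf, size_t buflen, subframe_cb cb, void *ctx)
{
	size_t offset = 0;

	while (offset < buflen) {
		size_t remaining = buflen - offset;
		size_t len, subframe_len, padding;

		if (remaining < 14)
			return -1;		/* truncated subframe header */
		/* 6-byte DA + 6-byte SA, then a big-endian MSDU length */
		len = ((size_t)buf[offset + 12] << 8) | buf[offset + 13];
		subframe_len = 14 + len;
		padding = (4 - subframe_len) & 0x3;	/* none on the last subframe */

		if (subframe_len > remaining)
			return -1;		/* truncated A-MSDU: drop it all */

		cb(buf + offset + 14, len, ctx);
		offset += subframe_len + padding;
	}
	return 0;
}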
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index c8717c1d082e..b50ee5d622e1 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -342,6 +342,40 @@ static const int compat_event_type_size[] = {
342 342
343/* IW event code */ 343/* IW event code */
344 344
345void wireless_nlevent_flush(void)
346{
347 struct sk_buff *skb;
348 struct net *net;
349
350 ASSERT_RTNL();
351
352 for_each_net(net) {
353 while ((skb = skb_dequeue(&net->wext_nlevents)))
354 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
355 GFP_KERNEL);
356 }
357}
358EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
359
360static int wext_netdev_notifier_call(struct notifier_block *nb,
361 unsigned long state, void *ptr)
362{
363 /*
364 * When a netdev changes state in any way, flush all pending messages
365 * to avoid them going out in a strange order, e.g. RTM_NEWLINK after
366 * RTM_DELLINK, or with IFF_UP after without IFF_UP during dev_close()
367 * or similar - all of which could otherwise happen due to delays from
368 * schedule_work().
369 */
370 wireless_nlevent_flush();
371
372 return NOTIFY_OK;
373}
374
375static struct notifier_block wext_netdev_notifier = {
376 .notifier_call = wext_netdev_notifier_call,
377};
378
345static int __net_init wext_pernet_init(struct net *net) 379static int __net_init wext_pernet_init(struct net *net)
346{ 380{
347 skb_queue_head_init(&net->wext_nlevents); 381 skb_queue_head_init(&net->wext_nlevents);
@@ -360,7 +394,12 @@ static struct pernet_operations wext_pernet_ops = {
360 394
361static int __init wireless_nlevent_init(void) 395static int __init wireless_nlevent_init(void)
362{ 396{
363 return register_pernet_subsys(&wext_pernet_ops); 397 int err = register_pernet_subsys(&wext_pernet_ops);
398
399 if (err)
400 return err;
401
402 return register_netdevice_notifier(&wext_netdev_notifier);
364} 403}
365 404
366subsys_initcall(wireless_nlevent_init); 405subsys_initcall(wireless_nlevent_init);
@@ -368,17 +407,8 @@ subsys_initcall(wireless_nlevent_init);
368/* Process events generated by the wireless layer or the driver. */ 407/* Process events generated by the wireless layer or the driver. */
369static void wireless_nlevent_process(struct work_struct *work) 408static void wireless_nlevent_process(struct work_struct *work)
370{ 409{
371 struct sk_buff *skb;
372 struct net *net;
373
374 rtnl_lock(); 410 rtnl_lock();
375 411 wireless_nlevent_flush();
376 for_each_net(net) {
377 while ((skb = skb_dequeue(&net->wext_nlevents)))
378 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
379 GFP_KERNEL);
380 }
381
382 rtnl_unlock(); 412 rtnl_unlock();
383} 413}
384 414
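The wext change factors the queue-draining loop out of the work item into an exported wireless_nlevent_flush() and additionally calls it from a netdev notifier, so queued wireless-extensions netlink events are flushed synchronously whenever an interface changes state and cannot be reordered behind the RTM_NEWLINK/RTM_DELLINK messages emitted by dev_close() and similar paths. Note that if register_netdevice_notifier() fails in wireless_nlevent_init(), the pernet subsystem registered just before it is left in place; the initcall simply returns the error. The registration itself follows the standard notifier pattern; a minimal sketch with illustrative names:

/* Generic netdevice-notifier registration pattern, as used by the hunk
 * above; the callback and module names here are examples only.
 */
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	pr_debug("netdev %s: event %lu\n", dev->name, event);
	return NOTIFY_OK;	/* never veto events we only observe */
}

static struct notifier_block example_netdev_nb = {
	.notifier_call = example_netdev_event,
};

static int __init example_init(void)
{
	return register_netdevice_notifier(&example_netdev_nb);
}

static void __exit example_exit(void)
{
	unregister_netdevice_notifier(&example_netdev_nb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");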
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index f07224d8b88f..250e567ba3d6 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -9,6 +9,8 @@
9 * any later version. 9 * any later version.
10 */ 10 */
11 11
12#include <crypto/hash.h>
13#include <crypto/skcipher.h>
12#include <linux/module.h> 14#include <linux/module.h>
13#include <linux/kernel.h> 15#include <linux/kernel.h>
14#include <linux/pfkeyv2.h> 16#include <linux/pfkeyv2.h>
@@ -782,14 +784,13 @@ void xfrm_probe_algs(void)
782 BUG_ON(in_softirq()); 784 BUG_ON(in_softirq());
783 785
784 for (i = 0; i < aalg_entries(); i++) { 786 for (i = 0; i < aalg_entries(); i++) {
785 status = crypto_has_hash(aalg_list[i].name, 0, 787 status = crypto_has_ahash(aalg_list[i].name, 0, 0);
786 CRYPTO_ALG_ASYNC);
787 if (aalg_list[i].available != status) 788 if (aalg_list[i].available != status)
788 aalg_list[i].available = status; 789 aalg_list[i].available = status;
789 } 790 }
790 791
791 for (i = 0; i < ealg_entries(); i++) { 792 for (i = 0; i < ealg_entries(); i++) {
792 status = crypto_has_ablkcipher(ealg_list[i].name, 0, 0); 793 status = crypto_has_skcipher(ealg_list[i].name, 0, 0);
793 if (ealg_list[i].available != status) 794 if (ealg_list[i].available != status)
794 ealg_list[i].available = status; 795 ealg_list[i].available = status;
795 } 796 }
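xfrm_probe_algs() moves from the legacy crypto_has_hash()/crypto_has_ablkcipher() helpers to the ahash and skcipher front ends, which is why <crypto/hash.h> and <crypto/skcipher.h> are now included; the availability bookkeeping is unchanged, only the API family being probed differs. A hedged sketch of the same kind of probe, assuming the (name, type, mask) signatures shown in the hunk and using example algorithm names:

/* Probe whether the kernel can instantiate given algorithms; mirrors the
 * pattern in xfrm_probe_algs() above. The algorithm names are examples.
 */
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/printk.h>

static void example_probe_algs(void)
{
	if (crypto_has_ahash("hmac(sha256)", 0, 0))
		pr_debug("hmac(sha256) is available\n");

	if (crypto_has_skcipher("cbc(aes)", 0, 0))
		pr_debug("cbc(aes) is available\n");
}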
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 805681a7d356..2cc7af858c6f 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2449,7 +2449,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
2449 int type, err; 2449 int type, err;
2450 2450
2451#ifdef CONFIG_COMPAT 2451#ifdef CONFIG_COMPAT
2452 if (is_compat_task()) 2452 if (in_compat_syscall())
2453 return -ENOTSUPP; 2453 return -ENOTSUPP;
2454#endif 2454#endif
2455 2455
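is_compat_task() asks whether the current task is a 32-bit task, while in_compat_syscall() asks whether the current request arrived through the compat syscall path; the two can differ, for example when a 64-bit task issues a 32-bit syscall on x86, so a netlink handler that rejects untranslated compat requests is better served by the per-syscall check. A minimal sketch of the guard, with an illustrative function name around the check taken from the hunk:

/* Minimal sketch of the compat guard used above; the surrounding function
 * is invented for illustration, the check itself is the one from the hunk.
 */
#include <linux/compat.h>
#include <linux/errno.h>

static int example_netlink_handler(void)
{
#ifdef CONFIG_COMPAT
	/* Reject requests made through the 32-bit compat syscall path;
	 * the structure layouts differ and are not translated here.
	 */
	if (in_compat_syscall())
		return -ENOTSUPP;
#endif
	return 0;
}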